In [None]:
# pip install StackAPI
# Save keys in stackexchange.json
# Docs are here: https://stackapi.readthedocs.io/en/latest/
# https://api.stackexchange.com/docs


In [31]:
import pandas as pd
import numpy as np
import json 
from datetime import datetime
from stackapi import StackAPI
from collections import Counter
import plotly.express as px

In [2]:
key_file = "./stackexchange.json"
keys = json.load(open(key_file,"r"))

In [3]:
SITE = StackAPI("robotics")
start_day_this = datetime(2023,1,1)
stop_day_this = datetime(2023,12,8)
start_day_last = datetime(2022,1,1)
stop_day_last = datetime(2022,12,31)
this_year = 2023
last_year = 2022

In [5]:
# Get all questions tagged "ROS" in 2023
ros_questions = SITE.fetch('questions', fromdate=start_day_this, todate=stop_day_this, tagged='ros')

In [6]:
print(len(ros_questions["items"]))

439


In [7]:
def fetch_results(start_day,stop_day,tags):
    full_results = []
    for i in range(1,10):
        temp = SITE.fetch('questions', fromdate=start_day, todate=stop_day, tagged=tags, page=i)
        fetched = len(temp["items"])
        
        full_results += temp["items"]
        if fetched < 500:
            break
    print("{0} questions tagged with {1} between {2} and {3} ".format(len(full_results),tags,start_day,stop_day))
    return full_results

In [14]:
# TODO purge "ROS2" questions from "ROS" set

ros_this = fetch_results(start_day_this,stop_day_this,"ros")
ros1_this = fetch_results(start_day_this,stop_day_this,"ros1")
ros2_this = fetch_results(start_day_this,stop_day_this,"ros2")
ros_last = fetch_results(start_day_last,stop_day_last,"ros")
ros1_last = fetch_results(start_day_last,stop_day_last,"ros1")
ros2_last = fetch_results(start_day_last,stop_day_last,"ros2")

439 questions tagged with ros between 2023-01-01 00:00:00 and 2023-12-08 00:00:00 
0 questions tagged with ros1 between 2023-01-01 00:00:00 and 2023-12-08 00:00:00 
1980 questions tagged with ros2 between 2023-01-01 00:00:00 and 2023-12-08 00:00:00 
1408 questions tagged with ros between 2022-01-01 00:00:00 and 2022-12-31 00:00:00 
0 questions tagged with ros1 between 2022-01-01 00:00:00 and 2022-12-31 00:00:00 
386 questions tagged with ros2 between 2022-01-01 00:00:00 and 2022-12-31 00:00:00 


In [15]:
def filter_out_ros2_posts(post_list):
# We have a lot of posts where both tags #ROS and ROS2 are present
# we'll assume those aren't ROS 1 posts
    output = []
    count = 0
    for post in post_list:
        if not ("ros" in post["tags"] and "ros2" in post["tags"]):
            output.append(post)
        else:
            count += 1
    print("Removed {0} posts".format(count))
    return output
# We have a lot of posts that have both #ROS and #ROS2, we're going to say
# that these are ROS2 sets and remove them from the ROS 1 set. 
print(len(ros_this))
ros_this = filter_out_ros2_posts(ros_this)
print(len(ros_this))
print("----------------")
print(len(ros_last))
ros_last = filter_out_ros2_posts(ros_last)
print(len(ros_last))


439
Removed 91 posts
348
----------------
1408
Removed 328 posts
1080


In [16]:
# we're ignoring the "ros1" tag and assuming the tag "ros" == ROS 1 
ros_q_this = len(ros_this) + len(ros2_this)
ros1_q_this = len(ros_this)
ros2_q_this = len(ros2_this)
ros_q_last = len(ros_last) + len(ros2_last)
ros1_q_last = len(ros_last)
ros2_q_last = len(ros2_last)
print("In {0} there were {1} ROS questions asked.".format(this_year,ros_q_this))
print("In {0} there were {1} ROS questions asked.".format(last_year,ros_q_last))
total_change = (ros_q_this-ros_q_last) / ros_q_last
print("In {0} there was a {1:+.2f}% change in ROS questions asked.".format(this_year,total_change*100.0))
this_prct_ros2 = ros2_q_this / ros_q_this
last_prct_ros2 = ros2_q_last / ros_q_last
print("In {0} {1:.2f}% of questions were ROS 2 and {2:.2f}% were ROS 1 ".format(this_year,this_prct_ros2*100.0,100.0-(this_prct_ros2*100.0)))
print("In {0} {1:.2f}% of questions were ROS 2 and {2:.2f}% were ROS 1 ".format(last_year,last_prct_ros2*100.0,100.0-(last_prct_ros2*100.0)))

In 2023 there were 2328 ROS questions asked.
In 2022 there were 1466 ROS questions asked.
In 2023 there was a +58.80% change in ROS questions asked.
In 2023 85.05% of questions were ROS 2 and 14.95% were ROS 1 
In 2022 26.33% of questions were ROS 2 and 73.67% were ROS 1 


In [17]:
def summarize_tags(questions):
    summary = {}
    summary["tags"] = [] 
    summary["answers"] = 0
    summary["is_answered"] = 0
    summary["views"] = 0
    summary["scores"] = []
    summary["reputations"] = [] 
    for i in questions:
        summary["tags"] += i["tags"]
        summary["scores"].append(i["score"])
        summary["answers"] += i["answer_count"]
        if i["is_answered"]:
            summary["is_answered"] += 1
        summary["views"] += i["view_count"]
        if "reputation" in i["owner"]:
            summary["reputations"].append(i["owner"]["reputation"])
    summary["tags"] = Counter(summary["tags"])
    summary["total"] = len(questions)
    summary["prct_answered"] = summary["is_answered"] / len(questions)
    summary["total_answers"] = np.sum(summary["answers"])
    summary["total_views"] = np.sum(summary["views"])
    summary["median_reputation"] = np.median(summary["reputations"])
    return summary

In [19]:
ros_this_summary = summarize_tags(ros_this)
ros2_this_summary = summarize_tags(ros2_this)
all_ros_this_summary = summarize_tags(ros_this+ros2_this)
ros_last_summary = summarize_tags(ros_last)
ros2_last_summary = summarize_tags(ros2_last)
all_ros_last_summary = summarize_tags(ros_last+ros2_last)
names = ["ros_this","ros_last","ros2_this","ros2_last","all_this","all_last"]
dsets = [ros_this_summary,ros_last_summary,ros2_this_summary,ros2_last_summary,all_ros_this_summary,all_ros_last_summary]

ros_this: [('ros-noetic', 52), ('gazebo', 36), ('rviz', 36), ('moveit', 25), ('navigation', 24), ('ros-melodic', 23), ('python', 16), ('c++', 12), ('rospy', 11), ('ubuntu', 10)]
ros_last: [('ros-melodic', 228), ('navigation', 89), ('gazebo', 80), ('moveit', 69), ('rviz', 57), ('ros-kinetic', 55), ('tf2', 38), ('transform', 37), ('python', 29), ('python3', 29)]
ros2_this: [('ros-humble', 365), ('gazebo', 221), ('ros', 197), ('nav2', 161), ('rviz', 102), ('ros-foxy', 88), ('python', 86), ('navigation', 83), ('moveit', 79), ('colcon', 70)]
ros2_last: [('ros', 144), ('navigation', 22), ('roslaunch', 19), ('c++', 16), ('python', 16), ('gazebo', 12), ('colcon', 11), ('rviz', 11), ('docker', 9), ('mapping', 7)]
all_this: [('ros', 545), ('ros-humble', 370), ('gazebo', 257), ('nav2', 164), ('rviz', 138), ('navigation', 107), ('moveit', 104), ('python', 102), ('ros-foxy', 88), ('urdf', 77)]
all_last: [('ros2', 386), ('ros-melodic', 228), ('navigation', 111), ('gazebo', 92), ('moveit', 75), ('rvi

In [63]:
df = pd.DataFrame.from_dict(all_ros_this_summary["tags"], orient='index').reset_index()
df.columns.values[0] = "Tag"
temp = "{0} Count".format(this_year)
df.columns.values[1] = temp
df = df.sort_values(by=temp,ascending=False)
fig = px.bar(df[2:42], x="Tag", y=temp, title="Top ROS Question Tags on Robot Stack Exchange Tags for {0}".format(this_year))
fig.show()

In [72]:
# Fiddle with tags and create a data frame that shows tags by ROS1, ROS2, and all ROS last year
last_count = "{0} Count".format(last_year)
r1 = "ROS 1"
r2 = "ROS 2"
for i,row in df.iterrows():
    t = row["Tag"]
    df.at[i,last_count] = 0
    if t in all_ros_last_summary["tags"]:
        df.at[i,last_count]=int(all_ros_last_summary["tags"][t]) 
    df.at[i,r1] = 0
    if t in ros_this_summary["tags"]:
        df.at[i,r1]=int(ros_this_summary["tags"][t])  
    df.at[i,r2] = 0
    if t in ros2_this_summary["tags"]:
        df.at[i,r2]=int(ros2_this_summary["tags"][t])
df

Unnamed: 0,Tag,2023 Count,2022 Count,ROS 1,ROS 2
254,ros2,1980,386.0,0.0,1980.0
0,ros,545,1224.0,348.0,197.0
24,ros-humble,370,5.0,5.0,365.0
1,gazebo,257,92.0,36.0,221.0
222,nav2,164,1.0,3.0,161.0
...,...,...,...,...,...
267,object-recognition,1,0.0,0.0,1.0
269,beaglebone,1,0.0,0.0,1.0
270,beagle-bone,1,0.0,0.0,1.0
298,stepper-motor,1,0.0,0.0,1.0


In [79]:
# The first two are ROS and ROS 2 so skip them
fig = px.bar(df[2:52], x="Tag", y=["ROS 1","ROS 2"], title="Top ROS Question Tags on Robot Stack Exchange Tags for {0}".format(this_year))
fig.show()

In [83]:
fig = px.bar(df[2:42], x="Tag", y=["2023 Count","2022 Count"], barmode='group',title="Change in Tag Frequency")
fig.show()

In [23]:
# Note that ROS2 is often in the set of ROS questions, so we probably should purge those questions so we don't get an overcount
print("In {0} the top ten ROS tags were: {1}\n".format(last_year,all_ros_last_summary["tags"].most_common()[2:11]))
print("In {0} the top ten ROS tags were: {1}\n".format(this_year,all_ros_this_summary["tags"].most_common()[2:11]))
print("In {0} the top ten ROS 1 tags were: {1}\n".format(this_year,ros_this_summary["tags"].most_common()[1:11]))
print("In {0} the top ten ROS 2 tags were: {1}\n".format(this_year,ros2_this_summary["tags"].most_common()[1:11]))


In 2022 the top ten ROS tags were: [('ros-melodic', 228), ('navigation', 111), ('gazebo', 92), ('moveit', 75), ('rviz', 68), ('ros-kinetic', 55), ('python', 45), ('tf2', 43), ('transform', 42)]

In 2023 the top ten ROS tags were: [('ros-humble', 370), ('gazebo', 257), ('nav2', 164), ('rviz', 138), ('navigation', 107), ('moveit', 104), ('python', 102), ('ros-foxy', 88), ('urdf', 77)]

In 2023 the top ten ROS 1 tags were: [('ros-noetic', 52), ('gazebo', 36), ('rviz', 36), ('moveit', 25), ('navigation', 24), ('ros-melodic', 23), ('python', 16), ('c++', 12), ('rospy', 11), ('ubuntu', 10)]

In 2023 the top ten ROS 2 tags were: [('ros-humble', 365), ('gazebo', 221), ('ros', 197), ('nav2', 161), ('rviz', 102), ('ros-foxy', 88), ('python', 86), ('navigation', 83), ('moveit', 79), ('colcon', 70)]



In [30]:
# Percent answered 
print("In {0} there were {1} questions and {2} answers, {3:.2f}% of questions were answered.\n".format(last_year,all_ros_last_summary["total"],
                                                                                                       all_ros_last_summary["total_answers"],
                                                                                                       100.0*all_ros_last_summary["prct_answered"]
                                                                                                     ))
print("In {0} there were {1} questions and {2} answers, {3:.2f}% of questions were answered.\n".format(this_year,all_ros_this_summary["total"],
                                                                                                       all_ros_this_summary["total_answers"],
                                                                                                       100.0*all_ros_this_summary["prct_answered"]
                                                                                                     ))
print("-"*20)
print("In {0} there were {1} ROS 1 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(last_year,ros_last_summary["total"],
                                                                                                       ros_last_summary["total_answers"],
                                                                                                       100.0*ros_last_summary["prct_answered"]
                                                                                                     ))
print("In {0} there were {1} ROS 1 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(this_year,ros_this_summary["total"],
                                                                                                        ros_this_summary["total_answers"],
                                                                                                       100.0*ros_this_summary["prct_answered"]
                                                                                                     ))
print("-"*20)
print("In {0} there were {1} ROS 2 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(last_year,ros2_last_summary["total"],
                                                                                                       ros2_last_summary["total_answers"],
                                                                                                       100.0*ros2_last_summary["prct_answered"]
                                                                                                     ))
print("In {0} there were {1} ROS 2 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(this_year,ros2_this_summary["total"],
                                                                                                        ros2_this_summary["total_answers"],
                                                                                                       100.0*ros2_this_summary["prct_answered"]
                                                                                                     ))


In 2022 there were 1466 questions and 1611 answers, 97.07% of questions were answered.

In 2023 there were 2328 questions and 1788 answers, 50.73% of questions were answered.

--------------------
In 2022 there were 1080 ROS 1 questions and 1178 answers, 96.94% of questions were answered.

In 2023 there were 348 ROS 1 questions and 292 answers, 59.77% of questions were answered.

--------------------
In 2022 there were 386 ROS 2 questions and 433 answers, 97.41% of questions were answered.

In 2023 there were 1980 ROS 2 questions and 1496 answers, 49.14% of questions were answered.



In [None]:
# Assemble a dataframe of tags data