In [1]:
# pip install StackAPI
# Save keys in stackexchange.json
# Docs are here: https://stackapi.readthedocs.io/en/latest/
# https://api.stackexchange.com/docs


In [2]:
import pandas as pd
import numpy as np
import json 
from datetime import datetime
from stackapi import StackAPI
from collections import Counter
import plotly.express as px

In [3]:
key_file = "./stackexchange.json"
keys = json.load(open(key_file,"r"))

In [8]:
SITE = StackAPI("robotics")
start_day_this = datetime(2024,1,1)
stop_day_this = datetime(2024,12,31)
start_day_last = datetime(2023,1,1)
stop_day_last = datetime(2023,12,31)
this_year = 2024
last_year = 2023

In [9]:
# Get all questions tagged "ROS" in 2023
ros_questions = SITE.fetch('questions', fromdate=start_day_this, todate=stop_day_this, tagged='ros')

In [10]:
print(len(ros_questions["items"]))

434


In [11]:
def fetch_results(start_day,stop_day,tags):
    full_results = []
    for i in range(1,10):
        temp = SITE.fetch('questions', fromdate=start_day, todate=stop_day, tagged=tags, page=i)
        fetched = len(temp["items"])
        
        full_results += temp["items"]
        if fetched < 500:
            break
    print("{0} questions tagged with {1} between {2} and {3} ".format(len(full_results),tags,start_day,stop_day))
    return full_results

In [12]:
ros_this = fetch_results(start_day_this,stop_day_this,"ros")
ros1_this = fetch_results(start_day_this,stop_day_this,"ros1")
ros2_this = fetch_results(start_day_this,stop_day_this,"ros2")
ros_last = fetch_results(start_day_last,stop_day_last,"ros")
ros1_last = fetch_results(start_day_last,stop_day_last,"ros1")
ros2_last = fetch_results(start_day_last,stop_day_last,"ros2")

434 questions tagged with ros between 2024-01-01 00:00:00 and 2024-12-31 00:00:00 
18 questions tagged with ros1 between 2024-01-01 00:00:00 and 2024-12-31 00:00:00 
4500 questions tagged with ros2 between 2024-01-01 00:00:00 and 2024-12-31 00:00:00 
396 questions tagged with ros between 2023-01-01 00:00:00 and 2023-12-31 00:00:00 
0 questions tagged with ros1 between 2023-01-01 00:00:00 and 2023-12-31 00:00:00 
1488 questions tagged with ros2 between 2023-01-01 00:00:00 and 2023-12-31 00:00:00 


In [13]:
def filter_out_ros2_posts(post_list):
# We have a lot of posts where both tags #ROS and ROS2 are present
# we'll assume those aren't ROS 1 posts
    output = []
    count = 0
    for post in post_list:
        if not ("ros" in post["tags"] and "ros2" in post["tags"]):
            output.append(post)
        else:
            count += 1
    print("Removed {0} posts".format(count))
    return output
# We have a lot of posts that have both #ROS and #ROS2, we're going to say
# that these are ROS2 sets and remove them from the ROS 1 set. 
print(len(ros_this))
ros_this = filter_out_ros2_posts(ros_this)
print(len(ros_this))
print("----------------")
print(len(ros_last))
ros_last = filter_out_ros2_posts(ros_last)
print(len(ros_last))


434
Removed 92 posts
342
----------------
396
Removed 88 posts
308


In [14]:
#TODO, these DFs could use flattening
this_df = pd.DataFrame(data=(ros_this+ros2_this))
last_df = pd.DataFrame(data=(ros_last+ros2_last))
this_df

Unnamed: 0,tags,owner,is_answered,view_count,answer_count,score,last_activity_date,creation_date,question_id,content_license,link,title,last_edit_date,accepted_answer_id,protected_date,closed_date,closed_reason
0,"[ros, ubuntu, catkin-make, ros-noetic, jetson]","{'account_id': 13372738, 'reputation': 1, 'use...",False,25,1,0,1737270275,1724158795,112700,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,ROS Noetic: catkin_make fails due to undefined...,,,,,
1,"[ros, imu, robot-localization, hector-slam]","{'account_id': 18465466, 'reputation': 21, 'us...",False,247,1,0,1737122540,1713731793,110770,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,Implementing IMU on Hector SLAM,,,,,
2,"[ros, dynamic-reconfigure]","{'account_id': 34811445, 'reputation': 1, 'use...",False,28,1,0,1736572145,1723539167,112623,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,Using dynamic_reconfigure witouth the graphica...,,,,,
3,"[ros, gazebo, ubuntu, nvidia, intel]","{'account_id': 22559253, 'reputation': 1, 'use...",False,106,1,0,1736510505,1712891100,110591,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,Intel Error while launching Gazebo on Ubuntu 2...,1.713173e+09,,,,
4,"[ros, ros-noetic, nodes, universal-robot]","{'account_id': 34571296, 'reputation': 1, 'use...",False,38,1,0,1735884095,1722841365,112510,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,"Launch scripts with same nodes, how do i chang...",,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4837,[ros2],"{'account_id': 8223235, 'reputation': 3, 'user...",True,101,1,0,1705038148,1705032980,107554,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,facing issue while using tricycle_controller i...,,107555.0,,,
4838,"[ros2, mcap]","{'account_id': 24935208, 'reputation': 11, 'us...",True,591,2,1,1705000584,1704858717,107510,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,How to read a mcap file and publish message us...,,,,,
4839,"[ros2, ros2-control, joint-trajectory-controll...","{'account_id': 30136160, 'reputation': 15, 'us...",True,22,1,0,1704908824,1704899405,107531,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,JTC ignores URDF velocity limit,,107532.0,,,
4840,[ros2],"{'account_id': 27502365, 'reputation': 1, 'use...",True,211,2,0,1704879867,1704675817,107466,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,Trying to install ros-ROS_DISTRO-behaviortree-...,1.704866e+09,,,,


In [18]:
# Change seconds from epoch to datetime
this_df["creation_date"]=(pd.to_datetime(this_df["creation_date"],unit='s'))
last_df["creation_date"]=(pd.to_datetime(last_df["creation_date"],unit='s'))

# Unholy hack to group data by month, count questions, and then construct a new dataframe
# that has questions by month
tdf = this_df.groupby([this_df["creation_date"].dt.month.rename('month')]).agg({'count'})
t_temp = tdf["tags"]
t_temp = t_temp["count"].tolist()
ldf = last_df.groupby([last_df["creation_date"].dt.month.rename('month')]).agg({'count'})
l_temp = ldf["tags"]
l_temp = l_temp["count"].tolist()
months = [i for i in range(1,13)]
new_df = []
for l,t,m in zip(l_temp,t_temp,months):
    temp = {}
    temp["{0}".format(last_year)] = l
    temp["{0}".format(this_year)] = t
    temp["month"] = m 
    new_df.append(temp)
new_df = pd.DataFrame(data=new_df)
new_df

Unnamed: 0,2023,2024,month
0,54,252,1
1,58,403,2
2,53,361,3
3,59,457,4
4,84,612,5
5,68,601,6
6,86,490,7
7,233,425,8
8,280,421,9
9,320,456,10


In [33]:
fig = px.line(new_df, x="month", y=["2023","2024"], title="Robotics Stack Overflow Questions Per Month")
fig.update_layout(
    xaxis_title="Month", 
    yaxis_title="New Questions Tagged with [ROS,ROS1,ROS2]",
    xaxis = dict(
        tickmode = 'array',
        tickvals = [i for i in range(1,13)],
        ticktext = ["Jan","Feb","March","April","May","Jun","July","Aug","Sept","Oct","Nov","Dec"]
    )
)

fig.show()

In [20]:
# we're ignoring the "ros1" tag and assuming the tag "ros" =="" ROS 1 
ros_q_this = len(ros_this) + len(ros2_this)
ros1_q_this = len(ros_this)
ros2_q_this = len(ros2_this)
ros_q_last = len(ros_last) + len(ros2_last)
ros1_q_last = len(ros_last)
ros2_q_last = len(ros2_last)
print("In {0} there were {1} ROS questions asked.".format(this_year,ros_q_this))
print("In {0} there were {1} ROS questions asked.".format(last_year,ros_q_last))
total_change = (ros_q_this-ros_q_last) / ros_q_last
print("In {0} there was a {1:+.2f}% change in ROS questions asked.".format(this_year,total_change*100.0))
this_prct_ros2 = ros2_q_this / ros_q_this
last_prct_ros2 = ros2_q_last / ros_q_last
print("In {0} {1:.2f}% of questions were ROS 2 and {2:.2f}% were ROS 1 ".format(this_year,this_prct_ros2*100.0,100.0-(this_prct_ros2*100.0)))
print("In {0} {1} questions were tagged \"ros2\" and {2} were tagged just \"ros\".".format(this_year,ros2_q_this,ros1_q_this))
print("----------------------------")
print("In {0} there were {1} ROS questions asked.".format(last_year,ros_q_last))
print("In {0} {1:.2f}% of questions were ROS 2 and {2:.2f}% were ROS 1 ".format(last_year,last_prct_ros2*100.0,100.0-(last_prct_ros2*100.0)))
print("In {0} {1} questions were tagged \"ros2\" and {2} were tagged just \"ros\".".format(last_year,ros2_q_last,ros1_q_last))


In 2024 there were 4842 ROS questions asked.
In 2023 there were 1796 ROS questions asked.
In 2024 there was a +169.60% change in ROS questions asked.
In 2024 92.94% of questions were ROS 2 and 7.06% were ROS 1 
In 2024 4500 questions were tagged "ros2" and 342 were tagged just "ros".
----------------------------
In 2023 there were 1796 ROS questions asked.
In 2023 82.85% of questions were ROS 2 and 17.15% were ROS 1 
In 2023 1488 questions were tagged "ros2" and 308 were tagged just "ros".


In [21]:
def summarize_tags(questions):
    summary = {}
    summary["tags"] = [] 
    summary["answers"] = 0
    summary["is_answered"] = 0
    summary["views"] = 0
    summary["scores"] = []
    summary["reputations"] = [] 
    for i in questions:
        summary["tags"] += i["tags"]
        summary["scores"].append(i["score"])
        summary["answers"] += i["answer_count"]
        if i["is_answered"]:
            summary["is_answered"] += 1
        summary["views"] += i["view_count"]
        if "reputation" in i["owner"]:
            summary["reputations"].append(i["owner"]["reputation"])
    summary["tags"] = Counter(summary["tags"])
    summary["total"] = len(questions)
    summary["prct_answered"] = summary["is_answered"] / len(questions)
    summary["total_answers"] = np.sum(summary["answers"])
    summary["total_views"] = np.sum(summary["views"])
    summary["median_reputation"] = np.median(summary["reputations"])
    return summary

In [22]:
# Summarize all of data into dictionary for ROS 1, ROS 2, and all ROS for this year and last
ros_this_summary = summarize_tags(ros_this)
ros2_this_summary = summarize_tags(ros2_this)
all_ros_this_summary = summarize_tags(ros_this+ros2_this)
ros_last_summary = summarize_tags(ros_last)
ros2_last_summary = summarize_tags(ros2_last)
all_ros_last_summary = summarize_tags(ros_last+ros2_last)
names = ["ros_this","ros_last","ros2_this","ros2_last","all_this","all_last"]
dsets = [ros_this_summary,ros_last_summary,ros2_this_summary,ros2_last_summary,all_ros_this_summary,all_ros_last_summary]

In [36]:
df = pd.DataFrame.from_dict(all_ros_this_summary["tags"], orient='index').reset_index()
df.columns.values[0] = "Tag"
temp = "{0} Count".format(this_year)
df.columns.values[1] = temp
df = df.sort_values(by=temp,ascending=False)
fig = px.bar(df[2:42], x="Tag", y=temp, title="Top ROS Question Tags on Robot Stack Exchange Tags for {0}".format(this_year))
fig.show()

In [24]:
# Fiddle with tags and create a data frame that shows tags by ROS1, ROS2, and all ROS last year
last_count = "{0} Count".format(last_year)
r1 = "ROS 1"
r2 = "ROS 2"
for i,row in df.iterrows():
    t = row["Tag"]
    df.at[i,last_count] = 0
    if t in all_ros_last_summary["tags"]:
        df.at[i,last_count]=int(all_ros_last_summary["tags"][t]) 
    df.at[i,r1] = 0
    if t in ros_this_summary["tags"]:
        df.at[i,r1]=int(ros_this_summary["tags"][t])  
    df.at[i,r2] = 0
    if t in ros2_this_summary["tags"]:
        df.at[i,r2]=int(ros2_this_summary["tags"][t])
df
df.to_csv("{0}TagCounts.csv".format(this_year))

In [25]:
# The first two are ROS and ROS 2 so skip them
fig = px.bar(df[2:52], x="Tag", y=["ROS 1","ROS 2"],  title="Top ROS Question Tags on Robot Stack Exchange for {0}".format(this_year))
fig.update_layout(
        #autosize=False,
        #width=800,
        #height=400,
        #template='plotly_dark',
        yaxis_title=dict(text='Number of Questions'),#, font=dict(size=16, color='#FFFFFF')),
        #yaxis_title=dict(text='7 day avg', font=dict(size=16, color='#FFFFFF')),
        #plot_bgcolor='rgb(50, 50, 50)',
        #xaxis=dict(tickfont=dict(size=14, color='#FFFFFF')),
        #yaxis=dict(tickfont=dict(size=14, color='#FFFFFF')),
        #legend=dict(x=0.1, y=1.1, orientation='h', font=dict(color='#FFFFFF')),
        #margin=dict(l=10, r=10, t=100, b=50)
    )

fig.show()

In [34]:
fig = px.bar(df[2:42], x="Tag", y=["2024 Count","2023 Count"], barmode='group',title="Change in Tag Frequency")
fig.show()

In [27]:
# Note that ROS2 is often in the set of ROS questions, so we probably should purge those questions so we don't get an overcount
print("In {0} the top ten ROS tags were: {1}\n".format(last_year,all_ros_last_summary["tags"].most_common()[2:11]))
print("In {0} the top ten ROS tags were: {1}\n".format(this_year,all_ros_this_summary["tags"].most_common()[2:11]))
print("In {0} the top ten ROS 1 tags were: {1}\n".format(this_year,ros_this_summary["tags"].most_common()[1:11]))
print("In {0} the top ten ROS 2 tags were: {1}\n".format(this_year,ros2_this_summary["tags"].most_common()[1:11]))


In 2023 the top ten ROS tags were: [('ros-humble', 310), ('gazebo', 166), ('nav2', 134), ('rviz', 98), ('navigation', 89), ('moveit', 82), ('python', 81), ('urdf', 66), ('c++', 59)]

In 2024 the top ten ROS tags were: [('ros', 658), ('gazebo', 626), ('nav2', 428), ('ros2-control', 319), ('rviz', 281), ('python', 266), ('navigation', 224), ('colcon', 205), ('moveit2', 199)]

In 2024 the top ten ROS 1 tags were: [('ros-noetic', 62), ('gazebo', 60), ('moveit', 29), ('navigation', 27), ('rviz', 20), ('urdf', 17), ('python', 17), ('slam', 17), ('ros-melodic', 15), ('ubuntu', 14)]

In 2024 the top ten ROS 2 tags were: [('ros-humble', 831), ('gazebo', 566), ('nav2', 425), ('ros2-control', 318), ('ros', 316), ('rviz', 261), ('python', 249), ('colcon', 203), ('moveit2', 198), ('navigation', 197)]



In [28]:
# Percent answered 
print("In {0} there were {1} questions and {2} answers, {3:.2f}% of questions were answered.\n".format(last_year,all_ros_last_summary["total"],
                                                                                                       all_ros_last_summary["total_answers"],
                                                                                                       100.0*all_ros_last_summary["prct_answered"]
                                                                                                     ))
print("In {0} there were {1} questions and {2} answers, {3:.2f}% of questions were answered.\n".format(this_year,all_ros_this_summary["total"],
                                                                                                       all_ros_this_summary["total_answers"],
                                                                                                       100.0*all_ros_this_summary["prct_answered"]
                                                                                                     ))
print("-"*20)
print("In {0} there were {1} ROS 1 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(last_year,ros_last_summary["total"],
                                                                                                       ros_last_summary["total_answers"],
                                                                                                       100.0*ros_last_summary["prct_answered"]
                                                                                                     ))
print("In {0} there were {1} ROS 1 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(this_year,ros_this_summary["total"],
                                                                                                        ros_this_summary["total_answers"],
                                                                                                       100.0*ros_this_summary["prct_answered"]
                                                                                                     ))
print("-"*20)
print("In {0} there were {1} ROS 2 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(last_year,ros2_last_summary["total"],
                                                                                                       ros2_last_summary["total_answers"],
                                                                                                       100.0*ros2_last_summary["prct_answered"]
                                                                                                     ))
print("In {0} there were {1} ROS 2 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(this_year,ros2_this_summary["total"],
                                                                                                        ros2_this_summary["total_answers"],
                                                                                                       100.0*ros2_this_summary["prct_answered"]
                                                                                                     ))


In 2023 there were 1796 questions and 1885 answers, 73.55% of questions were answered.

In 2024 there were 4842 questions and 2923 answers, 33.66% of questions were answered.

--------------------
In 2023 there were 308 ROS 1 questions and 321 answers, 75.97% of questions were answered.

In 2024 there were 342 ROS 1 questions and 178 answers, 24.56% of questions were answered.

--------------------
In 2023 there were 1488 ROS 2 questions and 1564 answers, 73.05% of questions were answered.

In 2024 there were 4500 ROS 2 questions and 2745 answers, 34.36% of questions were answered.



In [29]:
# For a given tag, count the number of views for that tag, i.e. tell us the "most viewed tag"
def make_view_chart(q_list,tags):
    # Get views per tag, divided by the number of tagged questions
    counts = dict(tags)
    views = {}
    for c in counts.keys():
        views[c] = 0
    for q in q_list:
        for t in q["tags"]:
            views[t] += q["view_count"]
    normed = {}
    for k in counts.keys():
        normed[k] = views[k] / counts[k] 
    retval = []
    for k in counts.keys():
        retval.append({"tag":k,"views":views[k],"count":counts[k],"normed":normed[k]})
    return retval

output = make_view_chart(ros_this+ros2_this,all_ros_this_summary["tags"])

In [30]:
view_df = pd.DataFrame(data=output)
view_df = view_df.sort_values(by="normed",ascending=False)
view_df

Unnamed: 0,tag,views,count,normed
475,rosversion,10675,10,1067.500000
406,environment,5738,7,819.714286
407,anaconda,6421,8,802.625000
560,bag-to-pcd,2058,3,686.000000
183,troubleshooting,4683,9,520.333333
...,...,...,...,...
495,machine-learning,45,5,9.000000
454,roswtf,40,5,8.000000
455,feature,40,5,8.000000
42,local-planner,6,1,6.000000


In [31]:
fig = px.bar(view_df[0:50], x="tag", y="normed", title="RSE ROS Question Tags with a High Number of Views Per Question")
fig.update_layout(
        #autosize=False,
        #width=800,
        #height=400,
        #template='plotly_dark',
        yaxis_title=dict(text='Views per Question Tagged'),#, font=dict(size=16, color='#FFFFFF')),
        #yaxis_title=dict(text='7 day avg', font=dict(size=16, color='#FFFFFF')),
        #plot_bgcolor='rgb(50, 50, 50)',
        #xaxis=dict(tickfont=dict(size=14, color='#FFFFFF')),
        #yaxis=dict(tickfont=dict(size=14, color='#FFFFFF')),
        #legend=dict(x=0.1, y=1.1, orientation='h', font=dict(color='#FFFFFF')),
        #margin=dict(l=10, r=10, t=100, b=50)
    )
fig.show()

In [35]:
view_df = view_df.sort_values(by="views",ascending=False)
fig = px.bar(view_df[2:52], x="tag", y="views", title="Tags with high number of views per question")
fig.show()