In [1]:
# pip install StackAPI
# Save keys in stackexchange.json
# Docs are here: https://stackapi.readthedocs.io/en/latest/
# https://api.stackexchange.com/docs


In [2]:
import pandas as pd
import numpy as np
import json 
from datetime import datetime
from stackapi import StackAPI
from collections import Counter
import plotly.express as px

In [3]:
key_file = "./stackexchange.json"
keys = json.load(open(key_file,"r"))

In [25]:
SITE = StackAPI("robotics")
start_day_this = datetime(2023,1,1)
stop_day_this = datetime(2023,12,31)
start_day_last = datetime(2022,1,1)
stop_day_last = datetime(2022,12,31)
this_year = 2023
last_year = 2022

In [26]:
# Get all questions tagged "ROS" in 2023
ros_questions = SITE.fetch('questions', fromdate=start_day_this, todate=stop_day_this, tagged='ros')

In [27]:
print(len(ros_questions["items"]))

477


In [28]:
def fetch_results(start_day,stop_day,tags):
    full_results = []
    for i in range(1,10):
        temp = SITE.fetch('questions', fromdate=start_day, todate=stop_day, tagged=tags, page=i)
        fetched = len(temp["items"])
        
        full_results += temp["items"]
        if fetched < 500:
            break
    print("{0} questions tagged with {1} between {2} and {3} ".format(len(full_results),tags,start_day,stop_day))
    return full_results

In [29]:
ros_this = fetch_results(start_day_this,stop_day_this,"ros")
ros1_this = fetch_results(start_day_this,stop_day_this,"ros1")
ros2_this = fetch_results(start_day_this,stop_day_this,"ros2")
ros_last = fetch_results(start_day_last,stop_day_last,"ros")
ros1_last = fetch_results(start_day_last,stop_day_last,"ros1")
ros2_last = fetch_results(start_day_last,stop_day_last,"ros2")

477 questions tagged with ros between 2023-01-01 00:00:00 and 2023-12-31 00:00:00 
0 questions tagged with ros1 between 2023-01-01 00:00:00 and 2023-12-31 00:00:00 
2460 questions tagged with ros2 between 2023-01-01 00:00:00 and 2023-12-31 00:00:00 
1408 questions tagged with ros between 2022-01-01 00:00:00 and 2022-12-31 00:00:00 
0 questions tagged with ros1 between 2022-01-01 00:00:00 and 2022-12-31 00:00:00 
386 questions tagged with ros2 between 2022-01-01 00:00:00 and 2022-12-31 00:00:00 


In [30]:
def filter_out_ros2_posts(post_list):
# We have a lot of posts where both tags #ROS and ROS2 are present
# we'll assume those aren't ROS 1 posts
    output = []
    count = 0
    for post in post_list:
        if not ("ros" in post["tags"] and "ros2" in post["tags"]):
            output.append(post)
        else:
            count += 1
    print("Removed {0} posts".format(count))
    return output
# We have a lot of posts that have both #ROS and #ROS2, we're going to say
# that these are ROS2 sets and remove them from the ROS 1 set. 
print(len(ros_this))
ros_this = filter_out_ros2_posts(ros_this)
print(len(ros_this))
print("----------------")
print(len(ros_last))
ros_last = filter_out_ros2_posts(ros_last)
print(len(ros_last))


477
Removed 99 posts
378
----------------
1408
Removed 328 posts
1080


In [31]:
#TODO, these DFs could use flattening
this_df = pd.DataFrame(data=(ros_this+ros2_this))
last_df = pd.DataFrame(data=(ros_last+ros2_last))
this_df

Unnamed: 0,tags,owner,is_answered,view_count,answer_count,score,last_activity_date,creation_date,last_edit_date,question_id,content_license,link,title,bounty_amount,bounty_closes_date,accepted_answer_id,closed_date,closed_reason,migrated_from
0,"[ros, slam, mapping]","{'account_id': 29817991, 'reputation': 11, 'us...",False,60,2,1,1704380891,1699008199,1.699191e+09,105179,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,SLAM but without actually driving the Robot?,,,,,,
1,"[ros, ros-noetic, universal-robot, rosmatlab]","{'account_id': 17588702, 'reputation': 1, 'use...",False,9,1,0,1704206711,1703626211,,107254,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,Error establishing the connection between MATL...,,,,,,
2,"[ros, gazebo, gazebo-ignition, ignition-fortress]","{'account_id': 25888956, 'reputation': 15, 'us...",False,34,0,0,1704161363,1701762526,1.704161e+09,105866,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,Can I launch a gazebo classic project in gazeb...,,,,,,
3,"[ros, moveit, path-planning, quaternion]","{'account_id': 18137066, 'reputation': 1, 'use...",False,73,1,0,1704134175,1703502940,1.704072e+09,107234,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,Customize SLERP used in MoveIt motion planning,100.0,1.704595e+09,,,,
4,"[ros, navigation, costmap, turtlebot3]","{'account_id': 30237000, 'reputation': 1, 'use...",True,23,1,0,1704040336,1703900890,1.703903e+09,107316,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,Stuck on Astar Planner plugin for movebase to ...,,,107338.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2833,"[ros2, ros-humble]","{'account_id': 2562097, 'reputation': 130, 'us...",True,1003,2,0,1673040884,1672926710,1.672954e+09,24252,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/2...,ROS2 Humble packages for Ubuntu 22.04 - what i...,,,24265.0,,,
2834,[ros2],"{'user_type': 'does_not_exist', 'display_name'...",True,6,1,0,1672931293,1672923191,,102890,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,ros2 run bash script from Python package,,,102891.0,,,
2835,[ros2],"{'user_type': 'does_not_exist', 'display_name'...",True,12,1,0,1672818260,1672711466,,102881,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,What should be published in /clock when using ...,,,102882.0,,,
2836,"[ros, ros2, colcon]","{'user_type': 'does_not_exist', 'display_name'...",True,9,1,0,1672619126,1672617799,,102879,CC BY-SA 4.0,https://robotics.stackexchange.com/questions/1...,colcon build error: can&#39;t copy &#39;resour...,,,102880.0,,,


In [32]:
# Change seconds from epoch to datetime
this_df["creation_date"]=(pd.to_datetime(this_df["creation_date"],unit='s'))
last_df["creation_date"]=(pd.to_datetime(last_df["creation_date"],unit='s'))

# Unholy hack to group data by month, count questions, and then construct a new dataframe
# that has questions by month
tdf = this_df.groupby([this_df["creation_date"].dt.month.rename('month')]).agg({'count'})
t_temp = tdf["tags"]
t_temp = t_temp["count"].tolist()
ldf = last_df.groupby([last_df["creation_date"].dt.month.rename('month')]).agg({'count'})
l_temp = ldf["tags"]
l_temp = l_temp["count"].tolist()
months = [i for i in range(1,13)]
new_df = []
for l,t,m in zip(l_temp,t_temp,months):
    temp = {}
    temp["{0}".format(last_year)] = l
    temp["{0}".format(this_year)] = t
    temp["month"] = m 
    new_df.append(temp)
new_df = pd.DataFrame(data=new_df)
new_df

Unnamed: 0,2022,2023,month
0,136,55,1
1,84,59,2
2,138,73,3
3,147,83,4
4,211,98,5
5,151,82,6
6,142,121,7
7,116,377,8
8,88,529,9
9,118,709,10


In [34]:
fig = px.line(new_df, x="month", y=["2022","2023"], title="Robotics Stack Overflow Questions Per Month")
fig.update_layout(
    xaxis_title="Month", 
    yaxis_title="New Questions Tagged with [ROS,ROS1,ROS2]",
    xaxis = dict(
        tickmode = 'array',
        tickvals = [i for i in range(1,13)],
        ticktext = ["Jan","Feb","March","April","May","Jun","July","Aug","Sept","Oct","Nov","Dec"]
    )
)

fig.show()

In [13]:
# we're ignoring the "ros1" tag and assuming the tag "ros" =="" ROS 1 
ros_q_this = len(ros_this) + len(ros2_this)
ros1_q_this = len(ros_this)
ros2_q_this = len(ros2_this)
ros_q_last = len(ros_last) + len(ros2_last)
ros1_q_last = len(ros_last)
ros2_q_last = len(ros2_last)
print("In {0} there were {1} ROS questions asked.".format(this_year,ros_q_this))
print("In {0} there were {1} ROS questions asked.".format(last_year,ros_q_last))
total_change = (ros_q_this-ros_q_last) / ros_q_last
print("In {0} there was a {1:+.2f}% change in ROS questions asked.".format(this_year,total_change*100.0))
this_prct_ros2 = ros2_q_this / ros_q_this
last_prct_ros2 = ros2_q_last / ros_q_last
print("In {0} {1:.2f}% of questions were ROS 2 and {2:.2f}% were ROS 1 ".format(this_year,this_prct_ros2*100.0,100.0-(this_prct_ros2*100.0)))
print("In {0} {1} questions were tagged \"ros2\" and {2} were tagged just \"ros\".".format(this_year,ros2_q_this,ros1_q_this))
print("----------------------------")
print("In {0} there were {1} ROS questions asked.".format(last_year,ros_q_last))
print("In {0} {1:.2f}% of questions were ROS 2 and {2:.2f}% were ROS 1 ".format(last_year,last_prct_ros2*100.0,100.0-(last_prct_ros2*100.0)))
print("In {0} {1} questions were tagged \"ros2\" and {2} were tagged just \"ros\".".format(last_year,ros2_q_last,ros1_q_last))


In 2023 there were 2328 ROS questions asked.
In 2022 there were 1466 ROS questions asked.
In 2023 there was a +58.80% change in ROS questions asked.
In 2023 85.05% of questions were ROS 2 and 14.95% were ROS 1 
In 2023 1980 questions were tagged "ros2" and 348 were tagged just "ros".
----------------------------
In 2022 there were 1466 ROS questions asked.
In 2022 26.33% of questions were ROS 2 and 73.67% were ROS 1 
In 2022 386 questions were tagged "ros2" and 1080 were tagged just "ros".


In [35]:
def summarize_tags(questions):
    summary = {}
    summary["tags"] = [] 
    summary["answers"] = 0
    summary["is_answered"] = 0
    summary["views"] = 0
    summary["scores"] = []
    summary["reputations"] = [] 
    for i in questions:
        summary["tags"] += i["tags"]
        summary["scores"].append(i["score"])
        summary["answers"] += i["answer_count"]
        if i["is_answered"]:
            summary["is_answered"] += 1
        summary["views"] += i["view_count"]
        if "reputation" in i["owner"]:
            summary["reputations"].append(i["owner"]["reputation"])
    summary["tags"] = Counter(summary["tags"])
    summary["total"] = len(questions)
    summary["prct_answered"] = summary["is_answered"] / len(questions)
    summary["total_answers"] = np.sum(summary["answers"])
    summary["total_views"] = np.sum(summary["views"])
    summary["median_reputation"] = np.median(summary["reputations"])
    return summary

In [36]:
# Summarize all of data into dictionary for ROS 1, ROS 2, and all ROS for this year and last
ros_this_summary = summarize_tags(ros_this)
ros2_this_summary = summarize_tags(ros2_this)
all_ros_this_summary = summarize_tags(ros_this+ros2_this)
ros_last_summary = summarize_tags(ros_last)
ros2_last_summary = summarize_tags(ros2_last)
all_ros_last_summary = summarize_tags(ros_last+ros2_last)
names = ["ros_this","ros_last","ros2_this","ros2_last","all_this","all_last"]
dsets = [ros_this_summary,ros_last_summary,ros2_this_summary,ros2_last_summary,all_ros_this_summary,all_ros_last_summary]

In [37]:
df = pd.DataFrame.from_dict(all_ros_this_summary["tags"], orient='index').reset_index()
df.columns.values[0] = "Tag"
temp = "{0} Count".format(this_year)
df.columns.values[1] = temp
df = df.sort_values(by=temp,ascending=False)
fig = px.bar(df[2:42], x="Tag", y=temp, title="Top ROS Question Tags on Robot Stack Exchange Tags for {0}".format(this_year))
fig.show()

In [38]:
# Fiddle with tags and create a data frame that shows tags by ROS1, ROS2, and all ROS last year
last_count = "{0} Count".format(last_year)
r1 = "ROS 1"
r2 = "ROS 2"
for i,row in df.iterrows():
    t = row["Tag"]
    df.at[i,last_count] = 0
    if t in all_ros_last_summary["tags"]:
        df.at[i,last_count]=int(all_ros_last_summary["tags"][t]) 
    df.at[i,r1] = 0
    if t in ros_this_summary["tags"]:
        df.at[i,r1]=int(ros_this_summary["tags"][t])  
    df.at[i,r2] = 0
    if t in ros2_this_summary["tags"]:
        df.at[i,r2]=int(ros2_this_summary["tags"][t])
df
df.to_csv("{0}TagCounts.csv".format(this_year))

In [58]:
# The first two are ROS and ROS 2 so skip them
fig = px.bar(df[2:52], x="Tag", y=["ROS 1","ROS 2"],  title="Top ROS Question Tags on Robot Stack Exchange for {0}".format(this_year))
fig.update_layout(
        #autosize=False,
        #width=800,
        #height=400,
        #template='plotly_dark',
        yaxis_title=dict(text='Number of Questions'),#, font=dict(size=16, color='#FFFFFF')),
        #yaxis_title=dict(text='7 day avg', font=dict(size=16, color='#FFFFFF')),
        #plot_bgcolor='rgb(50, 50, 50)',
        #xaxis=dict(tickfont=dict(size=14, color='#FFFFFF')),
        #yaxis=dict(tickfont=dict(size=14, color='#FFFFFF')),
        #legend=dict(x=0.1, y=1.1, orientation='h', font=dict(color='#FFFFFF')),
        #margin=dict(l=10, r=10, t=100, b=50)
    )

fig.show()

In [40]:
fig = px.bar(df[2:42], x="Tag", y=["2023 Count","2022 Count"], barmode='group',title="Change in Tag Frequency")
fig.show()

In [41]:
# Note that ROS2 is often in the set of ROS questions, so we probably should purge those questions so we don't get an overcount
print("In {0} the top ten ROS tags were: {1}\n".format(last_year,all_ros_last_summary["tags"].most_common()[2:11]))
print("In {0} the top ten ROS tags were: {1}\n".format(this_year,all_ros_this_summary["tags"].most_common()[2:11]))
print("In {0} the top ten ROS 1 tags were: {1}\n".format(this_year,ros_this_summary["tags"].most_common()[1:11]))
print("In {0} the top ten ROS 2 tags were: {1}\n".format(this_year,ros2_this_summary["tags"].most_common()[1:11]))


In 2022 the top ten ROS tags were: [('ros-melodic', 228), ('navigation', 111), ('gazebo', 92), ('moveit', 75), ('rviz', 68), ('ros-kinetic', 55), ('python', 45), ('tf2', 43), ('transform', 42)]

In 2023 the top ten ROS tags were: [('ros-humble', 483), ('gazebo', 305), ('nav2', 201), ('rviz', 170), ('navigation', 127), ('moveit', 124), ('python', 118), ('ros-foxy', 105), ('urdf', 102)]

In 2023 the top ten ROS 1 tags were: [('ros-noetic', 58), ('gazebo', 40), ('rviz', 37), ('moveit', 26), ('navigation', 26), ('ros-melodic', 26), ('python', 16), ('c++', 12), ('ubuntu', 11), ('urdf', 11)]

In 2023 the top ten ROS 2 tags were: [('ros-humble', 478), ('gazebo', 265), ('ros', 240), ('nav2', 198), ('rviz', 133), ('ros-foxy', 105), ('python', 102), ('navigation', 101), ('moveit', 98), ('urdf', 91)]



In [60]:
# Percent answered 
print("In {0} there were {1} questions and {2} answers, {3:.2f}% of questions were answered.\n".format(last_year,all_ros_last_summary["total"],
                                                                                                       all_ros_last_summary["total_answers"],
                                                                                                       100.0*all_ros_last_summary["prct_answered"]
                                                                                                     ))
print("In {0} there were {1} questions and {2} answers, {3:.2f}% of questions were answered.\n".format(this_year,all_ros_this_summary["total"],
                                                                                                       all_ros_this_summary["total_answers"],
                                                                                                       100.0*all_ros_this_summary["prct_answered"]
                                                                                                     ))
print("-"*20)
print("In {0} there were {1} ROS 1 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(last_year,ros_last_summary["total"],
                                                                                                       ros_last_summary["total_answers"],
                                                                                                       100.0*ros_last_summary["prct_answered"]
                                                                                                     ))
print("In {0} there were {1} ROS 1 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(this_year,ros_this_summary["total"],
                                                                                                        ros_this_summary["total_answers"],
                                                                                                       100.0*ros_this_summary["prct_answered"]
                                                                                                     ))
print("-"*20)
print("In {0} there were {1} ROS 2 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(last_year,ros2_last_summary["total"],
                                                                                                       ros2_last_summary["total_answers"],
                                                                                                       100.0*ros2_last_summary["prct_answered"]
                                                                                                     ))
print("In {0} there were {1} ROS 2 questions and {2} answers, {3:.2f}% of questions were answered.\n".format(this_year,ros2_this_summary["total"],
                                                                                                        ros2_this_summary["total_answers"],
                                                                                                       100.0*ros2_this_summary["prct_answered"]
                                                                                                     ))


In 2022 there were 1466 questions and 1611 answers, 97.20% of questions were answered.

In 2023 there were 2838 questions and 2132 answers, 48.87% of questions were answered.

--------------------
In 2022 there were 1080 ROS 1 questions and 1178 answers, 97.13% of questions were answered.

In 2023 there were 378 ROS 1 questions and 309 answers, 57.41% of questions were answered.

--------------------
In 2022 there were 386 ROS 2 questions and 433 answers, 97.41% of questions were answered.

In 2023 there were 2460 ROS 2 questions and 1823 answers, 47.56% of questions were answered.



In [61]:
# For a given tag, count the number of views for that tag, i.e. tell us the "most viewed tag"
def make_view_chart(q_list,tags):
    # Get views per tag, divided by the number of tagged questions
    counts = dict(tags)
    views = {}
    for c in counts.keys():
        views[c] = 0
    for q in q_list:
        for t in q["tags"]:
            views[t] += q["view_count"]
    normed = {}
    for k in counts.keys():
        normed[k] = views[k] / counts[k] 
    retval = []
    for k in counts.keys():
        retval.append({"tag":k,"views":views[k],"count":counts[k],"normed":normed[k]})
    return retval

output = make_view_chart(ros_this+ros2_this,all_ros_this_summary["tags"])

In [62]:
view_df = pd.DataFrame(data=output)
view_df = view_df.sort_values(by="normed",ascending=False)
view_df

Unnamed: 0,tag,views,count,normed
479,arm-cpu,2013,3,671.000000
400,rplidar,5429,10,542.900000
188,ubuntu-20.04,3472,7,496.000000
313,synchronization,442,1,442.000000
235,use-sim-time,2717,7,388.142857
...,...,...,...,...
262,pathplanning,3,1,3.000000
232,translation,3,1,3.000000
496,root,2,1,2.000000
259,uwsim,4,2,2.000000


In [63]:
fig = px.bar(view_df[0:50], x="tag", y="normed", title="RSE ROS Question Tags with a High Number of Views Per Question")
fig.update_layout(
        #autosize=False,
        #width=800,
        #height=400,
        #template='plotly_dark',
        yaxis_title=dict(text='Views per Question Tagged'),#, font=dict(size=16, color='#FFFFFF')),
        #yaxis_title=dict(text='7 day avg', font=dict(size=16, color='#FFFFFF')),
        #plot_bgcolor='rgb(50, 50, 50)',
        #xaxis=dict(tickfont=dict(size=14, color='#FFFFFF')),
        #yaxis=dict(tickfont=dict(size=14, color='#FFFFFF')),
        #legend=dict(x=0.1, y=1.1, orientation='h', font=dict(color='#FFFFFF')),
        #margin=dict(l=10, r=10, t=100, b=50)
    )
fig.show()

In [47]:
view_df = view_df.sort_values(by="views",ascending=False)
fig = px.bar(view_df[2:52], x="tag", y="views", title="Tags with high number of views per question")
fig.show()