In [8]:
import numpy as np
import pandas as pd
import plotly.express as px

In [9]:
path = "../data/2020_total.txt"

def process_package_dump(fname,date):
    package_counts = [] 
    distro_counts = []
    arch_counts = []
    arch_x_os_counts = []
    stats = []
    with open(fname,'r') as fp:
        # remove the first line
        f = fp.readline()
        # remove all the empty 
        f = fp.readline()
        while "not a ros package name" in f:
            f = fp.readline()

        while "Breakdown" not in f:
            temp = f.split(":")
            parts = temp[0].split("-")
            distro = parts[1]
            package = "-".join(parts[2:])
            data = {
                "package":package,
                "distro":distro,
                "name":temp[0],
                "count":int(temp[1]),
                "date":date
            }

            package_counts.append(data)
            f = fp.readline()
        # Done with the modules 

        f = fp.readline()
        while "Breakdown" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            distro_counts.append(data)
            f = fp.readline()

        f = fp.readline()
        while "Results" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            arch_counts.append(data)
            f = fp.readline()

        f = fp.readline()
        while "Unique" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            arch_x_os_counts.append(data)
            f = fp.readline()

        f = fp.readline()    
        while len(f) > 0:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":temp[1],
                "date":date
            }
            stats.append((temp[0],temp[1]))
            f = fp.readline()
    retval = {}
    retval["package"] = package_counts
    retval["distro"] = distro_counts
    retval["arch"] = arch_counts
    retval["arch_x_os"] = arch_x_os_counts
    retval["stats"] = stats
    return retval


In [22]:
fname = "../data/Stats072020.txt"
stats_2020 = process_package_dump(fname,"2020")
fname = "../data/Stats072021.txt"
stats_2021 = process_package_dump(fname,"2021")

In [23]:
def dumb_find(lst, k, v):
    for i, dic in enumerate(lst):
        if dic[k] == v:
            return lst[i]
    return None

def join_stats(stats_list,idx="prct"):
    joined = []
    # LIST SHOULD BE THE NEWEST FIRST!!!
    first = stats_list[0]
    for entry in first:
        new_entry = {}
        new_entry["name"] = entry["name"]
        new_entry.update(entry)
        del new_entry[idx]
        del new_entry["date"]
        
        new_entry[entry["date"]] = entry[idx]
        for other in stats_list[1:]:
            temp = dumb_find(other,"name",entry["name"])
            if temp:
                new_entry[temp["date"]] = temp[idx]
        joined.append(new_entry)
    return joined

In [24]:
joined_distro = join_stats([stats_2021["distro"],stats_2020["distro"]])
distro_df = pd.DataFrame(data=joined_distro)
distro_df.to_csv("distro.csv")
distro_df

Unnamed: 0,name,2021,2020
0,boxturtle,0.0,0.0
1,cturtle,0.0,0.0
2,diamondback,0.0,0.0
3,electric,0.0,0.0
4,fuerte,0.0,0.0
5,groovy,0.0,0.0
6,hydro,0.0,0.0
7,indigo,0.8,1.18
8,jade,0.0,0.0
9,kinetic,8.54,22.04


In [25]:
joined_arch = join_stats([stats_2021["arch"],stats_2020["arch"]])
arch_df = pd.DataFrame(data=joined_arch)
arch_df.to_csv("arch.csv")
arch_df.head()

Unnamed: 0,name,2021,2020
0,i386,0.31,0.34
1,amd64,85.03,87.35
2,armhf,0.5,1.0
3,arm64,7.2,4.38
4,source,0.0,0.0


In [26]:
joined_arch_x_os = join_stats([stats_2021["arch_x_os"],stats_2020["arch_x_os"]])
arch_os_df = pd.DataFrame(data=joined_arch_x_os)
arch_os_df.to_csv("arch_os.csv")
arch_os_df.head()

Unnamed: 0,name,2021,2020
0,bionic_amd64,33.43,54.96
1,bionic_arm64,4.75,3.63
2,bionic_armhf,0.18,0.49
3,buster_amd64,0.15,0.12
4,buster_arm64,0.02,0.02


In [27]:
joined_package = join_stats([stats_2021["package"],stats_2020["package"]],idx="count")
package_df = pd.DataFrame(data=joined_package)


In [29]:
package_df["YoY"] = package_df["2021"]-package_df["2020"]
package_df["YoY_Prct"] = 100.00*package_df["YoY"]/package_df["2020"]

package_df.to_csv("package.csv")
package_df.head()


Unnamed: 0,name,package,distro,2021,2020,YoY,YoY_Prct
0,python3-catkin-pkg-modules,pkg-modules,catkin,99516,85426.0,14090.0,16.493808
1,python-rosdep-modules,modules,rosdep,95621,132648.0,-37027.0,-27.913727
2,python-rospkg,,rospkg,84686,169401.0,-84715.0,-50.00856
3,python-rospkg-modules,modules,rospkg,84588,169937.0,-85349.0,-50.223907
4,python3-rospkg-modules,modules,rospkg,80373,34859.0,45514.0,130.565994


In [30]:
package_df[0:20]

Unnamed: 0,name,package,distro,2021,2020,YoY,YoY_Prct
0,python3-catkin-pkg-modules,pkg-modules,catkin,99516,85426.0,14090.0,16.493808
1,python-rosdep-modules,modules,rosdep,95621,132648.0,-37027.0,-27.913727
2,python-rospkg,,rospkg,84686,169401.0,-84715.0,-50.00856
3,python-rospkg-modules,modules,rospkg,84588,169937.0,-85349.0,-50.223907
4,python3-rospkg-modules,modules,rospkg,80373,34859.0,45514.0,130.565994
5,python-catkin-pkg,pkg,catkin,79672,174668.0,-94996.0,-54.386608
6,python-catkin-pkg-modules,pkg-modules,catkin,79431,176359.0,-96928.0,-54.96062
7,python-rosdistro-modules,modules,rosdistro,79187,150564.0,-71377.0,-47.406419
8,python-rosdistro,,rosdistro,78944,150008.0,-71064.0,-47.373473
9,python3-rosdep-modules,modules,rosdep,76209,24104.0,52105.0,216.167441


In [31]:
dashing = package_df[package_df["distro"]=="dashing"]
dashing[0:20]

Unnamed: 0,name,package,distro,2021,2020,YoY,YoY_Prct
1426,ros-dashing-ros-workspace,ros-workspace,dashing,6647,16056.0,-9409.0,-58.601146
1443,ros-dashing-ament-cmake-core,ament-cmake-core,dashing,6291,15209.0,-8918.0,-58.636334
1447,ros-dashing-ament-package,ament-package,dashing,6099,14156.0,-8057.0,-56.915795
1451,ros-dashing-rcl-yaml-param-parser,rcl-yaml-param-parser,dashing,6050,15624.0,-9574.0,-61.277522
1453,ros-dashing-rcl,rcl,dashing,6045,15632.0,-9587.0,-61.329324
1456,ros-dashing-std-msgs,std-msgs,dashing,5933,15043.0,-9110.0,-60.559729
1457,ros-dashing-rmw-implementation,rmw-implementation,dashing,5925,15448.0,-9523.0,-61.64552
1458,ros-dashing-rosidl-typesupport-c,rosidl-typesupport-c,dashing,5916,15443.0,-9527.0,-61.691381
1459,ros-dashing-rosidl-typesupport-cpp,rosidl-typesupport-cpp,dashing,5908,15430.0,-9522.0,-61.710953
1460,ros-dashing-builtin-interfaces,builtin-interfaces,dashing,5907,15416.0,-9509.0,-61.682667


In [32]:
package_df.iloc[package_df["YoY_Prct"].argmin()]

name        python-vtk6
package                
distro             vtk6
2021                 51
2020              38567
YoY              -38516
YoY_Prct       -99.8678
Name: 8049, dtype: object

In [33]:
kinetic = package_df[package_df["distro"] == "kinetic"]

In [34]:
kinetic.iloc[kinetic["YoY_Prct"].argmin()]

name        ros-kinetic-rosbag-pandas
package                 rosbag-pandas
distro                        kinetic
2021                               20
2020                             1046
YoY                             -1026
YoY_Prct                      -98.088
Name: 12311, dtype: object

In [35]:
kinetic.iloc[kinetic["YoY_Prct"].argmax()]

name        ros-kinetic-ros-speech-recognition
package                 ros-speech-recognition
distro                                 kinetic
2021                                       333
2020                                        35
YoY                                        298
YoY_Prct                               851.429
Name: 4375, dtype: object

In [36]:
kinetic["YoY_Prct"].median()

-56.79012345679013

In [37]:
melodic = package_df[package_df["distro"] == "melodic"]

In [38]:
melodic.iloc[melodic["YoY_Prct"].argmin()]

name        ros-melodic-jderobot-assets
package                 jderobot-assets
distro                          melodic
2021                                 32
2020                                807
YoY                                -775
YoY_Prct                       -96.0347
Name: 10229, dtype: object

In [39]:
go

<module 'plotly.graph_objs' from '/home/kscottz/.local/lib/python3.6/site-packages/plotly/graph_objs/__init__.py'>

1.55
6.17


In [70]:
from plotly import tools
import plotly.offline as py
import plotly.graph_objs as go

temp = distro_df[distro_df["2021"]>1]
leftovers = distro_df[distro_df["2021"]<1]

all_other_2021 = leftover["2021"].sum()
all_other_2020 = leftover["2020"].sum()
new_row = pd.DataFrame(data=[{"name":"All Others","2021":all_other_2021,"2020":all_other_2020}])
print(new_row)
temp = pd.concat([temp, new_row], ignore_index=True)

colors ={"noetic": 'rgba(255,0,0,0.4)',
         "melodic": 'rgba(255,0,0,0.4)',
         "kinetic": 'rgba(255,0,0,0.4)',
         "dashing": 'rgba(255,0,0,0.4)',
         "eloquent": 'rgba(255,0,0,0.4)',
         "foxy": 'rgba(255,0,0,0.4)',
         "galactic": 'rgba(255,0,0,0.4)',
         "rolling": 'rgba(255,0,0,0.4)',
        }

trace1 = go.Pie(values= temp["2020"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="July 2020", sort=False)
trace2 = go.Pie(values= temp["2021"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="July 2021",sort=False )
layout = go.Layout(title="Download Percentage from packages.ros.org")
data = [trace1,trace2]
fig = go.Figure(data=data, layout=layout)
fig.update_traces(marker=dict(colors=['#FFAAAA','#FF6666','#FF0000','#CCCCFF','#9999FF','#6666FF','#3333FF']))

fig.show()


         name  2021  2020
0  All Others  1.55  6.17


In [72]:
temp = arch_df[arch_os_df["2021"]>0.00001]

trace1 = go.Pie(values= temp["2020"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="2020")
trace2 = go.Pie(values= temp["2021"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="2021")

layout = go.Layout(title="ROS Download Percentage By Arch",)
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
fig.show()


Boolean Series key will be reindexed to match DataFrame index.



In [51]:
fig = px.bar(distro_df, x="name", y=["2021", "2020"], title="Wide-Form Input")
fig.show()

In [46]:
fname = "../data/march_2020.txt"
stats_mar_2020 = process_package_dump(fname,"3/2020")
fname = "../data/march_2021.txt"
stats_mar_2021 = process_package_dump(fname,"3/2021")

joined_distro = join_stats([stats_mar_2020["distro"],stats_mar_2021["distro"]])
mar_distro_df = pd.DataFrame(data=joined_distro)
mar_distro_df.to_csv("march_distro.csv")
mar_distro_df.head()

Unnamed: 0,name,3/2020,3/2021
0,boxturtle,0.0,0.0
1,cturtle,0.0,0.0
2,diamondback,0.0,0.0
3,electric,0.0,0.0
4,fuerte,0.0,0.0


In [48]:
temp = mar_distro_df[mar_distro_df["7/2021"]>0.5]

trace1 = go.Pie(values= temp["7/2020"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="7/2020")
trace2 = go.Pie(values= temp["7/2021"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="7/2021")

layout = go.Layout(title="ROS Index Download Percentage: March 2020 vs. March 2021",)
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
fig.show()


KeyError: '7/2021'

In [173]:
cd ../data/


/home/kscottz/Code/ros_metrics_analysis/data


In [174]:
ls

2019_total.txt  2020_total.txt  march_2020.txt  march_2021.txt
