In [115]:
import numpy as np
import pandas as pd
import plotly.express as px

In [155]:
path = "../data/2020_total.txt"

def process_package_dump(fname,date):
    package_counts = [] 
    distro_counts = []
    arch_counts = []
    arch_x_os_counts = []
    stats = []
    with open(fname,'r') as fp:
        # remove the first line
        f = fp.readline()
        # remove all the empty 
        f = fp.readline()
        while "not a ros package name" in f:
            f = fp.readline()

        while "Breakdown" not in f:
            temp = f.split(":")
            parts = temp[0].split("-")
            distro = parts[1]
            package = "-".join(parts[2:])
            data = {
                "package":package,
                "distro":distro,
                "name":temp[0],
                "count":int(temp[1]),
                "date":date
            }

            package_counts.append(data)
            f = fp.readline()
        # Done with the modules 

        f = fp.readline()
        while "Breakdown" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            distro_counts.append(data)
            f = fp.readline()

        f = fp.readline()
        while "Results" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            arch_counts.append(data)
            f = fp.readline()

        f = fp.readline()
        while "Unique" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            arch_x_os_counts.append(data)
            f = fp.readline()

        f = fp.readline()    
        while len(f) > 0:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":temp[1],
                "date":date
            }
            stats.append((temp[0],temp[1]))
            f = fp.readline()
    retval = {}
    retval["package"] = package_counts
    retval["distro"] = distro_counts
    retval["arch"] = arch_counts
    retval["arch_x_os"] = arch_x_os_counts
    retval["stats"] = stats
    return retval


In [156]:
fname = "../data/2020_total.txt"
stats_2020 = process_package_dump(fname,"2020")
fname = "../data/2019_total.txt"
stats_2019 = process_package_dump(fname,"2019")

In [157]:
def dumb_find(lst, k, v):
    for i, dic in enumerate(lst):
        if dic[k] == v:
            return lst[i]
    return None

def join_stats(stats_list,idx="prct"):
    joined = []
    # LIST SHOULD BE THE NEWEST FIRST!!!
    first = stats_list[0]
    for entry in first:
        new_entry = {}
        new_entry["name"] = entry["name"]
        new_entry.update(entry)
        del new_entry[idx]
        del new_entry["date"]
        
        new_entry[entry["date"]] = entry[idx]
        for other in stats_list[1:]:
            temp = dumb_find(other,"name",entry["name"])
            if temp:
                new_entry[temp["date"]] = temp[idx]
        joined.append(new_entry)
    return joined

In [158]:
joined_distro = join_stats([stats_2020["distro"],stats_2019["distro"]])
distro_df = pd.DataFrame(data=joined_distro)
distro_df.to_csv("distro.csv")
distro_df.head()

Unnamed: 0,name,2020,2019
0,boxturtle,0.0,0.0
1,cturtle,0.0,0.0
2,diamondback,0.0,0.0
3,electric,0.0,0.0
4,fuerte,0.0,0.01


In [159]:
joined_arch = join_stats([stats_2020["arch"],stats_2019["arch"]])
arch_df = pd.DataFrame(data=joined_arch)
arch_df.to_csv("arch.csv")
arch_df.head()

Unnamed: 0,name,2020,2019
0,i386,0.45,1.25
1,amd64,89.17,86.91
2,armhf,1.0,2.32
3,arm64,3.7,3.62
4,source,0.0,0.0


In [160]:
joined_arch_x_os = join_stats([stats_2020["arch_x_os"],stats_2019["arch_x_os"]])
arch_os_df = pd.DataFrame(data=joined_arch_x_os)
arch_os_df.to_csv("arch_os.csv")
arch_os_df.head()

Unnamed: 0,name,2020,2019
0,bionic_amd64,50.77,30.19
1,bionic_arm64,3.01,2.03
2,bionic_armhf,0.47,0.6
3,buster_amd64,0.08,
4,buster_arm64,0.01,


In [161]:
joined_package = join_stats([stats_2020["package"],stats_2019["package"]],idx="count")
package_df = pd.DataFrame(data=joined_package)


In [162]:
package_df["YoY"] = package_df["2020"]-package_df["2019"]
package_df["YoY_Prct"] = 100.00*package_df["YoY"]/package_df["2019"]

package_df.to_csv("package.csv")
package_df.head()


Unnamed: 0,name,package,distro,2020,2019,YoY,YoY_Prct
0,python-catkin-pkg-modules,pkg-modules,catkin,2131184,1458422.0,672762.0,46.129447
1,python-catkin-pkg,pkg,catkin,2126073,1456559.0,669514.0,45.965457
2,python-rosdistro-modules,modules,rosdistro,1850443,1372700.0,477743.0,34.803162
3,python-rosdistro,,rosdistro,1848320,1371723.0,476597.0,34.744405
4,python-rospkg-modules,modules,rospkg,1772498,1179426.0,593072.0,50.2848


In [192]:
package_df[0:20]

Unnamed: 0,name,package,distro,2020,2019,YoY,YoY_Prct
0,python-catkin-pkg-modules,pkg-modules,catkin,2131184,1458422.0,672762.0,46.129447
1,python-catkin-pkg,pkg,catkin,2126073,1456559.0,669514.0,45.965457
2,python-rosdistro-modules,modules,rosdistro,1850443,1372700.0,477743.0,34.803162
3,python-rosdistro,,rosdistro,1848320,1371723.0,476597.0,34.744405
4,python-rospkg-modules,modules,rospkg,1772498,1179426.0,593072.0,50.2848
5,python-rospkg,,rospkg,1770166,1188371.0,581795.0,48.957354
6,python-rosdep-modules,modules,rosdep,1395588,92434.0,1303154.0,1409.821062
7,python-rosdep,,rosdep,1369341,1273169.0,96172.0,7.55375
8,ros-melodic-rviz,rviz,melodic,1182825,391294.0,791531.0,202.285494
9,ros-melodic-tf2-ros,tf2-ros,melodic,1173811,395100.0,778711.0,197.092129


In [200]:
dashing = package_df[package_df["distro"]=="dashing"]
dashing[0:20]

Unnamed: 0,name,package,distro,2020,2019,YoY,YoY_Prct
584,ros-dashing-rclcpp,rclcpp,dashing,136595,32305.0,104290.0,322.829283
587,ros-dashing-ros-workspace,ros-workspace,dashing,132581,30299.0,102282.0,337.575498
590,ros-dashing-ament-cmake-core,ament-cmake-core,dashing,129763,29834.0,99929.0,334.950057
592,ros-dashing-rcl,rcl,dashing,128173,29675.0,98498.0,331.922494
593,ros-dashing-rcl-yaml-param-parser,rcl-yaml-param-parser,dashing,128056,29658.0,98398.0,331.775575
596,ros-dashing-rosidl-typesupport-c,rosidl-typesupport-c,dashing,126346,29954.0,96392.0,321.800093
597,ros-dashing-rmw-implementation,rmw-implementation,dashing,126341,29957.0,96384.0,321.741162
598,ros-dashing-builtin-interfaces,builtin-interfaces,dashing,126210,29829.0,96381.0,323.111737
599,ros-dashing-rosidl-typesupport-cpp,rosidl-typesupport-cpp,dashing,126194,29844.0,96350.0,322.845463
600,ros-dashing-rcl-interfaces,rcl-interfaces,dashing,125980,29693.0,96287.0,324.275082


In [163]:
package_df.iloc[package_df["YoY_Prct"].argmin()]

name        ros-kinetic-cob-pick-place-action
package                 cob-pick-place-action
distro                                kinetic
2020                                       18
2019                                     3261
YoY                                     -3243
YoY_Prct                              -99.448
Name: 16909, dtype: object

In [164]:
kinetic = package_df[package_df["distro"] == "kinetic"]

In [165]:
kinetic.iloc[kinetic["YoY_Prct"].argmin()]

name        ros-kinetic-cob-pick-place-action
package                 cob-pick-place-action
distro                                kinetic
2020                                       18
2019                                     3261
YoY                                     -3243
YoY_Prct                              -99.448
Name: 16909, dtype: object

In [166]:
kinetic.iloc[kinetic["YoY_Prct"].argmax()]

name        ros-kinetic-cis-camera
package                 cis-camera
distro                     kinetic
2020                          2154
2019                           146
YoY                           2008
YoY_Prct                   1375.34
Name: 5522, dtype: object

In [167]:
kinetic["YoY_Prct"].median()

-22.761262082801387

In [168]:
melodic = package_df[package_df["distro"] == "melodic"]

In [169]:
melodic.iloc[melodic["YoY_Prct"].argmin()]

name        ros-melodic-dbw-mkz-twist-controller-dbgsym
package                 dbw-mkz-twist-controller-dbgsym
distro                                          melodic
2020                                                398
2019                                               1409
YoY                                               -1011
YoY_Prct                                        -71.753
Name: 9678, dtype: object

In [170]:
from plotly import tools
import plotly.offline as py
import plotly.graph_objs as go

temp = distro_df[distro_df["2020"]>1]

trace1 = go.Pie(values= temp["2019"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="2019")
trace2 = go.Pie(values= temp["2020"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="2020")

layout = go.Layout(title="ROS Index Download Percentage",)
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
fig.show()


In [203]:
temp = arch_df[arch_os_df["2020"]>0.1]

trace1 = go.Pie(values= temp["2019"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="2019")
trace2 = go.Pie(values= temp["2020"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="2020")

layout = go.Layout(title="ROS Index Download Percentage",)
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
fig.show()


Boolean Series key will be reindexed to match DataFrame index.



In [138]:
fig = px.bar(distro_df, x="name", y=["2020", "2019"], title="Wide-Form Input")
fig.show()

In [179]:
fname = "../data/march_2020.txt"
stats_mar_2020 = process_package_dump(fname,"3/2020")
fname = "../data/march_2021.txt"
stats_mar_2021 = process_package_dump(fname,"3/2021")

joined_distro = join_stats([stats_mar_2020["distro"],stats_mar_2021["distro"]])
mar_distro_df = pd.DataFrame(data=joined_distro)
mar_distro_df.to_csv("march_distro.csv")
mar_distro_df.head()

Unnamed: 0,name,3/2020,3/2021
0,boxturtle,0.0,0.0
1,cturtle,0.0,0.0
2,diamondback,0.0,0.0
3,electric,0.0,0.0
4,fuerte,0.0,0.0


In [186]:
temp = mar_distro_df[mar_distro_df["3/2021"]>0.5]

trace1 = go.Pie(values= temp["3/2020"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="3/2020")
trace2 = go.Pie(values= temp["3/2021"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="3/2021")

layout = go.Layout(title="ROS Index Download Percentage: March 2020 vs. March 2021",)
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
fig.show()


In [173]:
cd ../data/


/home/kscottz/Code/ros_metrics_analysis/data


In [174]:
ls

2019_total.txt  2020_total.txt  march_2020.txt  march_2021.txt
