In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
path = "../data/2020_total.txt"

def process_package_dump(fname,date):
    package_counts = [] 
    distro_counts = []
    arch_counts = []
    arch_x_os_counts = []
    stats = []
    with open(fname,'r') as fp:
        # remove the first line
        f = fp.readline()
        # remove all the empty 
        f = fp.readline()
        while "not a ros package name" in f:
            f = fp.readline()

        while "Breakdown" not in f:
            temp = f.split(":")
            parts = temp[0].split("-")
            distro = parts[1]
            package = "-".join(parts[2:])
            data = {
                "package":package,
                "distro":distro,
                "name":temp[0],
                "count":int(temp[1]),
                "date":date
            }

            package_counts.append(data)
            f = fp.readline()
        # Done with the modules 

        f = fp.readline()
        while "Breakdown" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            distro_counts.append(data)
            f = fp.readline()

        f = fp.readline()
        while "Results" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            arch_counts.append(data)
            f = fp.readline()

        f = fp.readline()
        while "Unique" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            arch_x_os_counts.append(data)
            f = fp.readline()

        f = fp.readline()    
        while len(f) > 0:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":temp[1],
                "date":date
            }
            stats.append((temp[0],temp[1]))
            f = fp.readline()
    retval = {}
    retval["package"] = package_counts
    retval["distro"] = distro_counts
    retval["arch"] = arch_counts
    retval["arch_x_os"] = arch_x_os_counts
    retval["stats"] = stats
    return retval


In [3]:
fname = "../data/Stats112020.txt"
stats_2020 = process_package_dump(fname,"2020")
fname = "../data/Stats112021.txt"
stats_2021 = process_package_dump(fname,"2021")

In [4]:
def dumb_find(lst, k, v):
    for i, dic in enumerate(lst):
        if dic[k] == v:
            return lst[i]
    return None

def join_stats(stats_list,idx="prct"):
    joined = []
    # LIST SHOULD BE THE NEWEST FIRST!!!
    first = stats_list[0]
    for entry in first:
        new_entry = {}
        new_entry["name"] = entry["name"]
        new_entry.update(entry)
        del new_entry[idx]
        del new_entry["date"]
        
        new_entry[entry["date"]] = entry[idx]
        for other in stats_list[1:]:
            temp = dumb_find(other,"name",entry["name"])
            if temp:
                new_entry[temp["date"]] = temp[idx]
        joined.append(new_entry)
    return joined

In [41]:
joined_distro = join_stats([stats_2021["distro"],stats_2020["distro"]])
distro_df = pd.DataFrame(data=joined_distro)
distro_df.to_csv("distro.csv")
distro_df

Unnamed: 0,name,2021,2020
0,boxturtle,0.0,0.0
1,cturtle,0.0,0.0
2,diamondback,0.0,0.0
3,electric,0.0,0.0
4,fuerte,0.0,0.0
5,groovy,0.0,0.0
6,hydro,0.0,0.0
7,indigo,0.46,0.54
8,jade,0.0,0.0
9,kinetic,4.88,31.11


In [6]:
joined_arch = join_stats([stats_2021["arch"],stats_2020["arch"]])
arch_df = pd.DataFrame(data=joined_arch)
arch_df.to_csv("arch.csv")
arch_df.head()

Unnamed: 0,name,2021,2020
0,i386,0.21,0.41
1,amd64,86.68,90.48
2,armhf,0.52,0.78
3,arm64,6.97,3.74
4,source,0.0,0.0


In [7]:
joined_arch_x_os = join_stats([stats_2021["arch_x_os"],stats_2020["arch_x_os"]])
arch_os_df = pd.DataFrame(data=joined_arch_x_os)
arch_os_df.to_csv("arch_os.csv")
arch_os_df.head()

Unnamed: 0,name,2021,2020
0,bionic_amd64,35.87,46.48
1,bionic_arm64,4.47,2.93
2,bionic_armhf,0.21,0.31
3,buster_amd64,0.14,0.11
4,buster_arm64,0.05,0.01


In [8]:
joined_package = join_stats([stats_2021["package"],stats_2020["package"]],idx="count")
package_df = pd.DataFrame(data=joined_package)


In [9]:
package_df["YoY"] = package_df["2021"]-package_df["2020"]
package_df["YoY_Prct"] = 100.00*package_df["YoY"]/package_df["2020"]

package_df.to_csv("package.csv")
package_df.head()


Unnamed: 0,name,package,distro,2021,2020,YoY,YoY_Prct
0,python3-catkin-pkg-modules,pkg-modules,catkin,151135,84945.0,66190.0,77.921008
1,python-catkin-pkg-modules,pkg-modules,catkin,103308,234115.0,-130807.0,-55.872968
2,python-catkin-pkg,pkg,catkin,102377,234453.0,-132076.0,-56.333679
3,ros-melodic-dynamic-reconfigure,dynamic-reconfigure,melodic,101457,132425.0,-30968.0,-23.385312
4,python3-catkin-pkg,pkg,catkin,101217,41836.0,59381.0,141.937566


In [10]:
package_df[0:20]

Unnamed: 0,name,package,distro,2021,2020,YoY,YoY_Prct
0,python3-catkin-pkg-modules,pkg-modules,catkin,151135,84945.0,66190.0,77.921008
1,python-catkin-pkg-modules,pkg-modules,catkin,103308,234115.0,-130807.0,-55.872968
2,python-catkin-pkg,pkg,catkin,102377,234453.0,-132076.0,-56.333679
3,ros-melodic-dynamic-reconfigure,dynamic-reconfigure,melodic,101457,132425.0,-30968.0,-23.385312
4,python3-catkin-pkg,pkg,catkin,101217,41836.0,59381.0,141.937566
5,ros-melodic-tf,tf,melodic,100763,141125.0,-40362.0,-28.600177
6,python3-rospkg-modules,modules,rospkg,99637,55833.0,43804.0,78.455394
7,ros-melodic-rosservice,rosservice,melodic,98736,132259.0,-33523.0,-25.346479
8,ros-melodic-rosmsg,rosmsg,melodic,97091,132611.0,-35520.0,-26.785108
9,ros-melodic-ros-base,ros-base,melodic,95997,124746.0,-28749.0,-23.04603


In [11]:
dashing = package_df[package_df["distro"]=="dashing"]
dashing[0:20]

Unnamed: 0,name,package,distro,2021,2020,YoY,YoY_Prct
1494,ros-dashing-ament-cmake-core,ament-cmake-core,dashing,5952,11990.0,-6038.0,-50.358632
1508,ros-dashing-rcl,rcl,dashing,5821,11102.0,-5281.0,-47.568006
1509,ros-dashing-rmw-implementation,rmw-implementation,dashing,5820,11109.0,-5289.0,-47.610046
1510,ros-dashing-rcl-yaml-param-parser,rcl-yaml-param-parser,dashing,5820,11066.0,-5246.0,-47.40647
1512,ros-dashing-rosidl-typesupport-c,rosidl-typesupport-c,dashing,5816,11114.0,-5298.0,-47.669606
1513,ros-dashing-rosidl-typesupport-cpp,rosidl-typesupport-cpp,dashing,5811,11093.0,-5282.0,-47.615613
1514,ros-dashing-builtin-interfaces,builtin-interfaces,dashing,5809,11102.0,-5293.0,-47.676094
1516,ros-dashing-rosidl-generator-c,rosidl-generator-c,dashing,5801,9311.0,-3510.0,-37.697347
1517,ros-dashing-rosidl-generator-cpp,rosidl-generator-cpp,dashing,5789,9311.0,-3522.0,-37.826227
1518,ros-dashing-rmw,rmw,dashing,5787,9316.0,-3529.0,-37.881065


In [12]:
package_df.iloc[package_df["YoY_Prct"].argmin()]

name        ros-dashing-aws-common
package                 aws-common
distro                     dashing
2021                            25
2020                         15434
YoY                         -15409
YoY_Prct                   -99.838
Name: 13290, dtype: object

In [13]:
kinetic = package_df[package_df["distro"] == "kinetic"]

In [14]:
kinetic.iloc[kinetic["YoY_Prct"].argmin()]

name        ros-kinetic-aws-common
package                 aws-common
distro                     kinetic
2021                            31
2020                         18129
YoY                         -18098
YoY_Prct                   -99.829
Name: 12292, dtype: object

In [15]:
kinetic.iloc[kinetic["YoY_Prct"].argmax()]

name        ros-kinetic-pyquaternion
package                 pyquaternion
distro                       kinetic
2021                             179
2020                              29
YoY                              150
YoY_Prct                     517.241
Name: 5529, dtype: object

In [16]:
kinetic["YoY_Prct"].median()

-86.00847457627118

In [17]:
melodic = package_df[package_df["distro"] == "melodic"]

In [18]:
melodic.iloc[melodic["YoY_Prct"].argmin()]

name        ros-melodic-variant-topic-test
package                 variant-topic-test
distro                             melodic
2021                                     4
2020                                   164
YoY                                   -160
YoY_Prct                           -97.561
Name: 19650, dtype: object

In [19]:
go

NameError: name 'go' is not defined

In [40]:
from plotly import tools
import plotly.offline as py
import plotly.graph_objs as go

temp = distro_df[distro_df["2021"]>1]

colors ={"noetic": 'rgba(255,0,0,0.4)',
         "melodic": 'rgba(255,0,0,0.4)',
         "kinetic": 'rgba(255,0,0,0.4)',
         "dashing": 'rgba(255,0,0,0.4)',
         "eloquent": 'rgba(255,0,0,0.4)',
         "foxy": 'rgba(255,0,0,0.4)',
         "galactic": 'rgba(255,0,0,0.4)',
         "rolling": 'rgba(255,0,0,0.4)',
        }

trace1 = go.Pie(values= temp["2020"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="November 2020", sort=False)
trace2 = go.Pie(values= temp["2021"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="November 2021",sort=False )
layout = go.Layout(title="ROS Index Download Percentage")
data = [trace1,trace2]
fig = go.Figure(data=data, layout=layout)
fig.update_traces(marker=dict(colors=['#FFAAAA','#FF6666','#FF0000','#CCCCFF','#9999FF','#6666FF','#3333FF']))

fig.show()


In [43]:
temp = arch_df[arch_os_df["2021"]>0.1]

trace1 = go.Pie(values= temp["2020"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="2020")
trace2 = go.Pie(values= temp["2021"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="2021")

layout = go.Layout(title="ROS Index Download Percentage",)
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
fig.show()


Boolean Series key will be reindexed to match DataFrame index.



In [44]:
fig = px.bar(distro_df, x="name", y=["2021", "2020"], title="Wide-Form Input")
fig.show()

In [45]:
fname = "../data/march_2020.txt"
stats_mar_2020 = process_package_dump(fname,"3/2020")
fname = "../data/march_2021.txt"
stats_mar_2021 = process_package_dump(fname,"3/2021")

joined_distro = join_stats([stats_mar_2020["distro"],stats_mar_2021["distro"]])
mar_distro_df = pd.DataFrame(data=joined_distro)
mar_distro_df.to_csv("march_distro.csv")
mar_distro_df.head()

Unnamed: 0,name,3/2020,3/2021
0,boxturtle,0.0,0.0
1,cturtle,0.0,0.0
2,diamondback,0.0,0.0
3,electric,0.0,0.0
4,fuerte,0.0,0.0


In [46]:
temp = mar_distro_df[mar_distro_df["3/2021"]>0.5]

trace1 = go.Pie(values= temp["3/2020"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="3/2020")
trace2 = go.Pie(values= temp["3/2021"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="3/2021")

layout = go.Layout(title="ROS Index Download Percentage: March 2020 vs. March 2021",)
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
fig.show()


In [173]:
cd ../data/


/home/kscottz/Code/ros_metrics_analysis/data


In [174]:
ls

2019_total.txt  2020_total.txt  march_2020.txt  march_2021.txt
