In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
path = "../data/2020_total.txt"

def process_package_dump(fname,date):
    package_counts = [] 
    distro_counts = []
    arch_counts = []
    arch_x_os_counts = []
    stats = []
    with open(fname,'r') as fp:
        # remove the first line
        f = fp.readline()
        # remove all the empty 
        f = fp.readline()
        while "not a ros package name" in f:
            f = fp.readline()

        while "Breakdown" not in f:
            temp = f.split(":")
            parts = temp[0].split("-")
            distro = parts[1]
            package = "-".join(parts[2:])
            data = {
                "package":package,
                "distro":distro,
                "name":temp[0],
                "count":int(temp[1]),
                "date":date
            }

            package_counts.append(data)
            f = fp.readline()
        # Done with the modules 

        f = fp.readline()
        while "Breakdown" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            distro_counts.append(data)
            f = fp.readline()

        f = fp.readline()
        while "Results" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            arch_counts.append(data)
            f = fp.readline()

        f = fp.readline()
        while "Unique" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            arch_x_os_counts.append(data)
            f = fp.readline()

        f = fp.readline()    
        while len(f) > 0:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":temp[1],
                "date":date
            }
            stats.append((temp[0],temp[1]))
            f = fp.readline()
    retval = {}
    retval["package"] = package_counts
    retval["distro"] = distro_counts
    retval["arch"] = arch_counts
    retval["arch_x_os"] = arch_x_os_counts
    retval["stats"] = stats
    return retval


In [3]:
fname = "../data/2021October.txt"
stats_2021 = process_package_dump(fname,"2021")
fname = "../data/2022October.txt"
stats_2022 = process_package_dump(fname,"2022")

In [4]:
def dumb_find(lst, k, v):
    for i, dic in enumerate(lst):
        if dic[k] == v:
            return lst[i]
    return None

def join_stats(stats_list,idx="prct"):
    joined = []
    # LIST SHOULD BE THE NEWEST FIRST!!!
    first = stats_list[0]
    for entry in first:
        new_entry = {}
        new_entry["name"] = entry["name"]
        new_entry.update(entry)
        del new_entry[idx]
        del new_entry["date"]
        
        new_entry[entry["date"]] = entry[idx]
        for other in stats_list[1:]:
            temp = dumb_find(other,"name",entry["name"])
            if temp:
                new_entry[temp["date"]] = temp[idx]
        joined.append(new_entry)
    return joined

In [6]:
joined_distro = join_stats([stats_2022["distro"],stats_2021["distro"]])
distro_df = pd.DataFrame(data=joined_distro)
distro_df.to_csv("distro.csv")
distro_df

Unnamed: 0,name,2022,2021
0,boxturtle,0.0,0.0
1,cturtle,0.0,0.0
2,diamondback,0.0,0.0
3,electric,0.0,0.0
4,fuerte,0.0,0.0
5,groovy,0.0,0.0
6,hydro,0.0,0.0
7,indigo,0.0,0.0
8,jade,0.0,0.0
9,kinetic,0.0,0.0


In [7]:
joined_arch = join_stats([stats_2022["arch"],stats_2021["arch"]])
arch_df = pd.DataFrame(data=joined_arch)
arch_df.to_csv("arch.csv")
arch_df.head()

Unnamed: 0,name,2022,2021
0,i386,9.51,10.04
1,amd64,84.36,82.75
2,armhf,0.0,0.0
3,arm64,0.0,0.0
4,source,0.0,0.0


In [9]:
joined_arch_x_os = join_stats([stats_2022["arch_x_os"],stats_2021["arch_x_os"]])
arch_os_df = pd.DataFrame(data=joined_arch_x_os)
arch_os_df.to_csv("arch_os.csv")
arch_os_df.head()

In [10]:
joined_package = join_stats([stats_2022["package"],stats_2021["package"]],idx="count")
package_df = pd.DataFrame(data=joined_package)


In [11]:
package_df["YoY"] = package_df["2022"]-package_df["2021"]
package_df["YoY_Prct"] = 100.00*package_df["YoY"]/package_df["2021"]

package_df.to_csv("package.csv")
package_df.head()


Unnamed: 0,name,package,distro,2022,2021,YoY,YoY_Prct
0,ros-repo,,repo,5,4,1,25.0
1,ros-essential,,essential,2,4,-2,-50.0


In [12]:
package_df[0:20]

Unnamed: 0,name,package,distro,2022,2021,YoY,YoY_Prct
0,ros-repo,,repo,5,4,1,25.0
1,ros-essential,,essential,2,4,-2,-50.0


In [13]:
dashing = package_df[package_df["distro"]=="dashing"]
dashing[0:20]

Unnamed: 0,name,package,distro,2022,2021,YoY,YoY_Prct


In [32]:
package_df.iloc[package_df["YoY_Prct"].argmin()]

name        python-vtk6
package                
distro             vtk6
2021                 51
2020              38567
YoY              -38516
YoY_Prct       -99.8678
Name: 8049, dtype: object

In [33]:
kinetic = package_df[package_df["distro"] == "kinetic"]

In [34]:
kinetic.iloc[kinetic["YoY_Prct"].argmin()]

name        ros-kinetic-rosbag-pandas
package                 rosbag-pandas
distro                        kinetic
2021                               20
2020                             1046
YoY                             -1026
YoY_Prct                      -98.088
Name: 12311, dtype: object

In [35]:
kinetic.iloc[kinetic["YoY_Prct"].argmax()]

name        ros-kinetic-ros-speech-recognition
package                 ros-speech-recognition
distro                                 kinetic
2021                                       333
2020                                        35
YoY                                        298
YoY_Prct                               851.429
Name: 4375, dtype: object

In [36]:
kinetic["YoY_Prct"].median()

-56.79012345679013

In [37]:
melodic = package_df[package_df["distro"] == "melodic"]

In [38]:
melodic.iloc[melodic["YoY_Prct"].argmin()]

name        ros-melodic-jderobot-assets
package                 jderobot-assets
distro                          melodic
2021                                 32
2020                                807
YoY                                -775
YoY_Prct                       -96.0347
Name: 10229, dtype: object

In [39]:
go

<module 'plotly.graph_objs' from '/home/kscottz/.local/lib/python3.6/site-packages/plotly/graph_objs/__init__.py'>

In [78]:
print(distro_df)
print(distro_df["2020"].sum())
print(distro_df["2021"].sum())

           name   2021   2020
0     boxturtle   0.00   0.00
1       cturtle   0.00   0.00
2   diamondback   0.00   0.00
3      electric   0.00   0.00
4        fuerte   0.00   0.00
5        groovy   0.00   0.00
6         hydro   0.00   0.00
7        indigo   0.80   1.18
8          jade   0.00   0.00
9       kinetic   8.54  22.04
10        lunar   0.10   0.18
11      melodic  35.24  47.06
12       noetic  18.97   4.43
13       ardent   0.00   0.01
14       bouncy   0.00   0.03
15      crystal   0.04   0.12
16      dashing   2.50   7.30
17     eloquent   0.61   4.65
18         foxy  14.29   2.14
19     galactic   6.38   0.00
20      rolling   2.23   0.07
89.21
89.7


In [90]:
from plotly import tools
import plotly.offline as py
import plotly.graph_objs as go

everything_else_2020 = 100.0-distro_df["2020"].sum()
everything_else_2021 = 100.0-distro_df["2021"].sum()


temp = distro_df[distro_df["2021"]>=1]
leftovers = distro_df[distro_df["2021"]<1]

all_other_2021 = leftover["2021"].sum()
all_other_2020 = leftover["2020"].sum()

new_row = pd.DataFrame(data=[{"name":"Other Distros","2021":all_other_2021,"2020":all_other_2020},
                             {"name":"Python Pkgs","2021":everything_else_2021,"2020":everything_else_2020}])
print(new_row)
temp = pd.concat([temp, new_row], ignore_index=True)
print(temp["2020"].sum())
print(temp["2021"].sum())

colors ={"noetic": 'rgba(255,0,0,0.4)',
         "melodic": 'rgba(255,0,0,0.4)',
         "kinetic": 'rgba(255,0,0,0.4)',
         "dashing": 'rgba(255,0,0,0.4)',
         "eloquent": 'rgba(255,0,0,0.4)',
         "foxy": 'rgba(255,0,0,0.4)',
         "galactic": 'rgba(255,0,0,0.4)',
         "rolling": 'rgba(255,0,0,0.4)',
         
        }

trace1 = go.Pie(values= temp["2020"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="July 2020", sort=False)
trace2 = go.Pie(values= temp["2021"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="July 2021",sort=False )
layout = go.Layout(title="Download Percentage from packages.ros.org")
data = [trace1,trace2]
fig = go.Figure(data=data, layout=layout)
fig.update_traces(marker=dict(colors=['#FFAAAA','#FF6666','#FF0000','#CCCCFF','#9999FF','#6666FF','#3333FF',"#0011FF","#CCCC66"]))

fig.show()


            name   2021   2020
0  Other Distros   1.55   6.17
1    Python Pkgs  10.30  10.79
100.0
100.0


In [72]:
temp = arch_df[arch_os_df["2021"]>0.00001]

trace1 = go.Pie(values= temp["2020"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="2020")
trace2 = go.Pie(values= temp["2021"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="2021")

layout = go.Layout(title="ROS Download Percentage By Arch",)
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
fig.show()


Boolean Series key will be reindexed to match DataFrame index.



In [51]:
fig = px.bar(distro_df, x="name", y=["2021", "2020"], title="Wide-Form Input")
fig.show()

In [46]:
fname = "../data/march_2020.txt"
stats_mar_2020 = process_package_dump(fname,"3/2020")
fname = "../data/march_2021.txt"
stats_mar_2021 = process_package_dump(fname,"3/2021")

joined_distro = join_stats([stats_mar_2020["distro"],stats_mar_2021["distro"]])
mar_distro_df = pd.DataFrame(data=joined_distro)
mar_distro_df.to_csv("march_distro.csv")
mar_distro_df.head()

Unnamed: 0,name,3/2020,3/2021
0,boxturtle,0.0,0.0
1,cturtle,0.0,0.0
2,diamondback,0.0,0.0
3,electric,0.0,0.0
4,fuerte,0.0,0.0


In [48]:
temp = mar_distro_df[mar_distro_df["7/2021"]>0.5]

trace1 = go.Pie(values= temp["7/2020"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="7/2020")
trace2 = go.Pie(values= temp["7/2021"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="7/2021")

layout = go.Layout(title="ROS Index Download Percentage: March 2020 vs. March 2021",)
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
fig.show()


KeyError: '7/2021'

In [173]:
cd ../data/


/home/kscottz/Code/ros_metrics_analysis/data


In [174]:
ls

2019_total.txt  2020_total.txt  march_2020.txt  march_2021.txt
