In [61]:
import numpy as np
import pandas as pd

In [53]:
# Relative path to the 2020 dump file.
# NOTE(review): no cell visible in this section reads `path`; the loading cell
# declares the same location again as `fname` — confirm before relying on it.
path = "../data/2020_total.txt"

def _parse_package_line(line, date):
    """Build one package record from a ``<name>:<count>`` line."""
    fields = line.split(":")
    name = fields[0]
    # The name is dash-separated: the second token is stored as "distro" and
    # everything after it, re-joined with dashes, as "package".
    parts = name.split("-")
    return {
        "package": "-".join(parts[2:]),
        "distro": parts[1],
        "name": name,
        "count": int(fields[1]),
        "date": date,
    }


def _parse_percent_line(line, date):
    """Build one percentage record from a ``<name>:<value>%`` line."""
    fields = line.split(":")
    return {
        "name": fields[0],
        "prct": float(fields[1].replace("%", "")),
        "date": date,
    }


def _read_section(fp, line, end_marker, parse, date):
    """Parse lines, starting with ``line``, until one contains ``end_marker``.

    The marker line is consumed but not parsed. Blank lines are skipped.
    Raises ValueError if the file ends before the marker is seen.
    """
    records = []
    while end_marker not in line:
        if not line:
            # readline() returns "" only at end of file.
            raise ValueError(
                "reached end of file before a line containing %r" % end_marker
            )
        if line.strip():
            records.append(parse(line, date))
        line = fp.readline()
    return records


def process_package_dump(fname,date):
    """Parse a statistics dump file into per-section record lists.

    The file is read top to bottom as follows:

    1. The first line is discarded.
    2. Leading lines containing "not a ros package name" are discarded.
    3. ``<name>:<count>`` lines are read until a line containing "Breakdown".
    4. ``<name>:<value>%`` lines are read until the next "Breakdown" line,
       then until a line containing "Results", then until a line containing
       "Unique".
    5. Every remaining ``<key>:<value>`` line is collected verbatim.

    Blank lines are ignored in every section.

    Args:
        fname: path of the dump file to read.
        date: value stored under the "date" key of every record.

    Returns:
        A dict with the keys:
        "package"   - list of dicts (package, distro, name, count, date),
        "distro"    - list of dicts (name, prct, date),
        "arch"      - list of dicts (name, prct, date),
        "arch_x_os" - list of dicts (name, prct, date),
        "stats"     - list of (key, value) string tuples; the value is the raw
                      text between the first and second colon, unstripped.

    Raises:
        ValueError: if the file ends before an expected section marker.
    """
    with open(fname,'r') as fp:
        # Drop the first line, then any "not a ros package name" lines.
        fp.readline()
        line = fp.readline()
        while "not a ros package name" in line:
            line = fp.readline()

        package_counts = _read_section(
            fp, line, "Breakdown", _parse_package_line, date)
        # Each section starts on the line right after the previous marker.
        distro_counts = _read_section(
            fp, fp.readline(), "Breakdown", _parse_percent_line, date)
        arch_counts = _read_section(
            fp, fp.readline(), "Results", _parse_percent_line, date)
        arch_x_os_counts = _read_section(
            fp, fp.readline(), "Unique", _parse_percent_line, date)

        # Whatever follows the "Unique" line is kept as raw key/value text.
        stats = []
        for line in fp:
            if not line.strip():
                continue
            fields = line.split(":")
            stats.append((fields[0], fields[1]))

    return {
        "package": package_counts,
        "distro": distro_counts,
        "arch": arch_counts,
        "arch_x_os": arch_x_os_counts,
        "stats": stats,
    }


In [54]:
# Parse the 2020 and 2019 dump files. Each result is the dict returned by
# process_package_dump, with the year string stored as "date" on every record.
# `fname` is rebound for the second file, so it ends up holding the 2019 path.
fname = "../data/2020_total.txt"
stats_2020 = process_package_dump(fname,"2020")
fname = "../data/2019_total.txt"
stats_2019 = process_package_dump(fname,"2019")

In [91]:
def dumb_find(lst, k, v):
    """Linear search for the first dict in ``lst`` whose ``k`` entry equals ``v``.

    Args:
        lst: list of dicts, each expected to contain key ``k``.
        k: key to compare on.
        v: value to look for.

    Returns:
        The first matching dict (the same object stored in ``lst``), or None
        when no element matches.
    """
    matches = (candidate for candidate in lst if candidate[k] == v)
    return next(matches, None)

def join_stats(stats_list,idx="prct"):
    """Merge several per-date record lists into one wide record per name.

    Args:
        stats_list: list of record lists. Every record is a dict holding at
            least the keys "name", "date" and ``idx``. THE NEWEST LIST MUST
            COME FIRST: only names present in the first list are emitted;
            names that occur solely in later lists are dropped.
        idx: key of the value to spread across the date columns.

    Returns:
        A list with one dict per record of the first list, in the same order.
        Each dict starts with "name", keeps the record's remaining fields
        except ``idx`` and "date", and gains one key per date (the record's
        "date" value) mapped to that date's ``idx`` value. A date key is
        absent when the name does not occur in that list. An empty
        ``stats_list`` yields an empty list.
    """
    if not stats_list:
        return []

    first = stats_list[0]

    # Index every other list by name once, so each lookup is a dict access
    # instead of a scan of the whole list. setdefault keeps the first record
    # for a duplicated name, i.e. first-match semantics.
    indexes = []
    for other in stats_list[1:]:
        by_name = {}
        for record in other:
            by_name.setdefault(record["name"], record)
        indexes.append(by_name)

    joined = []
    for entry in first:
        # Insert "name" first so it leads the key order, then the other fields.
        new_entry = {"name": entry["name"]}
        new_entry.update(entry)
        del new_entry[idx]
        del new_entry["date"]

        new_entry[entry["date"]] = entry[idx]
        for by_name in indexes:
            match = by_name.get(entry["name"])
            if match is not None:
                new_entry[match["date"]] = match[idx]
        joined.append(new_entry)
    return joined

In [92]:
# Join the "distro" records of both years (2020 first, since join_stats takes
# the newest list first), write the table to distro.csv and show its first rows.
joined_distro = join_stats([stats_2020["distro"],stats_2019["distro"]])
distro_df = pd.DataFrame(data=joined_distro)
distro_df.to_csv("distro.csv")
distro_df.head()

Unnamed: 0,name,2020,2019
0,boxturtle,0.0,0.0
1,cturtle,0.0,0.0
2,diamondback,0.0,0.0
3,electric,0.0,0.0
4,fuerte,0.0,0.01


In [93]:
# Same join for the "arch" records: 2020 first, written to arch.csv, then a
# preview of the first rows.
joined_arch = join_stats([stats_2020["arch"],stats_2019["arch"]])
arch_df = pd.DataFrame(data=joined_arch)
arch_df.to_csv("arch.csv")
arch_df.head()

Unnamed: 0,name,2020,2019
0,i386,0.45,1.25
1,amd64,89.17,86.91
2,armhf,1.0,2.32
3,arm64,3.7,3.62
4,source,0.0,0.0


In [94]:
# Same join for the "arch_x_os" records: 2020 first, written to arch_os.csv,
# then a preview of the first rows. Names missing from the 2019 list have no
# 2019 value in the resulting table.
joined_arch_x_os = join_stats([stats_2020["arch_x_os"],stats_2019["arch_x_os"]])
arch_os_df = pd.DataFrame(data=joined_arch_x_os)
arch_os_df.to_csv("arch_os.csv")
arch_os_df.head()

Unnamed: 0,name,2020,2019
0,bionic_amd64,50.77,30.19
1,bionic_arm64,3.01,2.03
2,bionic_armhf,0.47,0.6
3,buster_amd64,0.08,
4,buster_arm64,0.01,


In [95]:
# Join the "package" records on their "count" value (instead of the default
# "prct"). Only names present in the 2020 list appear, because join_stats
# iterates over its first list.
joined_package = join_stats([stats_2020["package"],stats_2019["package"]],idx="count")
package_df = pd.DataFrame(data=joined_package)


In [96]:
# Year-over-year change per package, added to package_df in place:
# "YoY" is the absolute difference 2020 - 2019, "YoY_Prct" is that difference
# as a percentage of the 2019 value. Rows without a 2019 value come out as NaN.
package_df["YoY"] = package_df["2020"]-package_df["2019"]
package_df["YoY_Prct"] = 100.00*package_df["YoY"]/package_df["2019"]

# Write the full table to package.csv and preview the first rows.
package_df.to_csv("package.csv")
package_df.head()


Unnamed: 0,name,package,distro,2020,2019,YoY,YoY_Prct
0,python-catkin-pkg-modules,pkg-modules,catkin,2131184,1458422.0,672762.0,46.129447
1,python-catkin-pkg,pkg,catkin,2126073,1456559.0,669514.0,45.965457
2,python-rosdistro-modules,modules,rosdistro,1850443,1372700.0,477743.0,34.803162
3,python-rosdistro,,rosdistro,1848320,1371723.0,476597.0,34.744405
4,python-rospkg-modules,modules,rospkg,1772498,1179426.0,593072.0,50.2848


In [97]:
# Row with the smallest YoY_Prct over all packages; argmin returns a position,
# hence the positional .iloc lookup.
package_df.iloc[package_df["YoY_Prct"].argmin()]

name        ros-kinetic-cob-pick-place-action
package                 cob-pick-place-action
distro                                kinetic
2020                                       18
2019                                     3261
YoY                                     -3243
YoY_Prct                              -99.448
Name: 16909, dtype: object

In [101]:
# Rows of package_df whose "distro" field equals "kinetic".
kinetic = package_df[package_df["distro"] == "kinetic"]

In [102]:
# Row with the smallest YoY_Prct among the kinetic rows (positional lookup).
kinetic.iloc[kinetic["YoY_Prct"].argmin()]

name        ros-kinetic-cob-pick-place-action
package                 cob-pick-place-action
distro                                kinetic
2020                                       18
2019                                     3261
YoY                                     -3243
YoY_Prct                              -99.448
Name: 16909, dtype: object

In [103]:
# Row with the largest YoY_Prct among the kinetic rows (positional lookup).
kinetic.iloc[kinetic["YoY_Prct"].argmax()]

name        ros-kinetic-cis-camera
package                 cis-camera
distro                     kinetic
2020                          2154
2019                           146
YoY                           2008
YoY_Prct                   1375.34
Name: 5522, dtype: object

In [105]:
# Median YoY_Prct over the kinetic rows.
kinetic["YoY_Prct"].median()

-22.761262082801387

In [112]:
# Rows of package_df whose "distro" field equals "melodic".
melodic = package_df[package_df["distro"] == "melodic"]

In [114]:
# Row with the smallest YoY_Prct among the melodic rows (positional lookup).
melodic.iloc[melodic["YoY_Prct"].argmin()]

name        ros-melodic-dbw-mkz-twist-controller-dbgsym
package                 dbw-mkz-twist-controller-dbgsym
distro                                          melodic
2020                                                398
2019                                               1409
YoY                                               -1011
YoY_Prct                                        -71.753
Name: 9678, dtype: object