In [1]:
import numpy as np
import pandas as pd
import nbformat
import plotly.express as px
from plotly import tools
import plotly.offline as py
import plotly.graph_objs as go


In [2]:
# Download files
# run analyze_awstats
# Use this to parse files

In [3]:
ros2 = ["ardent","bouncy","crystal","dashing","eloquent","foxy","galactic","humble","iron","jazzy","rolling"]
ros1 = ["boxturtle","cturtle","diamondback","electric","fuerte","groovy","hydro","indigo","jade","kinetic","lunar","melodic","noetic"]
# This are just values for the input files
this_year = "2024"
this_month = "October"
last_year = "2023"
last_month = "October"

In [4]:
def process_package_dump(fname,date):
    package_counts = [] 
    distro_counts = []
    arch_counts = []
    arch_x_os_counts = []
    stats = []
    with open(fname,'r') as fp:
        # remove the first line
        f = fp.readline()

        # remove all the empty 
        f = fp.readline()
        while "not a ros package name" in f:
            f = fp.readline()

        while "Breakdown" not in f:
            temp = f.split(":")
            parts = temp[0].split("-")
            distro = parts[1]
            package = "-".join(parts[2:])
            data = {
                "package":package,
                "distro":distro,
                "name":temp[0],
                "count":int(temp[1]),
                "date":date
            }

            package_counts.append(data)
            f = fp.readline()
        # Done with the modules 

        f = fp.readline()
        while "Breakdown" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            distro_counts.append(data)
            f = fp.readline()

        f = fp.readline()
        while "Results" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            arch_counts.append(data)
            f = fp.readline()

        f = fp.readline()
        while "Unique" not in f:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":float(temp[1].replace("%","")),
                "date":date
            }
            arch_x_os_counts.append(data)
            f = fp.readline()

        f = fp.readline()    
        while len(f) > 0:
            temp = f.split(":")
            data = {
                "name":temp[0],
                "prct":temp[1],
                "date":date
            }
            stats.append((temp[0],temp[1]))
            f = fp.readline()
    retval = {}
    retval["package"] = package_counts
    retval["distro"] = distro_counts
    retval["arch"] = arch_counts
    retval["arch_x_os"] = arch_x_os_counts
    retval["stats"] = stats
    return retval


In [5]:
# Load and process the dumps from analyze_awstats.py as dictionaries
fname = "../scripts/October2023.txt"
stats_last = process_package_dump(fname,last_year)
fname = "../scripts/October2024.txt"
stats_this = process_package_dump(fname,this_year)

In [6]:
# print raw package counts and deb download data
print("#"*30)
print("Diff Packages")
d_this = int(stats_this["stats"][0][1])
d_last = int(stats_last["stats"][0][1])
print("{0}: {1}".format(last_year,d_last))
print("{0}: {1}".format(this_year,d_this))
print("Percent Change: {0}%".format(100.0*((d_this-d_last)/d_last)))
print("#"*30)
print("Total Deb Downloads")
d_this = int(stats_this["stats"][1][1])
d_last = int(stats_last["stats"][1][1])
print("{0}: {1}".format(last_year,d_last))
print("{0}: {1}".format(this_year,d_this))
print("Percent Change: {0}%".format(100.0*((d_this-d_last)/d_last)))

##############################
Diff Packages
2023: 26931
2024: 26978
Percent Change: 0.17452006980802792%
##############################
Total Deb Downloads
2023: 48510685
2024: 48943663
Percent Change: 0.8925415091541173%


In [7]:
# print package data
print(len(stats_last["package"]))
print(len(stats_this["package"]))
print(stats_last.keys())
print(stats_last["arch_x_os"])
stats_this

26150
26225
dict_keys(['package', 'distro', 'arch', 'arch_x_os', 'stats'])
[{'name': 'bionic_amd64', 'prct': 4.67, 'date': '2023'}, {'name': 'bionic_arm64', 'prct': 1.23, 'date': '2023'}, {'name': 'bionic_armhf', 'prct': 0.06, 'date': '2023'}, {'name': 'buster_amd64', 'prct': 0.12, 'date': '2023'}, {'name': 'buster_arm64', 'prct': 0.04, 'date': '2023'}, {'name': 'buster_armhf', 'prct': 0.0, 'date': '2023'}, {'name': 'disco_amd64', 'prct': 0.0, 'date': '2023'}, {'name': 'focal_amd64', 'prct': 36.82, 'date': '2023'}, {'name': 'focal_arm64', 'prct': 4.68, 'date': '2023'}, {'name': 'focal_armhf', 'prct': 0.14, 'date': '2023'}, {'name': 'jammy_amd64', 'prct': 40.62, 'date': '2023'}, {'name': 'jammy_arm64', 'prct': 3.15, 'date': '2023'}, {'name': 'jammy_armhf', 'prct': 0.0, 'date': '2023'}, {'name': 'stretch_amd64', 'prct': 0.07, 'date': '2023'}, {'name': 'stretch_arm64', 'prct': 0.03, 'date': '2023'}, {'name': 'trusty_amd64', 'prct': 0.22, 'date': '2023'}, {'name': 'trusty_armhf', 'prct': 0

{'package': [{'package': 'modules',
   'distro': 'rosdistro',
   'name': 'python3-rosdistro-modules',
   'count': 198217,
   'date': '2024'},
  {'package': 'core',
   'distro': 'colcon',
   'name': 'python3-colcon-core',
   'count': 137956,
   'date': '2024'},
  {'package': 'pkg-modules',
   'distro': 'catkin',
   'name': 'python3-catkin-pkg-modules',
   'count': 134237,
   'date': '2024'},
  {'package': 'modules',
   'distro': 'rosdep',
   'name': 'python3-rosdep-modules',
   'count': 132889,
   'date': '2024'},
  {'package': 'modules',
   'distro': 'rospkg',
   'name': 'python3-rospkg-modules',
   'count': 130589,
   'date': '2024'},
  {'package': 'python-setup-py',
   'distro': 'colcon',
   'name': 'python3-colcon-python-setup-py',
   'count': 129041,
   'date': '2024'},
  {'package': '',
   'distro': 'rosdep',
   'name': 'python3-rosdep',
   'count': 123021,
   'date': '2024'},
  {'package': '',
   'distro': 'rosdistro',
   'name': 'python3-rosdistro',
   'count': 118745,
   'date'

In [8]:
def dumb_find(lst, k, v):
    for i, dic in enumerate(lst):
        if dic[k] == v:
            return lst[i]
    return None

def join_stats(stats_list,idx="prct"):
    # join two lists of data into a single dictionary
    joined = []
    # LIST SHOULD BE THE NEWEST FIRST!!!
    first = stats_list[0]
    for entry in first:
        new_entry = {}
        new_entry["name"] = entry["name"]
        new_entry.update(entry)
        del new_entry[idx]
        del new_entry["date"]
        
        new_entry[entry["date"]] = entry[idx]
        for other in stats_list[1:]:
            temp = dumb_find(other,"name",entry["name"])
            if temp:
                new_entry[temp["date"]] = temp[idx]
        joined.append(new_entry)
    return joined

In [9]:
# Join together our distro data and make it into a CSV file 
joined_distro = join_stats([stats_this["distro"],stats_last["distro"]])
distro_df = pd.DataFrame(data=joined_distro)
distro_df.to_csv("distro.csv")
distro_df

Unnamed: 0,name,2024,2023
0,boxturtle,0.0,0.0
1,cturtle,0.0,0.0
2,diamondback,0.0,0.0
3,electric,0.0,0.0
4,fuerte,0.0,0.0
5,groovy,0.0,0.0
6,hydro,0.0,0.0
7,indigo,0.05,0.24
8,jade,0.0,0.0
9,kinetic,0.39,1.14


In [10]:
# Repeat that process for arch
joined_arch = join_stats([stats_this["arch"],stats_last["arch"]])
arch_df = pd.DataFrame(data=joined_arch)
arch_df.to_csv("arch.csv")
arch_df.head()

Unnamed: 0,name,2024,2023
0,i386,0.04,0.12
1,amd64,80.75,83.69
2,armhf,0.12,0.27
3,arm64,11.41,9.18
4,source,0.0,0.0


In [11]:
# Join arch x os data
joined_arch_x_os = join_stats([stats_this["arch_x_os"],stats_last["arch_x_os"]])
arch_os_df = pd.DataFrame(data=joined_arch_x_os)
arch_os_df.to_csv("arch_os.csv")
arch_os_df

Unnamed: 0,name,2024,2023
0,bionic_amd64,1.89,4.67
1,bionic_arm64,0.6,1.23
2,bionic_armhf,0.01,0.06
3,buster_amd64,0.01,0.12
4,buster_arm64,0.01,0.04
5,disco_amd64,0.0,0.0
6,focal_amd64,25.06,36.82
7,focal_arm64,3.63,4.68
8,focal_armhf,0.08,0.14
9,jammy_amd64,41.57,40.62


In [12]:
# finally do this at the package level
joined_package = join_stats([stats_this["package"],stats_last["package"]],idx="count")
package_df = pd.DataFrame(data=joined_package)


In [13]:
# Add in YOY differences and package percents
package_df["YoY"] = package_df[this_year]-package_df[last_year]
package_df["YoY_Prct"] = 100.00*package_df["YoY"]/package_df[this_year]

package_df.to_csv("package.csv")
package_df.head()


Unnamed: 0,name,package,distro,2024,2023,YoY,YoY_Prct
0,python3-rosdistro-modules,modules,rosdistro,198217,128759.0,69458.0,35.041394
1,python3-colcon-core,core,colcon,137956,103375.0,34581.0,25.066688
2,python3-catkin-pkg-modules,pkg-modules,catkin,134237,175130.0,-40893.0,-30.463285
3,python3-rosdep-modules,modules,rosdep,132889,123786.0,9103.0,6.850078
4,python3-rospkg-modules,modules,rospkg,130589,140266.0,-9677.0,-7.410272


# Gazebo Package Download Stats

In [14]:
set_gazebo = set([f for f in package_df["name"].tolist() if "gazebo" in f and "rmf" not in f ])
print(set_gazebo)
print(len(set_gazebo))
set_ignition = set([f for f in package_df["name"].tolist() if "ignition" in f and "rmf" not in f])
print(set_ignition)
print(len(set_ignition))
set_gz = set([f for f in package_df["name"].tolist() if "gz" in f and "rmf" not in f and "classic" not in f])
print(set_gz)
print(len(set_gz))
set_ign = set([f for f in package_df["name"].tolist() if "ign" in f and "rmf" not in f ])
print(set_ign)
print(len(set_ign))


{'ros-kinetic-ainstein-radar-gazebo-plugins', 'ros-melodic-gazebo-msgs', 'ros-noetic-gazebo-video-monitor-plugins', 'ros-kinetic-ur-e-gazebo', 'ros-foxy-husky-gazebo', 'ros-galactic-velodyne-gazebo-plugins-dbgsym', 'ros-galactic-ros-ign-gazebo-demos', 'ros-iron-gazebo-ros-pkgs', 'ros-noetic-gazebo-video-monitors', 'ros-kinetic-agvs-gazebo', 'ros-melodic-rotors-gazebo', 'ros-melodic-cob-gazebo-worlds', 'ros-rolling-velodyne-gazebo-plugins', 'ros-humble-irobot-create-gazebo-bringup', 'ros-indigo-gazebo-state-plugins', 'ros-rolling-turtlebot3-gazebo-dbgsym', 'ros-iron-gazebo-ros2-control-demos-dbgsym', 'ros-indigo-innok-heros-gazebo', 'ros-indigo-rotors-gazebo-plugins', 'ros-noetic-open-manipulator-p-gazebo', 'ros-noetic-qb-device-gazebo', 'ros-noetic-rm-gazebo-dbgsym', 'ros-indigo-youbot-gazebo-robot', 'ros-noetic-audibot-gazebo-dbgsym', 'ros-melodic-ridgeback-gazebo-plugins', 'ros-noetic-warthog-gazebo', 'ros-humble-gazebo-set-joint-positions-plugin-dbgsym', 'ros-iron-ros-ign-gazebo-dem

In [15]:
# Now let's clean up these sets
set_ign_gazebo = set_gazebo.intersection(set_ign)
# Remove ign_gazebo from gazebo to give us just gazebo classic
set_gazebo_classic = set_gazebo-set_ign_gazebo

set_new_gazebo = set_ign_gazebo.union(set_ign,set_gz,set_ignition)
print("Set of new Gazebo packages, formerly Ignition")
print(len(set_new_gazebo))
print(set_new_gazebo)
print("----------------")
print("Set of Gazebo Classic packages")
print(len(set_gazebo_classic))
print(set_gazebo_classic)
print("----------------")
print("Sanity Check -- should be empty")
print(set_gazebo_classic.intersection(set_new_gazebo))

Set of new Gazebo packages, formerly Ignition
279
{'ros-rolling-test-ros-gz-bridge', 'ros-humble-turtlebot4-ignition-toolbox-dbgsym', 'ros-rolling-ros-gz', 'ros-rolling-gz-ogre-next-vendor', 'ros-jazzy-turtlebot4-gz-toolbox', 'ros-iron-ros-ign-image-dbgsym', 'ros-galactic-turtlebot4-ignition-toolbox-dbgsym', 'ros-galactic-ros-ign-gazebo-demos', 'ros-jazzy-gz-utils-vendor-dbgsym', 'ros-noetic-ros-ign-gazebo-demos', 'ros-jazzy-ros-gz-sim-demos', 'ros-humble-irobot-create-ignition-bringup', 'ros-galactic-irobot-create-ignition-bringup', 'ros-foxy-ros-ign-image-dbgsym', 'ros-rolling-gz-transport-vendor', 'ros-humble-ign-ros2-control-dbgsym', 'ros-jazzy-gz-sensors-vendor', 'ros-rolling-gz-fuel-tools-vendor-dbgsym', 'ros-rolling-ign-rviz-plugins-dbgsym', 'ros-iron-ros-ign-gazebo-demos', 'ros-humble-irobot-create-ignition-plugins', 'ros-iron-ros-ign-gazebo-dbgsym', 'ros-iron-ros-ign-gazebo', 'ros-rolling-gz-sensors-vendor-dbgsym', 'ros-rolling-gz-math-vendor', 'ros-humble-ros-gz-interfaces', 

In [16]:
# Make dataframes of classic and new gazebo packages
classic_df = package_df[package_df['name'].isin(set_gazebo_classic)]
gazebo_df = package_df[package_df['name'].isin(set_new_gazebo)]

In [17]:
classic_last = classic_df[last_year].sum()
classic_this = classic_df[this_year].sum()
gazebo_last = gazebo_df[last_year].sum()
gazebo_this = gazebo_df[this_year].sum()
print("Gazebo Classic Package Downloads {0} {1}: {2}".format(last_month, last_year, int(classic_last)))
print("Gazebo Classic Package Downloads {0} {1}: {2}".format(this_month, this_year, int(classic_this)))
classic_change = (classic_this - classic_last) / classic_last
print("Percent Change Between {0} {1} and {2} {3}: {4:.2f}%".format(last_month, last_year, this_month, this_year, 100.0*classic_change))
print("---------------------------------------------------")
print("Ign Gazebo Package Downloads {0} {1}: {2}".format(last_month, last_year, int(gazebo_last)))
print("Ign Gazebo Package Downloads {0} {1}: {2}".format(this_month, this_year, int(gazebo_this)))
gazebo_change = (gazebo_this - gazebo_last) / gazebo_last
print("Percent Change Between {0} {1} and {2} {3}: {4:.2f}%".format(last_month, last_year, this_month, this_year, 100.0*gazebo_change))
print("---------------------------------------------------")
sum_last = gazebo_last + classic_last
gazebo_share_last = gazebo_last / sum_last
classic_share_last = classic_last / sum_last
print("In {0} {1} Gazebo Classic was {2:.2f}% of all Gazebo Downloads".format(last_month,last_year,classic_share_last*100.0))
print("In {0} {1} New Gazebo     was {2:.2f}% of all Gazebo Downloads".format(last_month,last_year,gazebo_share_last*100.0))
print("---------------------------------------------------")
sum_this = gazebo_this + classic_this
gazebo_share_this = gazebo_this / sum_this
classic_share_this = classic_this / sum_this
print("In {0} {1} Gazebo Classic was {2:.2f}% of all Gazebo Downloads".format(this_month,this_year,classic_share_this*100.0))
print("In {0} {1} New Gazebo     was {2:.2f}% of all Gazebo Downloads".format(this_month,this_year,gazebo_share_this*100.0))
print("---------------------------------------------------")
total_gz_last = classic_last + gazebo_last 
total_gz_this = classic_this + gazebo_this
total_gz_change = (total_gz_this - total_gz_last) / total_gz_last
print("TOTAL Gazebo Downloads in {0} {1}: {2}".format(last_month,last_year,int(total_gz_last)))
print("TOTAL Gazebo Downloads in {0} {1}: {2}".format(this_month,this_year,total_gz_this))
print("Change in TOTAL Gazebo Downloads Between {0} {1} and {2} {3}: {4:.2f}%".format(last_month,last_year,this_month,this_year,total_gz_change*100.0))
print("---------------------------------------------------")
print("*ESTIMATED* Annual Gazebo Downloads: {0}".format(12*total_gz_this))


Gazebo Classic Package Downloads October 2023: 527526
Gazebo Classic Package Downloads October 2024: 476002
Percent Change Between October 2023 and October 2024: -9.77%
---------------------------------------------------
Ign Gazebo Package Downloads October 2023: 210691
Ign Gazebo Package Downloads October 2024: 416586
Percent Change Between October 2023 and October 2024: 97.72%
---------------------------------------------------
In October 2023 Gazebo Classic was 71.46% of all Gazebo Downloads
In October 2023 New Gazebo     was 28.54% of all Gazebo Downloads
---------------------------------------------------
In October 2024 Gazebo Classic was 53.33% of all Gazebo Downloads
In October 2024 New Gazebo     was 46.67% of all Gazebo Downloads
---------------------------------------------------
TOTAL Gazebo Downloads in October 2023: 738217
TOTAL Gazebo Downloads in October 2024: 892588
Change in TOTAL Gazebo Downloads Between October 2023 and October 2024: 20.91%
-------------------------

In [18]:
package_df[0:20]

Unnamed: 0,name,package,distro,2024,2023,YoY,YoY_Prct
0,python3-rosdistro-modules,modules,rosdistro,198217,128759.0,69458.0,35.041394
1,python3-colcon-core,core,colcon,137956,103375.0,34581.0,25.066688
2,python3-catkin-pkg-modules,pkg-modules,catkin,134237,175130.0,-40893.0,-30.463285
3,python3-rosdep-modules,modules,rosdep,132889,123786.0,9103.0,6.850078
4,python3-rospkg-modules,modules,rospkg,130589,140266.0,-9677.0,-7.410272
5,python3-colcon-python-setup-py,python-setup-py,colcon,129041,63042.0,65999.0,51.14576
6,python3-rosdep,,rosdep,123021,101911.0,21110.0,17.159672
7,python3-rosdistro,,rosdistro,118745,57167.0,61578.0,51.857341
8,python3-catkin-pkg,pkg,catkin,98862,122626.0,-23764.0,-24.037547
9,python3-colcon-ros,ros,colcon,98287,101902.0,-3615.0,-3.678004


In [19]:
package_df[package_df['name'].str.contains("RPM")]

Unnamed: 0,name,package,distro,2024,2023,YoY,YoY_Prct


In [20]:
# break down data by distro
just_foxy = package_df[package_df["distro"]=="foxy"]
just_foxy[0:20]

Unnamed: 0,name,package,distro,2024,2023,YoY,YoY_Prct
1300,ros-foxy-libstatistics-collector,libstatistics-collector,foxy,8355,13369.0,-5014.0,-60.011969
1303,ros-foxy-statistics-msgs,statistics-msgs,foxy,8345,13369.0,-5024.0,-60.203715
1305,ros-foxy-rclcpp,rclcpp,foxy,8342,13367.0,-5025.0,-60.237353
1383,ros-foxy-ament-cmake-core,ament-cmake-core,foxy,7550,14108.0,-6558.0,-86.860927
1391,ros-foxy-ament-cmake,ament-cmake,foxy,7541,13531.0,-5990.0,-79.432436
1400,ros-foxy-ros-workspace,ros-workspace,foxy,7532,14981.0,-7449.0,-98.898035
1412,ros-foxy-ament-cmake-export-include-directories,ament-cmake-export-include-directories,foxy,7481,13276.0,-5795.0,-77.462906
1413,ros-foxy-ament-cmake-export-libraries,ament-cmake-export-libraries,foxy,7477,12970.0,-5493.0,-73.465294
1415,ros-foxy-ament-cmake-export-link-flags,ament-cmake-export-link-flags,foxy,7475,12787.0,-5312.0,-71.063545
1417,ros-foxy-ament-cmake-python,ament-cmake-python,foxy,7474,12792.0,-5318.0,-71.153332


In [21]:
# package with the highest yoy prct change
package_df.iloc[package_df["YoY_Prct"].argmin()]

name        ros-melodic-ros-pytest
package                 ros-pytest
distro                     melodic
2024                             1
2023                         647.0
YoY                         -646.0
YoY_Prct                  -64600.0
Name: 24821, dtype: object

In [22]:
just_foxy = package_df[package_df["distro"] == "foxy"]

In [23]:
# find the foxy pkg with hightest yoy prct change
just_foxy.iloc[just_foxy["YoY_Prct"].argmin()]

name        ros-foxy-ibeo-msgs
package              ibeo-msgs
distro                    foxy
2024                        11
2023                    2515.0
YoY                    -2504.0
YoY_Prct         -22763.636364
Name: 16712, dtype: object

In [24]:
just_foxy.iloc[just_foxy["YoY_Prct"].argmax()]

name        ros-foxy-py-trees-ros
package              py-trees-ros
distro                       foxy
2024                          994
2023                         54.0
YoY                         940.0
YoY_Prct                94.567404
Name: 3638, dtype: object

In [25]:
just_foxy["YoY_Prct"].median()

-15.384615384615385

In [26]:
melodic = package_df[package_df["distro"] == "melodic"]

In [27]:
melodic.iloc[melodic["YoY_Prct"].argmin()]

name        ros-melodic-ros-pytest
package                 ros-pytest
distro                     melodic
2024                             1
2023                         647.0
YoY                         -646.0
YoY_Prct                  -64600.0
Name: 24821, dtype: object

In [28]:
# Let's look at Desktop installs, we would hope that those would be going up. 

# get the "pure" desktop installs 
exclude = ["warthog","turtlebot4","ridgeback","leo","dingo","mrp2","jackal","bwi","clearpath","industrial"
           "husky","moose","kobuki","care","pr2","heron","rosh","grizzly","husky","baxter","webots","create"]
package_names = package_df["name"].tolist()
pure_desktops = []
extended_desktops = []
for p in package_names:
    if "desktop" in p:
        extended_desktops.append(p)
        hit = True
        for e in exclude:
            if e in p:
                hit = False
                break
        if hit:
            pure_desktops.append(p)
            
# You can check pure desktops or vendor desktops by toggling pure_desktops and extended desktops
desktop_df = package_df[package_df["name"].isin(pure_desktops)]

print("Desktop installs between {0} and {1}".format(last_year,this_year))
desk_last = int(desktop_df["2023"].sum())
desk_this = int(desktop_df["2024"].sum())
print("{0}: {1}".format(this_year,desk_this))
print("{0}: {1}".format(last_year,desk_last))
print("Change Prct: {0}".format((100.00*(desk_this-desk_last)/desk_last)))


Desktop installs between 2023 and 2024
2024: 157787
2023: 185469
Change Prct: -14.925405323800742


In [60]:
# calculate the total number of packages (ROS 1 + ROS 2 + Python + Gazebo) downloaded for this month
# this year and last, calculate the percent change
r2 = package_df[package_df["distro"].isin(ros2)]
r1 = package_df[package_df["distro"].isin(ros1)]
print("Total Packages Downloaded {0} {1}".format(this_month,this_year))
total_this = package_df[this_year].sum()
print(total_this)

print("Total Packages Downloaded {0} {1}".format(last_month,last_year))
total_last = package_df[last_year].sum()
print(total_last)

print("Total Packages Downloaded Change Prct")
print(100*(total_this-total_last)/(total_last))

print("Estimated total Packages in {0}".format(this_year))
print(12*total_this)

print("Estimated total Packages in {0}".format(last_year))
print(12*total_last)


Total Packages Downloaded October 2024
47206990
Total Packages Downloaded October 2023
46472908.0
Total Packages Downloaded Change Prct
1.5795912749854173
Estimated total Packages in 2024
566483880
Estimated total Packages in 2023
557674896.0


In [30]:
# Calculate ROS 1 packages downloaded this year and last year, along with percent change
print("ROS 1 Packages Downloaded {0} {1}".format(last_month,last_year))
r1sum_last = r1[last_year].sum()
print(r1sum_last)

print("ROS 1 Packages Downloaded {0} {1}".format(this_month,this_year))
r1sum_this = r1[this_year].sum()
print(r1sum_this)

print("ROS 1 Packages Downloaded Change")
print(100*(r1sum_this-r1sum_last)/(r1sum_last))

ROS 1 Packages Downloaded October 2023
18206261.0
ROS 1 Packages Downloaded October 2024
12207096
ROS 1 Packages Downloaded Change
-32.95110951117311


In [31]:
# Calculate ROS 2 packages downloaded this year and last year, along with percent change
# NOTE ROS 1 + ROS 2 != Total Packages
# Total Pkgs = ROS1 + ROS2 + [Gazebo/Colcon/Ament/Catkin/Python]
print("ROS 2 Packages Downloaded {0} {1}".format(last_month,last_year))
r2sum_last = r2[last_year].sum()
print(r2sum_last)

print("ROS 2 Packages Downloaded {0} {1}".format(this_month,this_year))
r2sum_this = r2[this_year].sum()
print(r2sum_this)

print("ROS 2 Packages Downloaded Change")
print(100*(r2sum_this-r2sum_last)/(r2sum_last))

print("Esitmated ROS 2 Packages Downloaded in {0}".format(this_year))
print(r2sum_this*12)

ROS 2 Packages Downloaded October 2023
25046489.0
ROS 2 Packages Downloaded October 2024
31289068
ROS 2 Packages Downloaded Change
24.92396838534934
Esitmated ROS 2 Packages Downloaded in 2024
375468816


In [32]:
print("{0} {1} ROS 1 + ROS 2 Total Packages Downloaded".format(last_month,last_year))
print(r1sum_last+r2sum_last)

print("{0} {1} ROS 1 + ROS 2 Total Packages Downloaded".format(this_month,this_year))
print(r1sum_this+r2sum_this)

print("{0} {1} Non-ROS Packages Downloaded".format(last_month,last_year))
print(total_last-(r1sum_last+r2sum_last))

print("{0} {1} Non-ROS Packages Downloaded".format(this_month,this_year))
non_ros_this = total_this-(r1sum_this+r2sum_this)
print(non_ros_this)
#print(total_this-(r1sum_this+r2sum_this))

print("{0} {1} Total ROS Packages Downloaded".format(this_month,this_year))
total_ros_this = r1sum_this+r2sum_this
print(total_ros_this)
print("===============================")

October 2023 ROS 1 + ROS 2 Total Packages Downloaded
43252750.0
October 2024 ROS 1 + ROS 2 Total Packages Downloaded
43496164
October 2023 Non-ROS Packages Downloaded
3220158.0
October 2024 Non-ROS Packages Downloaded
3710826
October 2024 Total ROS Packages Downloaded
43496164


In [33]:
# Calculate the percent changes in ROS 1 and ROS 2 packages

print("Percent ROS 2 Pkgs Downloaded {0} {1}".format(last_month,last_year))
prct_r2_last = r2sum_last/(r1sum_last+r2sum_last)*100
print(prct_r2_last)

print("Percent ROS 1 Pkgs Downloaded {0} {1}".format(last_month,last_year))
prct_r1_last = r1sum_last/(r1sum_last+r2sum_last)*100
print(r1sum_last/(r1sum_last+r2sum_last)*100)

print("============================")
print("Percent ROS 2 Pkgs Downloaded {0} {1}".format(this_month,this_year))
prct_r2_this = r2sum_this/(r1sum_this+r2sum_this)*100
print(prct_r2_this)

print("Percent ROS 1 Pkgs Downloaded {0} {1}".format(this_month,this_year))
prct_r1_this = r1sum_this/(r1sum_this+r2sum_this)*100
print(prct_r1_this)

print("============================")

print("YoY change in ROS 2 downloads as % (last year minus this year)")
print(prct_r2_this-prct_r2_last)

print("YoY change in ROS 1 downloads as %")
print(prct_r1_this-prct_r1_last)


Percent ROS 2 Pkgs Downloaded October 2023
57.907275259954574
Percent ROS 1 Pkgs Downloaded October 2023
42.092724740045426
Percent ROS 2 Pkgs Downloaded October 2024
71.9352354842142
Percent ROS 1 Pkgs Downloaded October 2024
28.064764515785807
YoY change in ROS 2 downloads as % (last year minus this year)
14.02796022425963
YoY change in ROS 1 downloads as %
-14.027960224259619


In [34]:
r2_this = (r2sum_this/(r1sum_this+r2sum_this)*100)
r2_last = (r2sum_last/(r1sum_last+r2sum_last)*100)
print("Percent ROS 2 in {0} {1}".format(this_year,this_month))
print(r2_this)

print("Percent ROS 2 in {0} {1}".format(last_year,last_month))
print(r2_last)

print("Change in ROS 2 between now and the last measurement")
print(r2_this-r2_last)
print("-----------------------------------")

print("Total Packages in {0} {1}".format(this_year,this_month))
print(package_df[this_year].sum())

print("==>Sanity Check ROS packages + other packages this year")
print(total_ros_this+non_ros_this)

print("ESTIMATED Total Packages in the year {0}".format(this_year))
print(12*package_df[this_year].sum())


Percent ROS 2 in 2024 October
71.9352354842142
Percent ROS 2 in 2023 October
57.907275259954574
Change in ROS 2 between now and the last measurement
14.02796022425963
-----------------------------------
Total Packages in 2024 October
47206990
==>Sanity Check ROS packages + other packages this year
47206990
ESTIMATED Total Packages in the year 2024
566483880


In [35]:
print(distro_df)
print(distro_df[this_year].sum())
print(distro_df[last_year].sum())

           name   2024   2023
0     boxturtle   0.00   0.00
1       cturtle   0.00   0.00
2   diamondback   0.00   0.00
3      electric   0.00   0.00
4        fuerte   0.00   0.00
5        groovy   0.00   0.00
6         hydro   0.00   0.00
7        indigo   0.05   0.24
8          jade   0.00   0.00
9       kinetic   0.39   1.14
10        lunar   0.00   0.04
11      melodic   2.32   5.66
12       noetic  22.18  30.51
13       ardent   0.00   0.00
14       bouncy   0.00   0.00
15      crystal   0.00   0.01
16      dashing   0.11   0.20
17     eloquent   0.09   0.16
18         foxy   3.58   6.37
19     galactic   1.25   2.33
20       humble  39.38  32.79
21         iron   5.26   4.97
22        jazzy   9.36    NaN
23      rolling   4.90   4.82
88.87
89.24000000000001


In [36]:
# Calculate percentages for active distros, old distros, 
everything_else_last = 100.0-distro_df[last_year].sum()
everything_else_this = 100.0-distro_df[this_year].sum()
print("Percentage downloads that are not ROS packages (i.e. python):")
print("{0} {1}: {2}".format(this_month,this_year,everything_else_this))
print("{0} {1}: {2}".format(last_month,last_year,everything_else_last))
print("------------")


leftovers = distro_df[distro_df[this_year]<1]
print("Distros less than one percent")
print(leftovers)

all_other_this = leftovers[this_year].sum()
all_other_last = leftovers[last_year].sum()
print("Sum of all distros less than one")

print("{0} {1}: {2}".format(this_month,this_year,all_other_this))
print("{0} {1}: {2}".format(last_month,last_year,all_other_last))
print("-------------")
temp = distro_df[distro_df[this_year]>=1]
print("Distros greater than 1%")
print(temp)

Percentage downloads that are not ROS packages (i.e. python):
October 2024: 11.129999999999995
October 2023: 10.759999999999991
------------
Distros less than one percent
           name  2024  2023
0     boxturtle  0.00  0.00
1       cturtle  0.00  0.00
2   diamondback  0.00  0.00
3      electric  0.00  0.00
4        fuerte  0.00  0.00
5        groovy  0.00  0.00
6         hydro  0.00  0.00
7        indigo  0.05  0.24
8          jade  0.00  0.00
9       kinetic  0.39  1.14
10        lunar  0.00  0.04
13       ardent  0.00  0.00
14       bouncy  0.00  0.00
15      crystal  0.00  0.01
16      dashing  0.11  0.20
17     eloquent  0.09  0.16
Sum of all distros less than one
October 2024: 0.64
October 2023: 1.79
-------------
Distros greater than 1%
        name   2024   2023
11   melodic   2.32   5.66
12    noetic  22.18  30.51
18      foxy   3.58   6.37
19  galactic   1.25   2.33
20    humble  39.38  32.79
21      iron   5.26   4.97
22     jazzy   9.36    NaN
23   rolling   4.90   4.82


In [37]:
# Create a short table with distro change data
# we really should merge this with the raw, per package data at some point
distro_t = distro_df[distro_df[this_year]>=1].copy()
distro_t.loc[-1]= ["all other distros",all_other_this,all_other_last]
distro_t.loc[-2]= ["other pacakges",everything_else_this,everything_else_last]  # adding a row
distro_t["YoY Change Prct"] = distro_t[this_year]-distro_t[last_year]
print(distro_t)
fname = "DistroPrctChange{0}{1}to{2}{3}.csv".format(last_month,last_year,this_month,this_year)
print("Saved to {0}".format(fname))
distro_t.to_csv(fname)
# Sanity Check, should be 100
print("------ Sanity Check")
print(distro_t.sum())


                  name   2024   2023  YoY Change Prct
 11            melodic   2.32   5.66            -3.34
 12             noetic  22.18  30.51            -8.33
 18               foxy   3.58   6.37            -2.79
 19           galactic   1.25   2.33            -1.08
 20             humble  39.38  32.79             6.59
 21               iron   5.26   4.97             0.29
 22              jazzy   9.36    NaN              NaN
 23            rolling   4.90   4.82             0.08
-1   all other distros   0.64   1.79            -1.15
-2      other pacakges  11.13  10.76             0.37
Saved to DistroPrctChangeOctober2023toOctober2024.csv
------ Sanity Check
name               melodicnoeticfoxygalactichumbleironjazzyrollin...
2024                                                           100.0
2023                                                           100.0
YoY Change Prct                                                -9.36
dtype: object


In [45]:
# Generate side by side plots of distros
temp = distro_df[distro_df[this_year]>=1].copy()
new_names = [a[0].upper()+a[1:] for a in temp["name"]]
print(new_names)
temp["name"] = pd.Series(data=new_names,copy=False,index=temp["name"].index)

new_row = pd.DataFrame(data=[{"name":"All Other Distros","2024":all_other_this,"2023":all_other_last},
                             {"name":"Python Pkgs","2024":everything_else_this,"2023":everything_else_last}])
temp = pd.concat([temp, new_row], ignore_index=True)
print(temp["name"])
print(temp)
print(temp[last_year].sum())
print(temp[this_year].sum())

title ={
    'text': "Plot Title",
    'y':0.9,
    'x':0.5,
    'xanchor': 'center',
    'yanchor': 'bottom'}

trace1 = go.Pie(values= temp[last_year], labels = temp["name"],domain=dict(x=[0, 0.5]),title="",sort= False)
trace2 = go.Pie(values= temp[this_year], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="", sort=False)
layout = go.Layout(title="Download Percentage from packages.ros.org October 2023 vs October 2024")
data = [trace1,trace2]
fig = go.Figure(data=data, layout=layout)


fig.update_layout(
    font_family="Arial",
    font_color="#22314E",
    font_size=16,
    title_font_size = 24,
    title_font_family="Arial",
    title_font_color="#22314E"
)

fig.update_traces(marker=dict(colors=['#FFAAAA','#FF6666','#CCCCFF','#9999FF',
                                      '#6666FF','#3333FF','#fcdc3f',"#7cc14b","#EE00FF","#FF00FF"]))

fig.show()


['Melodic', 'Noetic', 'Foxy', 'Galactic', 'Humble', 'Iron', 'Jazzy', 'Rolling']
0              Melodic
1               Noetic
2                 Foxy
3             Galactic
4               Humble
5                 Iron
6                Jazzy
7              Rolling
8    All Other Distros
9          Python Pkgs
Name: name, dtype: object
                name   2024   2023
0            Melodic   2.32   5.66
1             Noetic  22.18  30.51
2               Foxy   3.58   6.37
3           Galactic   1.25   2.33
4             Humble  39.38  32.79
5               Iron   5.26   4.97
6              Jazzy   9.36    NaN
7            Rolling   4.90   4.82
8  All Other Distros   0.64   1.79
9        Python Pkgs  11.13  10.76
100.0
99.99999999999999


In [58]:
trace1 = go.Pie(values= temp[this_year], labels = temp["name"],title="",sort= False)
layout = go.Layout(title="ROS Package Distro Percentages {0} {1}".format(this_month,this_year))
fig = go.Figure(data=trace1, layout=layout)


fig.update_layout(
    font_family="Arial",
    font_color="#22314E",
    font_size=16,
    title_font_size = 24,
    title_font_family="Arial",
    title_font_color="#22314E"
)

fig.update_traces(marker=dict(colors=["#b50411","#f98874","#1b6599","#3c77a9","#5e88b3","#7b9abd","#96acc7","#b0bed1"
,"#fdc6ba","#FFFF99"]))

fig.show()


In [40]:
trace2 = go.Pie(values= temp[this_year], labels = temp["name"],title="",sort= False)
layout = go.Layout(title="ROS Package Downloads by Distro {0} {1}".format(this_month,this_year))
fig = go.Figure(data=trace2, layout=layout)


fig.update_layout(
    font_family="Arial",
    font_color="#22314E",
    font_size=16,
    title_font_size = 24,
    title_font_family="Arial",
    title_font_color="#22314E"
)

fig.update_traces(marker=dict(colors=["#292f56",
"#412f4c",
"#592f43",
"#702f39",
"#882f30",
"#a02e26",
"#b82e1d",
"#cf2e13",
"#e72e0a",
"#ff2e00",]))

fig.show()


In [41]:
# Download percentages by year
temp["diff"]=temp[this_year]-temp[last_year]
temp = arch_df[arch_os_df[this_year]>0.00001]

trace1 = go.Pie(values= temp[this_year], labels = temp["name"],domain=dict(x=[0.5, 1]),title=this_year)
trace2 = go.Pie(values= temp[last_year], labels = temp["name"],domain=dict(x=[0.0,0.5]),title=last_year)

layout = go.Layout(title="ROS Packages Downloaded By Architecture - {0} {1} vs {2} {3}".format(last_month,last_year,this_year,this_month))
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
fig.show()


Boolean Series key will be reindexed to match DataFrame index.



In [52]:
fig = px.bar(distro_df, x="name", y=["2023", "2022"], title="Wide-Form Input")
fig.show()

ValueError: All arguments should have the same length. The length of argument `y` is 2, whereas the length of  previously-processed arguments ['name'] is 24

In [53]:
fname = "../data/march_2020.txt"
stats_mar_2020 = process_package_dump(fname,"3/2020")
fname = "../data/march_2021.txt"
stats_mar_2021 = process_package_dump(fname,"3/2021")

joined_distro = join_stats([stats_mar_2020["distro"],stats_mar_2021["distro"]])
mar_distro_df = pd.DataFrame(data=joined_distro)
mar_distro_df.to_csv("march_distro.csv")
mar_distro_df.head()
stats_mar_2021 = process_package_dump(fname,"3/2021")

In [115]:
temp = mar_distro_df[mar_distro_df["7/2021"]>0.5]

trace1 = go.Pie(values= temp["7/2020"], labels = temp["name"],domain=dict(x=[0.5,1.0]),title="7/2020")
trace2 = go.Pie(values= temp["7/2021"], labels = temp["name"],domain=dict(x=[0, 0.5]),title="7/2021")

layout = go.Layout(title="ROS Index Download Percentage: March 2020 vs. March 2021",)
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
fig.show()


KeyError: '7/2021'

In [None]:
cd ../data/


In [1]:
stats_this

NameError: name 'stats_this' is not defined