In [1]:
from typing import Dict
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from plot import plot_hbar_groups, plot_hlines

In [7]:
# Colour assigned to each registry type; handed to plot_hlines by plot_df.
COLORS: Dict[str, str] = {
    "chainguard": "blue",
    "rapidfort": "firebrick",
    "alpine": "pink",
    "original": "gray",
}

def format_df(df: pd.DataFrame, column: str) -> tuple:
    """Collect ``column`` into one list per registry, aligned by image.

    Args:
        df: Frame containing ``registry_type`` and ``image_type`` columns in
            addition to ``column``.
        column: Name of the column whose values are collected.

    Returns:
        A ``(members, images)`` pair.

        ``images`` is ``df["image_type"].unique()`` (order of first
        appearance). ``members`` maps every distinct ``registry_type`` to a
        list holding one entry per element of ``images``, in the same order:
        the first value of ``column`` found for that (registry, image) pair,
        or ``0`` when the pair has no row.
    """
    registries = df["registry_type"].unique()
    images = df["image_type"].unique()
    members = {}

    for reg in registries:
        # The registry filter does not depend on the image, so apply it once
        # per registry instead of once per (registry, image) pair.
        reg_rows = df[df["registry_type"] == reg]
        values = []
        for img in images:
            matches = reg_rows.loc[reg_rows["image_type"] == img, column]
            # Missing pair -> 0 placeholder; duplicates -> first row wins.
            values.append(matches.iloc[0] if len(matches) else 0)
        members[reg] = values

    return members, images

def plot_df(df: pd.DataFrame, column: str):
    """Plot ``column`` for ``df`` on a freshly created 10x20 figure.

    The frame is reshaped with ``format_df`` and the result is passed to
    ``plot_hlines`` together with the ``COLORS`` mapping.

    Args:
        df: Frame accepted by ``format_df`` (needs ``registry_type``,
            ``image_type`` and ``column``).
        column: Name of the column to plot.
    """
    # Only the Axes is needed; the Figure handle is discarded.
    axes = plt.subplots(figsize=(10, 20))[1]
    series_by_registry, image_labels = format_df(df, column)
    # "original" is presumably the baseline series for plot_hlines -- confirm
    # against the plot module.
    plot_hlines(
        axes,
        series_by_registry,
        "original",
        image_labels,
        COLORS,
    )

In [58]:
# Load the per-image size, component and vulnerability tables from data/out
# and combine them into `df`, keyed by (registry_type, image_type).
_IMAGE_KEYS = ["registry_type", "image_type"]


def _count_rows_per_image(csv_name, counted_column, count_name):
    """Count rows of a ``data/out`` CSV per (registry_type, image_type).

    The ``type`` column is dropped before grouping. ``count()`` tallies the
    non-null values of every remaining column; only ``counted_column`` is
    renamed, to ``count_name``.
    """
    raw = pd.read_csv(os.path.join("data", "out", csv_name))
    grouped = raw.drop(columns=["type"]).groupby(_IMAGE_KEYS)
    return grouped.count().reset_index().rename(columns={counted_column: count_name})


sz_df = pd.read_csv(os.path.join("data", "out", "metadata.csv"))
# Rescale by 1e6 (presumably bytes -> MB; confirm against the data producer).
sz_df["image_size"] = sz_df["image_size"] / 1000000

comps_df = _count_rows_per_image("components.csv", "component_name", "n_components")
vulns_df = _count_rows_per_image("vulns.csv", "severity", "n_vulns")

# NOTE(review): vulns_df is the left side of both merges, so an image with no
# rows in vulns.csv is absent from `df` rather than present with n_vulns == 0.
# Confirm whether a missing image means "zero vulnerabilities" or "not scanned".
df = vulns_df.merge(sz_df, how="left", on=_IMAGE_KEYS)
df = df.merge(comps_df, how="left", on=_IMAGE_KEYS)

# Vulnerability densities: per unit of image_size and per component.
df["vpm"] = df["n_vulns"] / df["image_size"]
df["vpc"] = df["n_vulns"] / df["n_components"]

In [60]:
# Figure 1: Image Size by Registry
# Shown as a describe() summary table, one row per registry_type.

size_by_registry = sz_df.groupby("registry_type")
size_by_registry.describe()
# Plot variant, currently disabled:
# plot_df(sz_df, "image_size")

Unnamed: 0_level_0,image_size,image_size,image_size,image_size,image_size,image_size,image_size,image_size
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
registry_type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
alpine,26.0,99.947324,83.94957,9.700118,27.651861,68.045138,153.172769,258.561366
chainguard,28.0,111.159502,96.422846,11.875829,23.449918,68.28815,185.483206,296.15853
original,28.0,199.488285,129.628374,13.987245,102.489797,150.238329,266.438794,521.292204
rapidfort,28.0,91.001123,79.405125,6.54298,23.760102,73.774307,138.587401,252.669227


In [61]:
# Figure 2: Number of Components per Image by Registry
# Shown as a describe() summary table, one row per registry_type.

components_by_registry = comps_df.groupby("registry_type")
components_by_registry.describe()
# Plot variant, currently disabled:
# plot_df(comps_df, "n_components")

Unnamed: 0_level_0,n_components,n_components,n_components,n_components,n_components,n_components,n_components,n_components
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
registry_type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
alpine,26.0,103.461538,127.859683,17.0,20.0,34.0,128.75,454.0
chainguard,28.0,128.0,129.13444,22.0,31.5,71.5,177.0,455.0
original,28.0,244.285714,213.820599,10.0,112.25,149.0,322.5,934.0
rapidfort,28.0,108.5,135.73872,4.0,17.0,49.5,150.0,476.0


In [62]:
# Figure 3: Number of Vulnerabilities per Image by Registry
# Shown as a describe() summary table, one row per registry_type.
# NOTE(review): vulns_df only has rows for images that appear in vulns.csv,
# so images without any vulnerability rows are not part of these statistics.

vulns_by_registry = vulns_df.groupby("registry_type")
vulns_by_registry.describe()
# Plot variant, currently disabled:
# plot_df(vulns_df, "n_vulns")

Unnamed: 0_level_0,n_vulns,n_vulns,n_vulns,n_vulns,n_vulns,n_vulns,n_vulns,n_vulns
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
registry_type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
alpine,18.0,48.111111,82.975467,1.0,2.0,5.5,42.0,257.0
chainguard,7.0,2.714286,2.56348,1.0,1.0,2.0,3.0,8.0
original,26.0,108.615385,68.025335,2.0,72.25,103.0,137.25,263.0
rapidfort,26.0,37.807692,23.096353,1.0,24.5,30.5,49.5,93.0


In [80]:
# Figure 4: Reduction in Vulnerabilities per Component per Image by Registry
# dvpc = vpc_reduced - vpc_original, so a negative dvpc means fewer
# vulnerabilities per component than the "original" image of that image_type.

# Baseline: vpc of the "original" registry, one row per image_type.
mask = df["registry_type"] == "original"
original_df = df.loc[mask, ["image_type", "vpc"]]

# Attach the baseline to every (image, registry) row.
# NOTE(review): fillna(0) turns a missing baseline (image_type with no
# "original" row in df) into vpc_original == 0 -- confirm this is intended.
per_image_vpc = df[["image_type", "registry_type", "vpc"]]
reduced_df = per_image_vpc.merge(
    original_df,
    how="left",
    on="image_type",
    suffixes=("_reduced", "_original"),
).fillna(0)

reduced_df["dvpc"] = reduced_df["vpc_reduced"] - reduced_df["vpc_original"]
# Disabled, so the summary below also includes vpc_reduced and vpc_original:
# reduced_df = reduced_df.drop(["vpc_reduced", "vpc_original"], axis=1)

reduced_df.groupby("registry_type").describe()
# Plot variant, currently disabled:
# plot_df(reduced_df, "dvpc")

Unnamed: 0_level_0,vpc_reduced,vpc_reduced,vpc_reduced,vpc_reduced,vpc_reduced,vpc_reduced,vpc_reduced,vpc_reduced,vpc_original,vpc_original,vpc_original,vpc_original,vpc_original,dvpc,dvpc,dvpc,dvpc,dvpc,dvpc,dvpc,dvpc
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
registry_type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
alpine,18.0,0.832074,1.506012,0.002203,0.022478,0.064901,1.614387,5.823529,18.0,0.455625,...,0.773811,1.040816,18.0,0.376449,1.61402,-0.935553,-0.559971,-0.138375,0.890694,5.797555
chainguard,7.0,0.015376,0.013164,0.002421,0.005988,0.011594,0.020619,0.040404,7.0,0.324395,...,0.488971,0.743243,7.0,-0.309019,0.26968,-0.722625,-0.475667,-0.199771,-0.132193,-0.02502
original,26.0,0.622743,0.491023,0.025974,0.240641,0.574478,0.929204,1.75,26.0,0.622743,...,0.929204,1.75,26.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
rapidfort,26.0,0.833839,0.665455,0.032143,0.252016,0.589855,1.488636,2.058824,26.0,0.555435,...,0.884134,1.681818,26.0,0.278404,0.513283,-0.652655,-0.093609,0.148529,0.567231,1.390029
