# Analysis of vulnerabilities in the Rust ecosystem

This notebook analyses the prevalence and types of vulnerabilities found in packages published by `Cargo`.

In [64]:
from typing import Union
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
from cvss import CVSS2, CVSS3 # converts CVSS strings to score

## Data Aggregation

Note: this data was pulled on 10/28/2023

In [71]:
def convert_severity(severity) -> int:
    """Turn one OSV severity entry into a single CVSS score.

    `severity` is expected to look like {"type": ..., "score": ...}, where
    "score" is the string handed to the CVSS parser. Entries whose type is
    "CVSS_V3" are parsed with CVSS3; every other type is parsed with CVSS2.
    The first element of the parser's `scores()` result is returned
    (presumably the base score -- verify against the cvss package).

    NOTE(review): the `-> int` annotation looks too narrow, since CVSS
    scores are normally fractional (e.g. 7.5) -- confirm and adjust.
    """
    # Select the parser class first, then run the shared scoring path once.
    parser = CVSS3 if severity["type"] == "CVSS_V3" else CVSS2
    first_score = parser(severity["score"]).scores()[0]
    return first_score


def clean_data(data):
    """Flatten a parsed OSV advisory dict, in place, into a single-level record.

    The dict is mutated and nothing is returned. After the call:

    * "aliases", "affected", "references" and "database_specific" are removed;
    * "severity" holds the converted score of the first severity entry, or
      None when the advisory lists no severity;
    * "affected_name", "categories", "affected_info" and "events" are copied
      from the first entry of "affected" (and the first of its "ranges");
    * "reference" holds the URL of the second entry of "references", or None
      when fewer than two references are present.

    Only the first affected package, first range and first severity entry
    are considered; any further entries are ignored.

    Raises:
        KeyError / IndexError: if "affected" is missing or empty, or its
            first entry lacks the package / database_specific / ranges
            fields read below.
    """
    # Optional in OSV, so tolerate advisories that omit it.
    data.pop("aliases", None)

    # A missing "severity" key is treated like an empty list.
    severities = data.get("severity") or []
    data["severity"] = convert_severity(severities[0]) if severities else None

    data_affected = data["affected"][0]
    data["affected_name"] = data_affected["package"]["name"]
    data["categories"] = data_affected["database_specific"]["categories"]
    data["affected_info"] = data_affected["database_specific"]["informational"]
    data["events"] = data_affected["ranges"][0]["events"]
    del data["affected"]

    # Index 1 is kept as the chosen reference; guard against advisories with
    # fewer than two references instead of raising IndexError.
    references = data.pop("references", None) or []
    data["reference"] = references[1]["url"] if len(references) > 1 else None

    data.pop("database_specific", None)

def json_to_osv(file_path: str) -> Union[pd.DataFrame, None]:
    """Load one OSV-format JSON advisory into a DataFrame.

    Args:
        file_path: Path to a JSON file in the OSV format.

    Returns:
        A DataFrame built from the cleaned advisory, or None when the
        advisory carries a "withdrawn" key.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale encoding, which is not UTF-8 everywhere.
    with open(file_path, 'r', encoding="utf-8") as file:
        data = json.load(file)
    # Withdrawn advisories are excluded from the analysis.
    if "withdrawn" in data:
        return None
    clean_data(data)
    # Build the frame with one entry per field, then transpose so the field
    # names become the columns.
    return pd.DataFrame.from_dict(data, orient="index").T
    

In [72]:
# Folder containing RustSec advisories in the OSV format
folder_path = './advisory-db-osv/crates/'

# Read in all vulnerabilities, one single-advisory DataFrame per JSON file.
dataframes = []
# os.listdir returns entries in arbitrary order; sort so the row order of
# `df` is the same on every run.
for filename in sorted(os.listdir(folder_path)):
    if filename.endswith('.json'):
        # Construct the full path to the JSON file
        file_path = os.path.join(folder_path, filename)
        # Read the JSON file into a DataFrame (None for withdrawn advisories)
        osv = json_to_osv(file_path)
        if osv is not None:
            # Reuse the frame parsed above instead of parsing the file again.
            dataframes.append(osv)

# Concatenate all DataFrames into a single DataFrame
df = pd.concat(dataframes, ignore_index=True)

In [76]:
# Only the value of a cell's last expression is rendered, so the `df.head()`
# preview is discarded here and just the dtypes listing is shown.
df.head()
df.dtypes

id               object
modified         object
published        object
related          object
summary          object
details          object
severity         object
affected_name    object
categories       object
affected_info    object
events           object
reference        object
dtype: object