In [None]:
import json
from pathlib import Path
import pandas as pd
import seaborn as sns
import ipywidgets as widgets

# Load the column -> dtype mapping and use it to parse the incident CSV.
dtypes = json.loads(Path("data/dtypes.json").read_text())
df = pd.read_csv("data/vcdb.csv", dtype=dtypes)

# Analysis window for the incident year (both bounds are kept).
since = 2009
until = 2022

# Remove incidents explicitly marked as not confirmed; only rows whose flag
# equals False are selected for removal.
unconfirmed = df.loc[df['security_incident.Confirmed'] == False].index
df = df.drop(index=unconfirmed)

# Remove incidents dated before `since` or after `until`.
incident_year = df['timeline.incident.year']
outside_window = df.loc[(incident_year < since) | (incident_year > until)].index
df = df.drop(index=outside_window)

print(df.shape)

In [None]:
# Bucket the column names by prefix.
all_columns = list(df)
actions = [name for name in all_columns if name.startswith("action.")]
# Top-level action categories are the "action.<X>" columns, i.e. exactly one dot.
category_names = [name for name in actions if name.count(".") == 1]
incident_timeline = [name for name in all_columns if name.startswith("timeline.incident")]
incident_status = [name for name in all_columns if name.startswith("security_incident.")]

# Shared widget style; 'initial' lets the description label use the width it needs.
style = {'description_width': 'initial'}

# Selector for the action category to analyse.
category = widgets.Dropdown(
    description='Action categories:',
    options=category_names,
    value='action.Hacking',
    style=style,
    disabled=False,
)

# Selector for which group of detail columns of that category to break down.
details = widgets.Dropdown(
    description='Category details:',
    options=["notes", "result", "variety", "vector"],
    value='variety',
    style=style,
    disabled=False,
)

display(category, details)

In [None]:
# Build df_plot: for every incident year, the percentage of incidents in the
# selected action category that are flagged with each detail column.

# Snapshot the widget selections so the whole cell uses one consistent choice.
selected_category = category.value
selected_detail = details.value
year_column = "timeline.incident.year"

# Keep the incidents of the selected category. Only rows whose flag equals
# False are removed, and the rows to drop are looked up on the same frame that
# is being filtered.
not_in_category = df.loc[df[selected_category] == False].index
df_base = df.drop(index=not_in_category)

# Detail columns are named "<category, lower-cased>.<detail>.<value>".
detail_prefix = f"{selected_category.lower()}.{selected_detail}."
category_details = [x for x in df_base.columns if x.startswith(detail_prefix)]
if not category_details:
    # Fail here with an explanation instead of building an empty frame that
    # only breaks later, in the plotting cells.
    raise ValueError(
        f"No columns start with {detail_prefix!r}; "
        "choose a different category/detail combination."
    )
short_category_details = [x.split(".")[-1] for x in category_details]

df_base = df_base[[year_column] + category_details]
grouped = df_base.groupby(year_column)

# Number of incidents per year: the denominator of every percentage.
incidents_per_year = grouped.size()

# Per year, count the rows where each detail flag is True. Missing values are
# treated as "not set", which matches value_counts() ignoring them.
detail_flags = df_base[category_details].eq(True).fillna(False).astype("int64")
true_counts = detail_flags.groupby(df_base[year_column]).sum()

# Percentages as a numeric (float) frame: one row per year, one column per
# short detail name. Computed in one step rather than filling an empty,
# object-dtype frame row by row.
df_plot = true_counts.mul(100).div(incidents_per_year, axis=0)
df_plot.columns = short_category_details
df_plot = df_plot.rename_axis(index=None)

# Print the sample size behind each year's percentages.
for year, total in incidents_per_year.items():
    print(year, total, len(category_details))


In [None]:
# Cut-off applied to each column's maximum value.
threshold = 10
# Keep a column unless its maximum is <= threshold. The test is written as
# "not (max <= threshold)" so a column whose maximum is NaN is kept.
columns_to_keep = [
    col for col in df_plot
    if not (df_plot[col].max() <= threshold)
]
df_plot = df_plot[columns_to_keep]

In [None]:
# Line chart of df_plot: one line per column, the frame's index on the x axis.
ax = df_plot.plot(kind="line", figsize=(20, 10))
# Label the axes so the figure can be read on its own: the index holds incident
# years and the values are percentages of that year's incidents.
ax.set(xlabel="Incident year", ylabel="Share of incidents (%)");

In [None]:
# Same data drawn with seaborn's default theme.
sns.set_theme()
# df_plot may hold its numbers in object-dtype columns (it can be built by
# filling an empty frame); cast so seaborn receives plain numeric data.
ax = sns.lineplot(data=df_plot.astype(float))
# Label the axes so the figure can be read on its own: the index holds incident
# years and the values are percentages of that year's incidents.
ax.set(xlabel="Incident year", ylabel="Share of incidents (%)");


In [None]:
display(
    df_plot.columns,
    df_plot.index,
    df_plot.head(30),