In [None]:
import json
import pandas as pd
import seaborn as sns
from pathlib import Path

# Load the VCDB export, applying the per-column dtypes stored next to it.
dtypes = json.loads(Path("data/dtypes.json").read_text())
df = pd.read_csv("data/vcdb.csv", dtype=dtypes)

# Inclusive range of incident years kept for the analysis.
since = 2009
until = 2022

# Build one "discard" mask and filter once, rather than mutating the frame
# in place several times. Only rows that positively fail a check are
# discarded: a row whose confirmation flag or year is missing is kept.
# fillna(False) makes that explicit for nullable dtypes, where a comparison
# yields <NA> instead of False.
confirmed = df["security_incident.Confirmed"]
incident_year = df["timeline.incident.year"]
not_confirmed = confirmed.eq(False).fillna(False).astype(bool)
outside_range = (
    ((incident_year < since) | (incident_year > until))
    .fillna(False)
    .astype(bool)
)
df = df.loc[~(not_confirmed | outside_range)]
print(df.shape)

In [None]:
# Top-level action flags are the columns named exactly "action.<category>"
# (one dot); deeper columns such as "action.<category>.<detail>" are skipped.
year_col = "timeline.incident.year"
action_names = [
    col for col in df.columns
    if col.startswith("action.") and col.count(".") == 1
]
short_action_names = [col.split(".")[1] for col in action_names]

df_interesting = df[[year_col] + action_names]
grouped = df_interesting.groupby(year_col)

# Report the number of incidents per year (the denominator of each share)
# together with the number of action categories being tallied.
for group_name, total in grouped.size().items():
    print(group_name, total, len(action_names))

# A flag counts only when it is exactly True; missing values count as
# "not set" but the row still contributes to the yearly total.
# NOTE(review): assumes the action columns hold booleans - confirm against
# data/dtypes.json.
action_is_set = (
    df_interesting[action_names].eq(True).fillna(False).astype(bool)
)

# The mean of a boolean column is the fraction of True values, so the
# per-year mean times 100 is the percentage of that year's incidents with
# the action set. Computing it this way yields float columns directly
# instead of an object-dtype frame filled row by row.
df_plot = (
    action_is_set
    .groupby(df_interesting[year_col])
    .mean()
    .mul(100)
    .rename(columns=dict(zip(action_names, short_action_names)))
)
df_plot.index.name = None  # keep the year index unnamed


In [None]:
# One line per action category: x is the incident year (df_plot's index),
# y is the percentage of that year's incidents with the action set.
ax = df_plot.plot(kind="line", figsize=(20, 8))
ax.set(
    title="Share of incidents per action category, by year",
    xlabel="Incident year",
    ylabel="Incidents with action (%)",
);  # trailing semicolon keeps the notebook from echoing the return value

In [None]:
# Inspect the plotted table: its column labels, its index, and up to the
# first 30 rows, each rendered with the notebook's rich display.
for plot_view in (df_plot.columns, df_plot.index, df_plot.head(30)):
    display(plot_view)