In [None]:
import json
import pandas as pd
import seaborn as sns
from pathlib import Path

# Parse the VCDB export using the column dtypes stored alongside it.
dtypes = json.loads(Path("data/dtypes.json").read_text())
df = pd.read_csv("data/vcdb.csv", dtype=dtypes)

# Inclusive range of incident years to keep.
since = 2009
until = 2022

# Drop incidents explicitly flagged as not confirmed. The `== False`
# comparison only matches literal False, so rows with a missing flag
# (if there are any) are left in place.
is_unconfirmed = df['security_incident.Confirmed'] == False
df.drop(df.loc[is_unconfirmed].index, inplace=True)

# Drop incidents dated before `since` or after `until`. Rows with a missing
# year fail both comparisons and are therefore kept.
incident_year = df['timeline.incident.year']
is_out_of_range = (incident_year < since) | (incident_year > until)
df.drop(df.loc[is_out_of_range].index, inplace=True)
print(df.shape)

In [None]:
# Top-level action flags: columns named exactly "action.<category>" (two
# dot-separated parts), which excludes nested columns such as "action.x.y".
action_names = [x for x in list(df) if x.startswith("action.") and len(x.split(".")) == 2]
df_interesting = df[["timeline.incident.year"] + action_names]

# Count the rows where each flag is True. Comparing and summing, instead of
# indexing value_counts() with True, gives 0 for a category that never occurs
# in the filtered data rather than raising KeyError.
action_counts = []
for action in action_names:
    count = int((df_interesting[action] == True).sum())
    action_counts.append(count)

# Strip the "action." prefix for display and rank categories by frequency.
short_action_names = [x.split(".")[1] for x in action_names]
results = pd.DataFrame(
    data=action_counts, index=short_action_names, columns=["count"]
).sort_values(by="count", ascending=False)
results.head(10)

In [None]:

# Bar chart of the per-category counts held in `results`, one bar per index entry.
sns.set_theme()
plot = sns.barplot(data=results, x=results.index, y="count")
plot.set_xlabel("Categories")
plot.set_ylabel("Events count")
plot.set_title("Category occurrence count")

# Re-apply the current tick positions first: set_xticklabels is meant to be
# used only after the tick positions have been fixed with set_xticks.
plot.set_xticks(plot.get_xticks())
category_labels = plot.get_xticklabels()
plot.set_xticklabels(category_labels, rotation=45)
display(plot)