In [None]:
%matplotlib inline
%matplotlib notebook

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
# Show the kernel's working directory; relative paths are resolved against it.
working_dir = os.getcwd()
print(working_dir)

In [None]:
# Read the JSON file at the relative path below into `df`, then preview the
# first rows as the cell's output.
data_path = "../data/cleaned_cyberattacks.json"
df = pd.read_json(data_path)
df.head()

In [3]:
# Report the frame's (rows, columns) shape and the full list of column names.
dataset_shape = df.shape
column_names = df.columns.tolist()
print("Dataset shape:", dataset_shape)
print("Columns:", column_names)

Dataset shape: (21628, 27)
Columns: ['country', 'year', 'attack_type', 'target_industry', 'financial_loss_(in_million_$)', 'number_of_affected_users', 'attack_source', 'security_vulnerability_type', 'defense_mechanism_used', 'incident_resolution_time_(in_hours)', 'id', 'title', 'category', 'scenario_description', 'tools_used', 'attack_steps', 'target_type', 'vulnerability', 'mitre_technique', 'impact', 'detection_method', 'solution', 'tags', 'source', 'main_category', 'sub_category', 'topic']


In [None]:
# Replace missing values in every object-dtype column with the placeholder
# "Unknown".
# The filled column is assigned back to `df` rather than calling
# fillna(..., inplace=True) on `df[col]`: that form acts on an intermediate
# Series, emits a FutureWarning on recent pandas 2.x releases, and leaves `df`
# unchanged when Copy-on-Write is enabled.
for col in df.select_dtypes(include=['object']).columns:
    df[col] = df[col].fillna("Unknown")


In [None]:
# ------------------ Attack type probability ------------------
# Relative frequency of each attack type: the per-type row count divided by
# the sum of all per-type counts.
attack_counts = df['attack_type'].value_counts()
attack_probs = attack_counts.div(attack_counts.sum())

# Draw on an explicit figure/axes pair instead of the implicit pyplot state.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=attack_probs.index, y=attack_probs.values, palette="crest", ax=ax)
ax.set_title("Probability of Each Cyberattack Type")
ax.set_ylabel("Probability")
ax.set_xlabel("Attack Type")
# Slant the category labels and right-align them under their bars.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()

In [None]:
# ------------------ Attacks by country ------------------
# Count plot with one group of bars per country, split by attack type.
# The whole plot is skipped when the frame has no 'country' column.
if 'country' in df:
    fig, ax = plt.subplots(figsize=(14, 6))
    sns.countplot(data=df, x='country', hue='attack_type', palette="muted", ax=ax)
    ax.set_title("Cyberattacks by Country")
    # Slant the country labels and right-align them under their groups.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    fig.tight_layout()
    plt.show()

In [None]:
# ------------------ Correlation heatmap ------------------
# Pairwise correlations between the int64/float64 columns, drawn as an
# annotated heatmap. Nothing is plotted when no such column exists.
num_cols = df.select_dtypes(include=['int64','float64']).columns
if len(num_cols) > 0:
    corr_matrix = df[num_cols].corr()
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", linewidths=0.5, ax=ax)
    ax.set_title("Correlation Heatmap of Numerical Features")
    fig.tight_layout()
    plt.show()