# ML Modeling: Attack Severity
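Train a RandomForest classifier to predict `severity`, plot its feature importances, and save the trained model to disk.
Before running, make sure `gtd_cleaned.csv` is present in the project's `data/` folder (the `data/` directory under `base_path`).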

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import os

# Project root; every input and output of this notebook lives under it.
# NOTE(review): the path looks like a Colab-mounted Google Drive — confirm the
# drive is mounted before running this cell.
base_path = '/content/drive/MyDrive/LABS/Project-1-GTD-Analytics'

# Read the cleaned dataset and report its (rows, columns) shape.
cleaned_csv = f'{base_path}/data/gtd_cleaned.csv'
df_clean = pd.read_csv(cleaned_csv)
print('Loaded cleaned data:', df_clean.shape)

In [None]:
# Train a RandomForest on `df_clean` to predict `severity`, print a
# classification report, plot feature importances, and persist the model.
print("Training ML model...")

# Columns used as model inputs.
# NOTE(review): if `severity` was derived from nkill/nwound during cleaning,
# using them as features leaks the label into the inputs — confirm against
# the cleaning step before trusting the reported scores.
features = ['iyear', 'success', 'nkill', 'nwound', 'latitude', 'longitude']
# Missing feature values are replaced with 0.
# NOTE(review): 0 is also a valid latitude/longitude, so imputed rows are
# indistinguishable from real (0, 0) coordinates — confirm this is intended.
X = df_clean[features].fillna(0)
y = df_clean['severity']

# 80/20 train/test split; stratify=y keeps the class proportions of
# `severity` the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42, stratify=y)

# Random Forest. Class imbalance is addressed by class_weight='balanced',
# which weights classes inversely to their frequency in the training data;
# it is not something the algorithm does on its own.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
rf_model.fit(X_train, y_train)

# Predictions & evaluation on the held-out 20%.
y_pred = rf_model.predict(X_test)
print("Model Performance:")
print(classification_report(y_test, y_pred))

# Feature importance table, most important first.
importances = pd.DataFrame({
    'feature': features,
    'importance': rf_model.feature_importances_
}).sort_values('importance', ascending=False)

# Make sure the output folder exists: otherwise savefig raises
# FileNotFoundError and the freshly trained model below is never saved.
dashboards_dir = f'{base_path}/dashboards'
os.makedirs(dashboards_dir, exist_ok=True)

plt.figure(figsize=(10, 6))
# NOTE(review): recent seaborn releases emit a FutureWarning when `palette`
# is passed without `hue`; left unchanged because the installed version is
# not known from this notebook.
sns.barplot(data=importances, x='importance', y='feature', palette='Reds_r')
plt.title('Feature Importance: Attack Severity Prediction')
plt.tight_layout()
plt.savefig(f'{dashboards_dir}/feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()

# Save model next to the cleaned data.
joblib.dump(rf_model, f'{base_path}/data/gtd_model.pkl')
print("Model saved: gtd_model.pkl")