# Imports and Setup

## Core libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Machine learning

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

## Optional: for anomaly detection

In [None]:
from sklearn.ensemble import IsolationForest

## Display settings

In [None]:
# Apply seaborn's "whitegrid" style to the plots drawn after this cell.
sns.set(style="whitegrid")

# Load and Explore Data
## Load dataset

In [None]:
# Location of the CSV file to load into the working DataFrame.
data_path = 'your_threat_data.csv'  # Replace with actual path
df = pd.read_csv(data_path)

## Quick overview

In [None]:
# Print the (rows, columns) shape and the column index, then preview the
# first rows as the cell's displayed result.
for summary in (df.shape, df.columns):
    print(summary)
df.head()

# Data Preprocessing
## Handle missing values

In [None]:
# Forward-fill gaps in place: each missing value takes the last valid value
# above it in the same column. `fillna(method='ffill')` is deprecated in
# recent pandas releases; `ffill()` is the supported equivalent.
# NOTE(review): values missing at the very top of a column have nothing to
# copy from and stay NaN — confirm whether they need separate handling.
df.ffill(inplace=True)

## Encode categorical variables

In [None]:
# One-hot encode the categorical feature columns only. Running get_dummies
# over the whole frame would also expand a categorical label into
# 'threat_label_<value>' dummy columns, so a later look-up of
# 'threat_label' in df_encoded would raise KeyError. The label is therefore
# set aside before encoding and re-attached unchanged afterwards.
label_column = 'threat_label'  # Replace with actual label column
df_encoded = pd.get_dummies(
    df.drop(columns=[label_column], errors='ignore'),
    drop_first=True,
)
if label_column in df.columns:
    # Same index as df, so the values line up row for row.
    df_encoded[label_column] = df[label_column]

## Feature-target split

In [None]:
# Separate the target column (y) from the remaining feature columns (X).
target_column = 'threat_label'  # Replace with actual label column
y = df_encoded[target_column]
X = df_encoded.drop(columns=[target_column])

# Model Training
## Train-test split

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

## Model

In [None]:
# Build a 100-tree random forest with a fixed seed and fit it on the
# training split.
forest_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_train, y_train)

# Evaluation
## Predictions

In [None]:
# Predict a label for every row of the held-out test features using the
# fitted model.
y_pred = model.predict(X_test)

## Metrics

In [None]:
# Compare the predictions with the true test labels: first the confusion
# matrix, then the text classification report.
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print(test_confusion)
print(test_report)

# Anomaly Detection

In [None]:
# Fit an isolation forest on the full feature matrix and label every row.
# contamination=0.01 sets the expected share of outliers to 1%.
iso_params = {'contamination': 0.01, 'random_state': 42}
iso_forest = IsolationForest(**iso_params)
anomalies = iso_forest.fit_predict(X)

## Add anomaly flag to dataframe


In [None]:
# Store the isolation-forest output on the original frame, then preview the
# rows whose flag equals -1.
df['anomaly_flag'] = anomalies
is_anomaly = df['anomaly_flag'].eq(-1)
df[is_anomaly].head()

# Visualization
## Threat distribution

In [None]:
# Bar chart of how many rows carry each 'threat_label' value; the x tick
# labels are rotated 45 degrees.
ax = sns.countplot(data=df, x='threat_label')
ax.set_title('Threat Type Distribution')
plt.xticks(rotation=45)
plt.show()