MODEL DEVELOPMENT AND EVALUATION

In [11]:
import pandas as pd
import numpy as np

In [3]:
# Read the cleaned events extract into a DataFrame; the relative path is
# resolved against the kernel's current working directory.
df = pd.read_csv(
    filepath_or_buffer='Nigeria_1997-2024_Sep20_cleaned.csv',
)

In [4]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0,event_date,year,disorder_type,event_type,sub_event_type,actor1,inter1,civilian_targeting,admin1,admin2,location,latitude,longitude,fatalities
0,2024-09-20,2024,Demonstrations,Protests,Peaceful protest,Protesters (Nigeria),6,Unknown,Borno,Maiduguri Metro,Bolori,11.8826,13.089,0
1,2024-09-20,2024,Demonstrations,Protests,Peaceful protest,Protesters (Nigeria),6,Unknown,Osun,Ife Central,Ile-Ife,7.4824,4.5603,0
2,2024-09-19,2024,Demonstrations,Protests,Peaceful protest,Protesters (Nigeria),6,Unknown,Oyo,Ibadan North,Agodi,7.4035,3.9132,0
3,2024-09-19,2024,Strategic developments,Strategic developments,Disrupted weapons use,Police Forces of Nigeria (2023-),1,Unknown,Kaduna,Chikun,Buruku,10.6179,7.2331,0
4,2024-09-19,2024,Demonstrations,Protests,Peaceful protest,Protesters (Nigeria),6,Unknown,Edo,Etsako East,Wanno,7.1389,6.5724,0


FEATURE ENGINEERING

In [8]:
# Remove the date fields so they are not used as model features.
#
# Reassigning instead of `inplace=True`, together with errors='ignore', makes
# this cell safe to re-run: the original raised KeyError on a second execution
# because the columns were already gone. `axis=1` was redundant next to
# `columns=` and has been removed.
#
# NOTE(review): the rendered preview labels the leading column 'Unnamed: 0'.
# If that is a real column (a row index written out by an earlier to_csv)
# rather than a display artefact, it would otherwise reach the models as a
# feature; it is dropped here only if present -- confirm against the CSV.
df = df.drop(columns=['Unnamed: 0', 'event_date', 'year'], errors='ignore')

In [9]:
from sklearn.preprocessing import LabelEncoder

In [12]:
# Encode categorical columns
#
# Every non-numeric column is replaced, in place on `df`, by integer codes.
# The fitted encoder for each column is kept in `label_encoders` so the codes
# can be mapped back to the original labels (`inverse_transform`) or applied
# to new data (`transform`); the original discarded the encoders after use.
#
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target
# values; OrdinalEncoder / OneHotEncoder are the feature-side tools. Consider
# switching once the handling of missing values has been confirmed.

obj_cols = df.select_dtypes(exclude=np.number).columns

label_encoders = {}  # column name -> fitted LabelEncoder
for col in obj_cols:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    label_encoders[col] = encoder

In [16]:
# Preview the first five rows after the categorical columns were encoded.
df.head(n=5)

Unnamed: 0,disorder_type,event_type,sub_event_type,actor1,inter1,civilian_targeting,admin1,admin2,location,latitude,longitude,fatalities
0,0,2,17,737,6,1,7,458,775,11.8826,13.089,0
1,0,2,17,737,6,1,29,286,2095,7.4824,4.5603,0
2,0,2,17,737,6,1,30,263,153,7.4035,3.9132,0
3,3,4,7,723,1,1,18,131,867,10.6179,7.2331,0
4,0,2,17,737,6,1,11,200,4918,7.1389,6.5724,0


In [14]:
from sklearn.model_selection import train_test_split

In [15]:
# Define features and target variable
X = df.drop(columns='fatalities')  # Feature set: every column except the target
y = df['fatalities']  # Target variable

# Hold out 25% of the rows for evaluation (the train_test_split default, now
# spelled out). The fixed random_state makes the split, and therefore every
# metric reported below, reproducible across kernel restarts; the original
# call drew a different split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

MODELLING AND EVALUATION

In [19]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor


In [20]:
# Feature scaling for the models that are trained on scaled inputs (SVR, KNN).
# The scaler learns its statistics from the training split only and the same
# fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [21]:
# Function to evaluate models
def evaluate_model(model, X_test, y_test):
    """Score a fitted regressor on held-out data.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X_test : array-like
        Feature matrix handed to ``model.predict``. It must be in the same
        representation (scaled or unscaled) that the model was fitted on.
    y_test : array-like
        True target values aligned with the rows of ``X_test``.

    Returns
    -------
    tuple
        ``(mse, mae, r2)``: mean squared error, mean absolute error and
        R^2 score, in that order.
    """
    # Imported locally because no cell in this notebook imports these metrics;
    # without this the function raises NameError on a fresh kernel.
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    return mse, mae, r2

In [22]:
# Results container: model display name -> (mse, mae, r2) tuple.
results = dict()

In [23]:
# 1. Linear Regression (fitted and scored on the unscaled features)
lr_model = LinearRegression().fit(X_train, y_train)
results['Linear Regression'] = evaluate_model(
    model=lr_model, X_test=X_test, y_test=y_test
)

In [24]:
# 2. Decision Tree
# A fixed random_state pins the tree's internal randomness so the reported
# scores are the same on every run; the original estimator was unseeded.
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)
results['Decision Tree'] = evaluate_model(dt_model, X_test, y_test)

In [25]:
# 3. Random Forest
# A fixed random_state pins the bootstrap sampling and per-tree randomness so
# the reported scores are the same on every run; the original was unseeded.
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)
results['Random Forest'] = evaluate_model(rf_model, X_test, y_test)

In [26]:
# 4. Gradient Boosting
# A fixed random_state seeds the trees built at each boosting iteration so
# the reported scores are the same on every run; the original was unseeded.
gbr_model = GradientBoostingRegressor(random_state=42)
gbr_model.fit(X_train, y_train)
results['Gradient Boosting'] = evaluate_model(gbr_model, X_test, y_test)

In [27]:
# 5. Support Vector Regression (fitted and scored on the scaled features)
svr_model = SVR().fit(X_train_scaled, y_train)
results['SVR'] = evaluate_model(
    model=svr_model, X_test=X_test_scaled, y_test=y_test
)

In [28]:
# 6. K-Nearest Neighbors (fitted and scored on the scaled features)
knn_model = KNeighborsRegressor().fit(X_train_scaled, y_train)
results['KNN'] = evaluate_model(
    model=knn_model, X_test=X_test_scaled, y_test=y_test
)

In [29]:
# Results: print one block per model, in the order the entries were added.
for model_name in results:
    mse, mae, r2 = results[model_name]
    print(f"{model_name}:\n MSE: {mse}\n MAE: {mae}\n R2: {r2}\n")

Linear Regression:
 MSE: 129.2940282547461
 MAE: 3.82215625265518
 R2: 0.05612917321157618

Decision Tree:
 MSE: 174.11555449890724
 MAE: 3.743959008103084
 R2: -0.2710764340778884

Random Forest:
 MSE: 145.38381378310837
 MAE: 3.47851862456546
 R2: -0.06132930011854243

Gradient Boosting:
 MSE: 129.43564511937348
 MAE: 3.5383121631417214
 R2: 0.05509534335178046

SVR:
 MSE: 134.58875619233228
 MAE: 2.724151105634589
 R2: 0.01747665922057684

KNN:
 MSE: 132.53918086570008
 MAE: 3.4570854058630545
 R2: 0.032438946220421316

