# Systemic Crisis Prediction

**This project uses the 'Systemic Crisis, Banking Crisis, inflation Crisis In Africa' dataset that was provided by Kaggle.**

- Dataset description: This dataset covers the banking, debt, financial, inflation and systemic crises that occurred from 1860 to 2014 in 13 African countries: Algeria, Angola, Central African Republic, Ivory Coast, Egypt, Kenya, Mauritius, Morocco, Nigeria, South Africa, Tunisia, Zambia and Zimbabwe. The objective of the ML model is to predict the likelihood of a systemic crisis emerging, given a set of indicators such as the annual inflation rate.

 ➡️ Dataset link

https://i.imgur.com/3XzFz3x.jpg


**Instructions**

1. Import your data and perform a basic data exploration phase
2. Display general information about the dataset
3. Create a pandas profiling report to gain insights into the dataset
4. Handle Missing and corrupted values
5. Remove duplicates, if they exist
6. Handle outliers, if they exist
7. Encode categorical features
8. Select your target variable and the features
9. Split your dataset into training and test sets
10. Based on your data exploration phase, select an ML classification algorithm and train it on the training set
11. Assess your model performance on the test set using relevant evaluation metrics
12. Discuss with your cohort alternative ways to improve your model performance

In [None]:
# importing necessary libraries

import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

# Loading the dataset

In [None]:
# Load the crises CSV into a DataFrame; the relative path means the file must
# sit in the notebook's working directory.
df = pd.read_csv("African_crises_dataset.csv")

In [None]:
# Remove pandas' limit on displayed columns so wide frames are not truncated.
pd.set_option('display.max_columns', None)

#### Overview of the dataset

In [None]:
# Preview the first rows of the raw data.
df.head()

#### More information about the dataset

In [None]:
# (number of rows, number of columns) of the raw data.
df.shape

In [None]:
# Column names, dtypes and non-null counts.
df.info()

#### Summary statistics

In [None]:
# Summary statistics for every column, numeric and non-numeric alike
# (include='all').
df.describe(include ='all')

#### Checking for missing values and duplicates

In [None]:
# Number of missing values in each column.
df.isnull().sum()

***There are no missing values in this dataset***

In [None]:
# Number of rows that exactly duplicate an earlier row.
df.duplicated().sum()

***There are no duplicates in this dataset***

#### Encoding categorical columns

In [None]:
# Frequency of each distinct label in the banking_crisis column.
df["banking_crisis"].value_counts()

In [None]:
# Binary-encode banking_crisis by explicit lookup: 'crisis' becomes 1 and
# 'no_crisis' becomes 0. Any label missing from the lookup maps to NaN.
crisis_mapping = dict(crisis=1, no_crisis=0)
encoded_banking_crisis = df['banking_crisis'].map(crisis_mapping)
df['bank_crisis_encoded'] = encoded_banking_crisis

# Preview the frame with the new column appended.
df.head()

In [None]:
# Number of rows recorded for each country.
df["country"].value_counts()

In [None]:
# Expand 'country' into one indicator column per country (prefixed
# 'country_'). get_dummies removes the source column, so the original text
# column is put back afterwards.
country_dummies_frame = pd.get_dummies(df, prefix='country', columns=['country'])
df_encoded = country_dummies_frame
df_encoded['country'] = df['country']

In [None]:
# Preview the one-hot encoded frame.
df_encoded.head()

In [None]:
# Number of rows per country_number value.
df["country_number"].value_counts()

In [None]:
# Columns entering the correlation analysis: the indicator columns first,
# then the country dummy columns, in that order.
indicator_columns = [
    'country_number',
    'year',
    'systemic_crisis',
    'exch_usd',
    'domestic_debt_in_default',
    'sovereign_external_debt_default',
    'gdp_weighted_default',
    'inflation_annual_cpi',
    'independence',
    'currency_crises',
    'inflation_crises',
    'bank_crisis_encoded',
]
country_dummy_columns = [
    'country_Algeria',
    'country_Angola',
    'country_Central African Republic',
    'country_Egypt',
    'country_Ivory Coast',
    'country_Kenya',
    'country_Mauritius',
    'country_Morocco',
    'country_Nigeria',
    'country_South Africa',
    'country_Tunisia',
    'country_Zambia',
    'country_Zimbabwe',
]
selected_columns = indicator_columns + country_dummy_columns

# Pairwise correlation between every selected column.
correlation_matrix = df_encoded.loc[:, selected_columns].corr()

In [None]:
# Display the correlation table itself.
correlation_matrix

In [None]:
# Render the correlation matrix as an annotated heatmap on a large canvas.
heatmap_options = dict(
    annot=True,
    cmap="BrBG",
    linewidths=0.5,
    linecolor='black',
    cbar_kws={'label': 'Correlation Coefficient'},
)

plt.figure(figsize=(20, 15))
sns.heatmap(correlation_matrix, **heatmap_options)

# Title and axis captions (both axes list the same features).
plt.title('Correlation Matrix of Features', fontsize=18)
for set_axis_label in (plt.xlabel, plt.ylabel):
    set_axis_label('Features', fontsize=10)

plt.show()

### Modelling

#### selecting features and splitting the data into training and test sets

In [None]:
# Columns excluded from the feature matrix:
#   - systemic_crisis: the target itself;
#   - country, banking_crisis: raw text columns already replaced by their
#     encoded counterparts (country_* dummies, bank_crisis_encoded);
#   - country_code, country_number: presumably identifiers that duplicate the
#     country information -- confirm against the dataset;
#   - gdp_weighted_default: dropped by the original analysis.
# Each label appears once ("country_code" used to be listed twice).
columns_to_drop = [
    "systemic_crisis",
    "gdp_weighted_default",
    "country_code",
    "country_number",
    "country",
    "banking_crisis",
]
X = df_encoded.drop(columns=columns_to_drop)

# Target variable
y = df_encoded["systemic_crisis"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
# NOTE(review): the split is not stratified. If systemic_crisis turns out to be
# imbalanced, consider stratify=y -- confirm the class balance first.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Verify which features ended up in the training matrix
print(X_train.columns)

### KNN
#### K-Nearest Neighbour for Classification

In [None]:
from sklearn.neighbors import KNeighborsClassifier as knn

# Candidate neighbour counts (1 through 9) and, in the same order, the test
# accuracy obtained with each of them.
ks = range(1, 10)
mean_accuracy = []

for n in ks:
    # fit() returns the estimator itself, so construction and training chain.
    knn_model = knn(n_neighbors=n).fit(X_train, y_train)
    knn_ypred = knn_model.predict(X_test)
    score_for_k = accuracy_score(y_test, knn_ypred)
    mean_accuracy.append(score_for_k)

In [None]:
# Report the accuracy recorded for each candidate k. Pairing each score with
# its k from `ks` avoids assuming the candidates start at 1.
for k_value, k_score in zip(ks, mean_accuracy):
    print("k = {} has a Score = {} ".format(k_value, k_score))

In [None]:
# Relation between k's and their respective accuracies:
# line plot of the test accuracy recorded for each candidate k.
plt.plot(ks, mean_accuracy)
plt.xlabel('Values of K')
plt.ylabel('Testing Accuracy Values')

In [None]:
# Final KNN model. k = 4 is hard-coded, presumably picked from the accuracy
# scan over `ks` -- TODO confirm it still matches the scan results.
knn_model = knn(n_neighbors = 4)

In [None]:
# Fit the final KNN model on the training split.
knn_model.fit(X_train, y_train)

In [None]:
# Predict labels for the test split with the final KNN model.
knn_ypred = knn_model.predict(X_test)

In [None]:
# Evaluate the KNN predictions against the test labels: overall accuracy,
# the confusion matrix, and the per-class report (precision, recall, F1).
accuracy = accuracy_score(y_test, knn_ypred)
cm = confusion_matrix(y_test, knn_ypred)
report = classification_report(y_test, knn_ypred)

# Print the three summaries in the same order they were computed.
for summary_text in (
    f"Accuracy: {accuracy}",
    f"Confusion Matrix:\n{cm}",
    f"Classification Report:\n{report}",
):
    print(summary_text)

In [None]:
# Cross-tabulate actual vs. predicted labels for the KNN model.
# The table gets its own name so it does not overwrite sklearn's
# confusion_matrix function, which the evaluation cells still need to call.
knn_confusion_table = pd.crosstab(y_test, knn_ypred, rownames=['Actual'], colnames=['Predicted'])

# Adjusting figure size
plt.figure(figsize=(6, 4))

# fmt='d' prints the cell counts as plain integers; seaborn's default format
# ('.2g') would show a count such as 200 as "2e+02".
sns.heatmap(knn_confusion_table, annot=True, fmt='d', cmap="BrBG", linewidths=0.5, linecolor='black')
plt.title('Confusion Matrix', fontsize=15)
plt.xlabel('Predicted', fontsize=12)
plt.ylabel('Actual', fontsize=12)
plt.show()

### SVM
Support vector machine classifier

In [None]:
from sklearn.svm import SVC 

In [None]:
from sklearn.metrics import f1_score

In [None]:
kernel_func = ['sigmoid', 'poly', 'rbf', 'linear']
accuracy_score = []

for k in kernel_func:
    svc_model = SVC(C = 0.01, gamma = 'auto', kernel = k)
    svc_model.fit(X_train, y_train)
    svc_ypred = svc_model.predict(X_test)
    accuracy_score.append(f1_score(y_test, svc_ypred, average = 'weighted'))

In [None]:
# Visualization of best kernel

y_pos = np.arange(len(kernel_func))
plt.bar(y_pos, accuracy_score, align='center', alpha = 0.8, color='green')
plt.xticks(y_pos, kernel_func)
plt.xlabel('Kernel Functions')
plt.ylabel('Accuracy')

In [None]:
# Final SVM: same C and gamma as the kernel scan, with the polynomial kernel
# hard-coded (presumably the best performer in that scan -- TODO confirm).
svc_model = SVC(C = 0.01, gamma = 'auto', kernel = 'poly')

In [None]:
# Fit the final SVM on the training split.
svc_model.fit(X_train, y_train)

In [None]:
# Predict labels for the test split with the final SVM.
svc_ypred = svc_model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Evaluate the SVM predictions against the test labels.
# Each metric is computed first, then the three summaries are printed in order.
accuracy = accuracy_score(y_test, svc_ypred)
cm = confusion_matrix(y_test, svc_ypred)
# The classification report covers precision, recall and F1-score.
report = classification_report(y_test, svc_ypred)

svc_summaries = [
    f"Accuracy: {accuracy}",
    f"Confusion Matrix:\n{cm}",
    f"Classification Report:\n{report}",
]
for svc_summary in svc_summaries:
    print(svc_summary)

In [None]:
# Cross-tabulate actual vs. predicted labels for the SVM model.
# The table gets its own name so it does not overwrite sklearn's
# confusion_matrix function, which the evaluation cells still need to call.
svc_confusion_table = pd.crosstab(y_test, svc_ypred, rownames=['Actual'], colnames=['Predicted'])

# Adjusting figure size
plt.figure(figsize=(6, 4))

# fmt='d' prints the cell counts as plain integers; seaborn's default format
# ('.2g') would show a count such as 200 as "2e+02".
sns.heatmap(svc_confusion_table, annot=True, fmt='d', cmap="BrBG", linewidths=0.5, linecolor='black')
plt.title('Confusion Matrix', fontsize=15)
plt.xlabel('Predicted', fontsize=12)
plt.ylabel('Actual', fontsize=12)
plt.show()

### Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import jaccard_score

In [None]:
# Scan tree depths 1 through 9, recording the Jaccard score and the weighted
# F1 score on the test split for each depth (same order as `depth_range`).
depth_range = range(1, 10)
jaccard_score_ = []
f1_score_ = []

for d in depth_range:
    # A fixed random_state makes the fitted tree, and therefore the scores,
    # reproducible between runs; 42 matches the seed used for the
    # train/test split.
    dt = DecisionTreeClassifier(criterion = 'gini', max_depth = d, random_state = 42)
    dt.fit(X_train, y_train)
    dt_ypred = dt.predict(X_test)
    jaccard_score_.append(jaccard_score(y_test, dt_ypred))
    f1_score_.append(f1_score(y_test, dt_ypred, average = 'weighted'))

In [None]:
# Creating a df to check the best value of max_depth.
# Column labels are derived from depth_range so the table stays valid if the
# scanned depths change (a hard-coded label list raises on a length mismatch).
depth_labels = ['d = {}'.format(d) for d in depth_range]

result = pd.DataFrame([jaccard_score_, f1_score_], index = ['Jaccard', 'F1'], columns = depth_labels)
result.columns.name = 'Evaluation Metrices'
result

In [None]:
# Main Model: max_depth = 2 is hard-coded, presumably picked from the depth
# scan -- TODO confirm. random_state is fixed (42, matching the train/test
# split seed) so the fitted tree is reproducible between runs.
dt_model = DecisionTreeClassifier(criterion = 'gini', max_depth = 2, random_state = 42)

In [None]:
# Fit the final decision tree on the training split.
dt_model.fit(X_train, y_train)

In [None]:
# Predict labels for the test split with the final decision tree.
dt_ypred = dt_model.predict(X_test)

In [None]:
# Evaluate the decision-tree predictions against the test labels.
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Compute the three metrics up front...
accuracy = accuracy_score(y_test, dt_ypred)
cm = confusion_matrix(y_test, dt_ypred)
report = classification_report(y_test, dt_ypred)  # precision, recall, F1-score

# ...then print them: accuracy, confusion matrix, classification report.
dt_summaries = (
    f"Accuracy: {accuracy}",
    f"Confusion Matrix:\n{cm}",
    f"Classification Report:\n{report}",
)
for dt_summary in dt_summaries:
    print(dt_summary)

In [None]:
# Cross-tabulate actual vs. predicted labels for the decision tree.
# The table gets its own name so it does not overwrite sklearn's
# confusion_matrix function, which the evaluation cells still need to call.
dt_confusion_table = pd.crosstab(y_test, dt_ypred, rownames=['Actual'], colnames=['Predicted'])

# Adjusting figure size
plt.figure(figsize=(6, 4))

# fmt='d' prints the cell counts as plain integers; seaborn's default format
# ('.2g') would show a count such as 200 as "2e+02".
sns.heatmap(dt_confusion_table, annot=True, fmt='d', cmap="BrBG", linewidths=0.5, linecolor='black')
plt.title('Confusion Matrix', fontsize=15)
plt.xlabel('Predicted', fontsize=12)
plt.ylabel('Actual', fontsize=12)
plt.show()