# Imports and Cleaning

In [68]:
import pandas as pd
import numpy as np
import plotly.express as px
import re
import matplotlib.pyplot as plt
import seaborn as sns

In [69]:
import warnings
from sklearn.exceptions import DataConversionWarning, ConvergenceWarning, UndefinedMetricWarning

# Silence these warning categories globally for the rest of the session.
# NOTE(review): ignoring ConvergenceWarning also hides solvers that stop
# before converging - confirm that is intended.
_MUTED_WARNING_CATEGORIES = (
    DataConversionWarning,
    FutureWarning,
    ConvergenceWarning,
    UndefinedMetricWarning,
)
for _muted_category in _MUTED_WARNING_CATEGORIES:
    warnings.filterwarnings("ignore", category=_muted_category)

In [70]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression  
from sklearn.linear_model import RidgeClassifier  
from sklearn.naive_bayes import GaussianNB  
from sklearn.svm import SVC  
from sklearn.metrics import classification_report

In [71]:
from termcolor import colored as cl  # text customization
from sklearn.metrics import roc_auc_score  # AUC metric
from sklearn.linear_model import LogisticRegression  # Logistic Regression for classification
from sklearn.linear_model import RidgeClassifier  # Ridge Classifier for classification
from sklearn.naive_bayes import GaussianNB  # Naive Bayes for classification
from sklearn.svm import SVC  # Support Vector Classifier
from sklearn.preprocessing import label_binarize #as we have multi-class classification

In [72]:
# Single random seed reused by the train/test split and by every estimator
# that takes `random_state`, so that reruns of the notebook are reproducible.
seed = 9

In [73]:
# Location of the merged country-level dataset, relative to this notebook.
_dataset_csv = "../World_Datasets/final_dataset_world.csv"
data = pd.read_csv(_dataset_csv)

In [74]:
# Show the freshly loaded frame for a first visual check of rows and columns.
data

Unnamed: 0,Country,S&P Rating,Country Name,country_code,unemployment_2000,unemployment_2014,unemployment_2015,unemployment_2016,unemployment_2017,unemployment_2018,unemployment_2019,unemployment_2020,unemployment_2021,unemployment_2022,unemployment_2023,current_account_balance_1990,current_account_balance_2000,current_account_balance_2014,current_account_balance_2015,current_account_balance_2016,current_account_balance_2017,current_account_balance_2018,current_account_balance_2019,current_account_balance_2020,current_account_balance_2021,current_account_balance_2022,current_account_balance_2023,exchange_rate_usd_1990,exchange_rate_usd_2000,exchange_rate_usd_2014,exchange_rate_usd_2015,exchange_rate_usd_2016,exchange_rate_usd_2017,exchange_rate_usd_2018,exchange_rate_usd_2019,exchange_rate_usd_2020,exchange_rate_usd_2021,exchange_rate_usd_2022,exchange_rate_usd_2023,gdp_1990,gdp_2000,gdp_2014,gdp_2015,gdp_2016,gdp_2017,gdp_2018,gdp_2019,gdp_2020,gdp_2021,gdp_2022,gdp_2023,inflation_1990,inflation_2000,inflation_2014,inflation_2015,inflation_2016,inflation_2017,inflation_2018,inflation_2019,inflation_2020,inflation_2021,inflation_2022,inflation_2023,total_reserves_1990,total_reserves_2000,total_reserves_2014,total_reserves_2015,total_reserves_2016,total_reserves_2017,total_reserves_2018,total_reserves_2019,total_reserves_2020,total_reserves_2021,total_reserves_2022,total_reserves_2023,Unnamed: 73,value_counts
0,Australia,AAA,Australia,AUS,6.288000,6.078000,6.055000,5.711000,5.592000,5.300000,5.159000,6.456000,5.116000,3.701000,3.667000,-4.987491e+00,-3.721401e+00,-3.054734e+00,-4.229028e+00,-3.440490e+00,-2.722911e+00,-2.233402e+00,3.501218e-01,2.330931e+00,3.137209e+00,8.453239e-01,2.547373e-01,1.281057e+00,1.724827e+00,1.109363e+00,1.331090e+00,1.345214e+00,1.304758e+00,1.338412e+00,1.438507e+00,1.453085e+00,1.331224e+00,1.441664e+00,1.505191e+00,3.114267e+11,4.161678e+11,1.468265e+12,1.351296e+12,1.206837e+12,1.325583e+12,1.427809e+12,1.392724e+12,1.328414e+12,1.556736e+12,1.690858e+12,1.728057e+12,7.333022e+00,4.457435e+00,2.487923e+00,1.508367e+00,1.276991e+00,1.948647e+00,1.911401e+00,1.610768e+00,8.469055e-01,2.863910e+00,6.594097e+00,5.597015e+00,1.931874e+10,1.882155e+10,5.391033e+10,4.540604e+10,5.248079e+10,6.565372e+10,5.390953e+10,5.799469e+10,4.254463e+10,5.787750e+10,5.670190e+10,6.170335e+10,3.085167e+10,73.0
1,Canada,AAA,Canada,CAN,6.829000,7.023000,6.945000,7.038000,6.426000,5.837000,5.690000,9.657000,7.527000,5.280000,5.366000,-3.398803e+00,2.483228e+00,-2.318921e+00,-3.513987e+00,-3.093111e+00,-2.803427e+00,-2.385072e+00,-1.951703e+00,-2.010494e+00,1.277747e-02,-3.526320e-01,-7.295740e-01,1.166774e+00,1.485394e+00,1.104747e+00,1.278786e+00,1.325615e+00,1.297936e+00,1.295818e+00,1.326793e+00,1.341153e+00,1.253877e+00,1.301555e+00,1.349909e+00,5.960756e+11,7.447734e+11,1.805750e+12,1.556509e+12,1.527995e+12,1.649266e+12,1.725329e+12,1.743725e+12,1.655685e+12,2.007472e+12,2.161483e+12,2.142471e+12,4.780477e+00,2.719440e+00,1.906636e+00,1.125241e+00,1.428760e+00,1.596884e+00,2.268226e+00,1.949269e+00,7.169996e-01,3.395193e+00,6.802801e+00,3.879002e+00,2.352952e+10,3.242727e+10,7.469996e+10,7.975352e+10,8.271811e+10,8.667771e+10,8.392560e+10,8.529711e+10,9.042814e+10,1.066151e+11,1.069524e+11,1.175509e+11,5.877545e+10,73.0
2,Denmark,AAA,Denmark,DNK,4.476000,6.925000,6.278000,5.989000,5.833000,5.131000,5.018000,5.637000,5.043000,4.434000,5.142000,9.923083e-01,1.378800e+00,8.886178e+00,7.953938e+00,7.078509e+00,7.360245e+00,6.244573e+00,7.425270e+00,7.231130e+00,8.659145e+00,1.156562e+01,9.840846e+00,6.188558e+00,8.083144e+00,5.612467e+00,6.727907e+00,6.731718e+00,6.602893e+00,6.314619e+00,6.669447e+00,6.542152e+00,6.287113e+00,7.076152e+00,6.889703e+00,1.382177e+11,1.640438e+11,3.528326e+11,3.017589e+11,3.121818e+11,3.316106e+11,3.552934e+11,3.454015e+11,3.556310e+11,4.083782e+11,4.019456e+11,4.070919e+11,2.641603e+00,2.903282e+00,5.640205e-01,4.520342e-01,2.500000e-01,1.147132e+00,8.136095e-01,7.581316e-01,4.207120e-01,1.853045e+00,7.696567e+00,3.305178e+00,1.122584e+10,1.569595e+10,7.539189e+10,6.518509e+10,6.421581e+10,7.524445e+10,7.094208e+10,6.683555e+10,7.282335e+10,8.223584e+10,9.607255e+10,1.093708e+11,5.468542e+10,73.0
3,Germany,AAA,Germany,DEU,7.917000,4.981000,4.624000,4.122000,3.746000,3.384000,3.136000,3.856000,3.638000,3.135000,3.045000,2.612583e+00,-1.513131e+00,7.192490e+00,8.100335e+00,8.899854e+00,8.083586e+00,8.444744e+00,7.880059e+00,6.349851e+00,6.931968e+00,4.437915e+00,5.936536e+00,1.615733e+00,1.481802e+11,2.963604e+11,4.445405e+11,5.927207e+11,7.409009e+11,8.890811e+11,1.037261e+12,1.185441e+12,1.333622e+12,1.481802e+12,1.629982e+12,1.778162e+12,1.966981e+12,3.965801e+12,3.423568e+12,3.537784e+12,3.763092e+12,4.052008e+12,3.957208e+12,3.940143e+12,4.348297e+12,4.163596e+12,4.525704e+12,2.696468e+00,1.440268e+00,9.067940e-01,5.144261e-01,4.917470e-01,1.509495e+00,1.732169e+00,1.445660e+00,1.448779e-01,3.066667e+00,6.872574e+00,5.946437e+00,1.045473e+11,8.749687e+10,1.934848e+11,1.737309e+11,1.840313e+11,1.999831e+11,1.980271e+11,2.240280e+11,2.684086e+11,2.957362e+11,2.939137e+11,3.227001e+11,1.613500e+11,62.0
4,Liechtenstein,AAA,Liechtenstein,LIE,9.640537,9.136085,9.052059,8.903803,8.664479,8.383856,8.244777,9.305229,9.015766,8.230760,7.967091,1.516652e+07,3.017828e+07,4.306998e+07,5.742663e+07,7.143651e+07,8.531168e+07,9.953030e+07,1.134059e+08,1.277577e+08,1.421095e+08,1.564863e+08,1.708664e+08,1.840786e+08,2.451328e+09,4.784157e+09,7.121938e+09,9.459720e+09,1.179867e+10,1.417107e+10,1.654347e+10,1.891586e+10,2.129617e+10,2.368620e+10,2.638014e+10,1.421509e+09,2.483890e+09,6.657527e+09,6.268515e+09,6.237302e+09,6.474309e+09,6.692621e+09,6.436467e+09,6.405870e+09,7.710380e+09,7.361504e+09,7.088856e+09,6.816208e+09,6.543559e+09,6.270911e+09,5.998263e+09,5.725615e+09,5.452966e+09,5.180318e+09,4.907670e+09,4.635021e+09,4.362373e+09,4.089725e+09,3.817076e+09,3.544428e+09,3.271780e+09,2.999131e+09,2.726483e+09,2.453835e+09,2.181187e+09,1.908538e+09,1.635890e+09,1.363242e+09,1.090593e+09,8.179449e+08,5.452966e+08,2.726483e+08,13.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,Sri Lanka,SD,Sri Lanka,LKA,7.740000,4.157000,4.519000,4.243000,4.046000,4.318000,4.670000,5.365000,5.258000,6.330000,6.360000,-3.713648e+00,-6.390297e+00,-2.408481e+00,-2.211361e+00,-1.979723e+00,-2.446586e+00,-2.962153e+00,-2.070888e+00,-1.408137e+00,-3.706463e+00,-1.953545e+00,1.847993e+00,4.006292e+01,7.700512e+01,1.305647e+02,1.358569e+02,1.455817e+02,1.524464e+02,1.624649e+02,1.787449e+02,1.855926e+02,1.987643e+02,3.226327e+02,3.275065e+02,8.032551e+09,1.633081e+10,8.252854e+10,8.514096e+10,8.801228e+10,9.437624e+10,9.449387e+10,8.901498e+10,8.430430e+10,8.860932e+10,7.414487e+10,8.435686e+10,2.149525e+01,6.176276e+00,3.179002e+00,3.768368e+00,3.958888e+00,7.704138e+00,2.135038e+00,3.528394e+00,6.153945e+00,7.014781e+00,4.972110e+01,1.654117e+01,4.470305e+08,1.131355e+09,8.210750e+09,7.302097e+09,6.008199e+09,7.959048e+09,6.920826e+09,7.648305e+09,5.663994e+09,3.136992e+09,2.352744e+09,1.568496e+09,7.842479e+08,71.0
127,Ukraine,SD,Ukraine,UKR,11.707000,9.270000,9.140000,9.350000,9.500000,8.799000,8.194000,9.475000,9.834000,8.411791,6.989581,5.567372e+00,4.145163e+00,-3.442597e+00,5.531085e+00,-1.998803e+00,-3.098389e+00,-4.914009e+00,-2.679957e+00,3.362966e+00,-1.943275e+00,4.923775e+00,-5.401186e+00,1.952346e-02,5.440233e+00,1.188666e+01,2.184470e+01,2.555133e+01,2.659661e+01,2.720049e+01,2.584559e+01,2.695752e+01,2.728619e+01,3.234230e+01,3.657381e+01,8.139356e+10,3.237508e+10,1.335039e+11,9.103097e+10,9.335587e+10,1.120905e+11,1.308911e+11,1.538830e+11,1.566177e+11,1.997659e+11,1.619895e+11,1.787570e+11,8.937851e+10,2.820310e+01,1.207186e+01,4.869986e+01,1.391271e+01,1.443832e+01,1.095186e+01,7.886717e+00,2.732492e+00,9.363139e+00,2.018364e+01,1.284902e+01,7.386012e+08,1.477202e+09,7.538805e+09,1.330088e+10,1.553726e+10,1.881093e+10,2.081790e+10,2.531700e+10,2.913754e+10,3.096667e+10,2.850593e+10,4.051011e+10,2.025505e+10,67.0
128,Lebanon,D,Lebanon,LBN,8.594000,8.796000,9.270000,9.760000,10.236000,10.741000,11.301000,13.235000,12.621000,11.599000,11.565000,-1.033289e+00,-1.363158e+01,-2.622987e+01,-1.710774e+01,-2.047802e+01,-2.288220e+01,-2.434704e+01,-2.182809e+01,-8.761842e+00,-1.969781e+01,-3.460623e+01,-2.810367e+01,6.950892e+02,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.419243e+09,2.838485e+09,1.726036e+10,4.809521e+10,4.992934e+10,5.114731e+10,5.302768e+10,5.490152e+10,5.160596e+10,3.171213e+10,2.313194e+10,2.099242e+10,1.574432e+10,1.049621e+10,5.248105e+09,1.854604e+00,-3.749145e+00,-7.833596e-01,4.321352e+00,6.076989e+00,3.005389e+00,8.486433e+01,1.547561e+02,1.712055e+02,2.213416e+02,4.210389e+09,8.474638e+09,5.066886e+10,4.853139e+10,5.390551e+10,5.541153e+10,5.238061e+10,5.221348e+10,4.244040e+10,3.523922e+10,3.251288e+10,2.167525e+10,1.083763e+10,66.0
129,Puerto Rico,D,Puerto Rico,PRI,10.080000,13.900000,12.000000,11.800000,10.800000,9.200000,8.300000,8.890000,7.900000,6.000000,5.962000,1.224157e+09,2.448314e+09,3.672470e+09,4.896627e+09,6.120784e+09,7.344941e+09,8.569097e+09,9.793254e+09,1.101741e+10,1.224157e+10,1.346572e+10,1.468988e+10,1.591404e+10,1.713819e+10,1.836235e+10,1.958651e+10,2.081066e+10,2.203482e+10,2.325898e+10,2.448314e+10,2.570729e+10,2.693145e+10,2.815561e+10,2.937976e+10,3.060392e+10,6.170180e+10,1.020000e+11,1.030000e+11,1.040000e+11,1.030000e+11,1.010000e+11,1.050000e+11,1.030000e+11,1.060000e+11,1.140000e+11,1.180000e+11,1.134615e+11,1.089231e+11,1.043846e+11,9.984615e+10,9.530769e+10,9.076923e+10,8.623077e+10,8.169231e+10,7.715385e+10,7.261538e+10,6.807692e+10,6.353846e+10,5.900000e+10,5.446154e+10,4.992308e+10,4.538462e+10,4.084615e+10,3.630769e+10,3.176923e+10,2.723077e+10,2.269231e+10,1.815385e+10,1.361538e+10,9.076923e+09,4.538462e+09,39.0


In [75]:
# Discard the columns that are not used as model features
# ('Country Name' repeats 'Country'; the others are bookkeeping columns).
_columns_to_discard = ['Unnamed: 73', 'value_counts', 'Country Name', 'country_code']
data = data.drop(columns=_columns_to_discard)

In [76]:
# Inspect how many distinct rating labels exist and what they look like.
_rating_column = data['S&P Rating']
print('Number of unique values: ', _rating_column.nunique())
print(_rating_column.unique())

Number of unique values:  38
['AAA' 'AA+' 'AA+\xa0' 'AA-' 'AA' 'AA\xa0' 'A+' 'NR' 'A\xa0' 'A' 'AA-\xa0'
 'A-' 'A-\xa0' 'BBB+' 'BBB' 'BBB\xa0' 'BBB+\xa0' 'BBB-' 'BBB-\xa0'
 'BB+\xa0' 'BB+' 'BB\xa0' 'BB' 'BB-\xa0' 'BB-' 'B+' 'B+\xa0' 'SD' 'B\xa0'
 'B-' 'B' 'B-\xa0' 'CCC+\xa0' 'CCC+' 'CCC' 'D' 'D\xa0' 'SD\xa0']


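38 distinct labels are too many classes for a classifier, and many of them are duplicates that differ only by a trailing non-breaking space (e.g. `'AA+'` vs `'AA+\xa0'`). We therefore drop the non-rated `'NR'` rows first and then regroup the remaining ratings into broader classes.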

In [77]:
# Drop the non-rated ('NR') countries.
# - Labels are stripped before the comparison because the column mixes clean
#   labels with ones that carry a trailing non-breaking space
#   (e.g. 'AA+' vs 'AA+\xa0'), so a padded 'NR' is excluded as well.
# - .copy() detaches the filtered rows from the original frame; without it,
#   adding a column to the result afterwards raises pandas'
#   SettingWithCopyWarning.
_is_rated = data['S&P Rating'].str.strip() != 'NR'
data = data[_is_rated].copy()  # dropping non-rated

In [78]:
# Collapse the fine-grained S&P labels into three classes:
#   0 -> label contains an 'A'
#   1 -> label contains a 'B' (and no 'A')
#   2 -> everything else (missing labels land here too, because na=False)
_rating_text = data['S&P Rating'].str
_has_a = _rating_text.match(r'.*A.*', na=False).to_numpy()
_has_b = _rating_text.match(r'.*B.*', na=False).to_numpy()
# np.select picks the first matching condition, like the nested np.where did.
data['target'] = np.select([_has_a, _has_b], [0, 1], default=2)

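We now have 3 target classes: `0` for ratings containing an "A" (AAA to A-), `1` for ratings containing a "B" (BBB+ to B-), and `2` for everything else (CCC+, CCC, SD and D).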

# Base Model

In [79]:
# Show the frame again to check the newly added `target` column.
data

Unnamed: 0,Country,S&P Rating,unemployment_2000,unemployment_2014,unemployment_2015,unemployment_2016,unemployment_2017,unemployment_2018,unemployment_2019,unemployment_2020,unemployment_2021,unemployment_2022,unemployment_2023,current_account_balance_1990,current_account_balance_2000,current_account_balance_2014,current_account_balance_2015,current_account_balance_2016,current_account_balance_2017,current_account_balance_2018,current_account_balance_2019,current_account_balance_2020,current_account_balance_2021,current_account_balance_2022,current_account_balance_2023,exchange_rate_usd_1990,exchange_rate_usd_2000,exchange_rate_usd_2014,exchange_rate_usd_2015,exchange_rate_usd_2016,exchange_rate_usd_2017,exchange_rate_usd_2018,exchange_rate_usd_2019,exchange_rate_usd_2020,exchange_rate_usd_2021,exchange_rate_usd_2022,exchange_rate_usd_2023,gdp_1990,gdp_2000,gdp_2014,gdp_2015,gdp_2016,gdp_2017,gdp_2018,gdp_2019,gdp_2020,gdp_2021,gdp_2022,gdp_2023,inflation_1990,inflation_2000,inflation_2014,inflation_2015,inflation_2016,inflation_2017,inflation_2018,inflation_2019,inflation_2020,inflation_2021,inflation_2022,inflation_2023,total_reserves_1990,total_reserves_2000,total_reserves_2014,total_reserves_2015,total_reserves_2016,total_reserves_2017,total_reserves_2018,total_reserves_2019,total_reserves_2020,total_reserves_2021,total_reserves_2022,total_reserves_2023,target
0,Australia,AAA,6.288000,6.078000,6.055000,5.711000,5.592000,5.300000,5.159000,6.456000,5.116000,3.701000,3.667000,-4.987491e+00,-3.721401e+00,-3.054734e+00,-4.229028e+00,-3.440490e+00,-2.722911e+00,-2.233402e+00,3.501218e-01,2.330931e+00,3.137209e+00,8.453239e-01,2.547373e-01,1.281057e+00,1.724827e+00,1.109363e+00,1.331090e+00,1.345214e+00,1.304758e+00,1.338412e+00,1.438507e+00,1.453085e+00,1.331224e+00,1.441664e+00,1.505191e+00,3.114267e+11,4.161678e+11,1.468265e+12,1.351296e+12,1.206837e+12,1.325583e+12,1.427809e+12,1.392724e+12,1.328414e+12,1.556736e+12,1.690858e+12,1.728057e+12,7.333022e+00,4.457435e+00,2.487923e+00,1.508367e+00,1.276991e+00,1.948647e+00,1.911401e+00,1.610768e+00,8.469055e-01,2.863910e+00,6.594097e+00,5.597015e+00,1.931874e+10,1.882155e+10,5.391033e+10,4.540604e+10,5.248079e+10,6.565372e+10,5.390953e+10,5.799469e+10,4.254463e+10,5.787750e+10,5.670190e+10,6.170335e+10,0
1,Canada,AAA,6.829000,7.023000,6.945000,7.038000,6.426000,5.837000,5.690000,9.657000,7.527000,5.280000,5.366000,-3.398803e+00,2.483228e+00,-2.318921e+00,-3.513987e+00,-3.093111e+00,-2.803427e+00,-2.385072e+00,-1.951703e+00,-2.010494e+00,1.277747e-02,-3.526320e-01,-7.295740e-01,1.166774e+00,1.485394e+00,1.104747e+00,1.278786e+00,1.325615e+00,1.297936e+00,1.295818e+00,1.326793e+00,1.341153e+00,1.253877e+00,1.301555e+00,1.349909e+00,5.960756e+11,7.447734e+11,1.805750e+12,1.556509e+12,1.527995e+12,1.649266e+12,1.725329e+12,1.743725e+12,1.655685e+12,2.007472e+12,2.161483e+12,2.142471e+12,4.780477e+00,2.719440e+00,1.906636e+00,1.125241e+00,1.428760e+00,1.596884e+00,2.268226e+00,1.949269e+00,7.169996e-01,3.395193e+00,6.802801e+00,3.879002e+00,2.352952e+10,3.242727e+10,7.469996e+10,7.975352e+10,8.271811e+10,8.667771e+10,8.392560e+10,8.529711e+10,9.042814e+10,1.066151e+11,1.069524e+11,1.175509e+11,0
2,Denmark,AAA,4.476000,6.925000,6.278000,5.989000,5.833000,5.131000,5.018000,5.637000,5.043000,4.434000,5.142000,9.923083e-01,1.378800e+00,8.886178e+00,7.953938e+00,7.078509e+00,7.360245e+00,6.244573e+00,7.425270e+00,7.231130e+00,8.659145e+00,1.156562e+01,9.840846e+00,6.188558e+00,8.083144e+00,5.612467e+00,6.727907e+00,6.731718e+00,6.602893e+00,6.314619e+00,6.669447e+00,6.542152e+00,6.287113e+00,7.076152e+00,6.889703e+00,1.382177e+11,1.640438e+11,3.528326e+11,3.017589e+11,3.121818e+11,3.316106e+11,3.552934e+11,3.454015e+11,3.556310e+11,4.083782e+11,4.019456e+11,4.070919e+11,2.641603e+00,2.903282e+00,5.640205e-01,4.520342e-01,2.500000e-01,1.147132e+00,8.136095e-01,7.581316e-01,4.207120e-01,1.853045e+00,7.696567e+00,3.305178e+00,1.122584e+10,1.569595e+10,7.539189e+10,6.518509e+10,6.421581e+10,7.524445e+10,7.094208e+10,6.683555e+10,7.282335e+10,8.223584e+10,9.607255e+10,1.093708e+11,0
3,Germany,AAA,7.917000,4.981000,4.624000,4.122000,3.746000,3.384000,3.136000,3.856000,3.638000,3.135000,3.045000,2.612583e+00,-1.513131e+00,7.192490e+00,8.100335e+00,8.899854e+00,8.083586e+00,8.444744e+00,7.880059e+00,6.349851e+00,6.931968e+00,4.437915e+00,5.936536e+00,1.615733e+00,1.481802e+11,2.963604e+11,4.445405e+11,5.927207e+11,7.409009e+11,8.890811e+11,1.037261e+12,1.185441e+12,1.333622e+12,1.481802e+12,1.629982e+12,1.778162e+12,1.966981e+12,3.965801e+12,3.423568e+12,3.537784e+12,3.763092e+12,4.052008e+12,3.957208e+12,3.940143e+12,4.348297e+12,4.163596e+12,4.525704e+12,2.696468e+00,1.440268e+00,9.067940e-01,5.144261e-01,4.917470e-01,1.509495e+00,1.732169e+00,1.445660e+00,1.448779e-01,3.066667e+00,6.872574e+00,5.946437e+00,1.045473e+11,8.749687e+10,1.934848e+11,1.737309e+11,1.840313e+11,1.999831e+11,1.980271e+11,2.240280e+11,2.684086e+11,2.957362e+11,2.939137e+11,3.227001e+11,0
4,Liechtenstein,AAA,9.640537,9.136085,9.052059,8.903803,8.664479,8.383856,8.244777,9.305229,9.015766,8.230760,7.967091,1.516652e+07,3.017828e+07,4.306998e+07,5.742663e+07,7.143651e+07,8.531168e+07,9.953030e+07,1.134059e+08,1.277577e+08,1.421095e+08,1.564863e+08,1.708664e+08,1.840786e+08,2.451328e+09,4.784157e+09,7.121938e+09,9.459720e+09,1.179867e+10,1.417107e+10,1.654347e+10,1.891586e+10,2.129617e+10,2.368620e+10,2.638014e+10,1.421509e+09,2.483890e+09,6.657527e+09,6.268515e+09,6.237302e+09,6.474309e+09,6.692621e+09,6.436467e+09,6.405870e+09,7.710380e+09,7.361504e+09,7.088856e+09,6.816208e+09,6.543559e+09,6.270911e+09,5.998263e+09,5.725615e+09,5.452966e+09,5.180318e+09,4.907670e+09,4.635021e+09,4.362373e+09,4.089725e+09,3.817076e+09,3.544428e+09,3.271780e+09,2.999131e+09,2.726483e+09,2.453835e+09,2.181187e+09,1.908538e+09,1.635890e+09,1.363242e+09,1.090593e+09,8.179449e+08,5.452966e+08,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,Sri Lanka,SD,7.740000,4.157000,4.519000,4.243000,4.046000,4.318000,4.670000,5.365000,5.258000,6.330000,6.360000,-3.713648e+00,-6.390297e+00,-2.408481e+00,-2.211361e+00,-1.979723e+00,-2.446586e+00,-2.962153e+00,-2.070888e+00,-1.408137e+00,-3.706463e+00,-1.953545e+00,1.847993e+00,4.006292e+01,7.700512e+01,1.305647e+02,1.358569e+02,1.455817e+02,1.524464e+02,1.624649e+02,1.787449e+02,1.855926e+02,1.987643e+02,3.226327e+02,3.275065e+02,8.032551e+09,1.633081e+10,8.252854e+10,8.514096e+10,8.801228e+10,9.437624e+10,9.449387e+10,8.901498e+10,8.430430e+10,8.860932e+10,7.414487e+10,8.435686e+10,2.149525e+01,6.176276e+00,3.179002e+00,3.768368e+00,3.958888e+00,7.704138e+00,2.135038e+00,3.528394e+00,6.153945e+00,7.014781e+00,4.972110e+01,1.654117e+01,4.470305e+08,1.131355e+09,8.210750e+09,7.302097e+09,6.008199e+09,7.959048e+09,6.920826e+09,7.648305e+09,5.663994e+09,3.136992e+09,2.352744e+09,1.568496e+09,2
127,Ukraine,SD,11.707000,9.270000,9.140000,9.350000,9.500000,8.799000,8.194000,9.475000,9.834000,8.411791,6.989581,5.567372e+00,4.145163e+00,-3.442597e+00,5.531085e+00,-1.998803e+00,-3.098389e+00,-4.914009e+00,-2.679957e+00,3.362966e+00,-1.943275e+00,4.923775e+00,-5.401186e+00,1.952346e-02,5.440233e+00,1.188666e+01,2.184470e+01,2.555133e+01,2.659661e+01,2.720049e+01,2.584559e+01,2.695752e+01,2.728619e+01,3.234230e+01,3.657381e+01,8.139356e+10,3.237508e+10,1.335039e+11,9.103097e+10,9.335587e+10,1.120905e+11,1.308911e+11,1.538830e+11,1.566177e+11,1.997659e+11,1.619895e+11,1.787570e+11,8.937851e+10,2.820310e+01,1.207186e+01,4.869986e+01,1.391271e+01,1.443832e+01,1.095186e+01,7.886717e+00,2.732492e+00,9.363139e+00,2.018364e+01,1.284902e+01,7.386012e+08,1.477202e+09,7.538805e+09,1.330088e+10,1.553726e+10,1.881093e+10,2.081790e+10,2.531700e+10,2.913754e+10,3.096667e+10,2.850593e+10,4.051011e+10,2
128,Lebanon,D,8.594000,8.796000,9.270000,9.760000,10.236000,10.741000,11.301000,13.235000,12.621000,11.599000,11.565000,-1.033289e+00,-1.363158e+01,-2.622987e+01,-1.710774e+01,-2.047802e+01,-2.288220e+01,-2.434704e+01,-2.182809e+01,-8.761842e+00,-1.969781e+01,-3.460623e+01,-2.810367e+01,6.950892e+02,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.419243e+09,2.838485e+09,1.726036e+10,4.809521e+10,4.992934e+10,5.114731e+10,5.302768e+10,5.490152e+10,5.160596e+10,3.171213e+10,2.313194e+10,2.099242e+10,1.574432e+10,1.049621e+10,5.248105e+09,1.854604e+00,-3.749145e+00,-7.833596e-01,4.321352e+00,6.076989e+00,3.005389e+00,8.486433e+01,1.547561e+02,1.712055e+02,2.213416e+02,4.210389e+09,8.474638e+09,5.066886e+10,4.853139e+10,5.390551e+10,5.541153e+10,5.238061e+10,5.221348e+10,4.244040e+10,3.523922e+10,3.251288e+10,2.167525e+10,2
129,Puerto Rico,D,10.080000,13.900000,12.000000,11.800000,10.800000,9.200000,8.300000,8.890000,7.900000,6.000000,5.962000,1.224157e+09,2.448314e+09,3.672470e+09,4.896627e+09,6.120784e+09,7.344941e+09,8.569097e+09,9.793254e+09,1.101741e+10,1.224157e+10,1.346572e+10,1.468988e+10,1.591404e+10,1.713819e+10,1.836235e+10,1.958651e+10,2.081066e+10,2.203482e+10,2.325898e+10,2.448314e+10,2.570729e+10,2.693145e+10,2.815561e+10,2.937976e+10,3.060392e+10,6.170180e+10,1.020000e+11,1.030000e+11,1.040000e+11,1.030000e+11,1.010000e+11,1.050000e+11,1.030000e+11,1.060000e+11,1.140000e+11,1.180000e+11,1.134615e+11,1.089231e+11,1.043846e+11,9.984615e+10,9.530769e+10,9.076923e+10,8.623077e+10,8.169231e+10,7.715385e+10,7.261538e+10,6.807692e+10,6.353846e+10,5.900000e+10,5.446154e+10,4.992308e+10,4.538462e+10,4.084615e+10,3.630769e+10,3.176923e+10,2.723077e+10,2.269231e+10,1.815385e+10,1.361538e+10,9.076923e+09,2


In [80]:
# Use the country name as the row label; set_index with a column key moves the
# column into the index, so no separate drop of 'Country' is needed.
data = data.set_index('Country')

In [81]:
# Confirm that 'Country' is now the index and no longer a regular column.
data.head()

Unnamed: 0_level_0,S&P Rating,unemployment_2000,unemployment_2014,unemployment_2015,unemployment_2016,unemployment_2017,unemployment_2018,unemployment_2019,unemployment_2020,unemployment_2021,unemployment_2022,unemployment_2023,current_account_balance_1990,current_account_balance_2000,current_account_balance_2014,current_account_balance_2015,current_account_balance_2016,current_account_balance_2017,current_account_balance_2018,current_account_balance_2019,current_account_balance_2020,current_account_balance_2021,current_account_balance_2022,current_account_balance_2023,exchange_rate_usd_1990,exchange_rate_usd_2000,exchange_rate_usd_2014,exchange_rate_usd_2015,exchange_rate_usd_2016,exchange_rate_usd_2017,exchange_rate_usd_2018,exchange_rate_usd_2019,exchange_rate_usd_2020,exchange_rate_usd_2021,exchange_rate_usd_2022,exchange_rate_usd_2023,gdp_1990,gdp_2000,gdp_2014,gdp_2015,gdp_2016,gdp_2017,gdp_2018,gdp_2019,gdp_2020,gdp_2021,gdp_2022,gdp_2023,inflation_1990,inflation_2000,inflation_2014,inflation_2015,inflation_2016,inflation_2017,inflation_2018,inflation_2019,inflation_2020,inflation_2021,inflation_2022,inflation_2023,total_reserves_1990,total_reserves_2000,total_reserves_2014,total_reserves_2015,total_reserves_2016,total_reserves_2017,total_reserves_2018,total_reserves_2019,total_reserves_2020,total_reserves_2021,total_reserves_2022,total_reserves_2023,target
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1
Australia,AAA,6.288,6.078,6.055,5.711,5.592,5.3,5.159,6.456,5.116,3.701,3.667,-4.987491,-3.721401,-3.054734,-4.229028,-3.44049,-2.722911,-2.233402,0.3501218,2.330931,3.137209,0.8453239,0.2547373,1.281057,1.724827,1.109363,1.33109,1.345214,1.304758,1.338412,1.438507,1.453085,1.331224,1.441664,1.505191,311426700000.0,416167800000.0,1468265000000.0,1351296000000.0,1206837000000.0,1325583000000.0,1427809000000.0,1392724000000.0,1328414000000.0,1556736000000.0,1690858000000.0,1728057000000.0,7.333022,4.457435,2.487923,1.508367,1.276991,1.948647,1.911401,1.610768,0.8469055,2.86391,6.594097,5.597015,19318740000.0,18821550000.0,53910330000.0,45406040000.0,52480790000.0,65653720000.0,53909530000.0,57994690000.0,42544630000.0,57877500000.0,56701900000.0,61703350000.0,0
Canada,AAA,6.829,7.023,6.945,7.038,6.426,5.837,5.69,9.657,7.527,5.28,5.366,-3.398803,2.483228,-2.318921,-3.513987,-3.093111,-2.803427,-2.385072,-1.951703,-2.010494,0.01277747,-0.352632,-0.729574,1.166774,1.485394,1.104747,1.278786,1.325615,1.297936,1.295818,1.326793,1.341153,1.253877,1.301555,1.349909,596075600000.0,744773400000.0,1805750000000.0,1556509000000.0,1527995000000.0,1649266000000.0,1725329000000.0,1743725000000.0,1655685000000.0,2007472000000.0,2161483000000.0,2142471000000.0,4.780477,2.71944,1.906636,1.125241,1.42876,1.596884,2.268226,1.949269,0.7169996,3.395193,6.802801,3.879002,23529520000.0,32427270000.0,74699960000.0,79753520000.0,82718110000.0,86677710000.0,83925600000.0,85297110000.0,90428140000.0,106615100000.0,106952400000.0,117550900000.0,0
Denmark,AAA,4.476,6.925,6.278,5.989,5.833,5.131,5.018,5.637,5.043,4.434,5.142,0.9923083,1.3788,8.886178,7.953938,7.078509,7.360245,6.244573,7.42527,7.23113,8.659145,11.56562,9.840846,6.188558,8.083144,5.612467,6.727907,6.731718,6.602893,6.314619,6.669447,6.542152,6.287113,7.076152,6.889703,138217700000.0,164043800000.0,352832600000.0,301758900000.0,312181800000.0,331610600000.0,355293400000.0,345401500000.0,355631000000.0,408378200000.0,401945600000.0,407091900000.0,2.641603,2.903282,0.5640205,0.4520342,0.25,1.147132,0.8136095,0.7581316,0.420712,1.853045,7.696567,3.305178,11225840000.0,15695950000.0,75391890000.0,65185090000.0,64215810000.0,75244450000.0,70942080000.0,66835550000.0,72823350000.0,82235840000.0,96072550000.0,109370800000.0,0
Germany,AAA,7.917,4.981,4.624,4.122,3.746,3.384,3.136,3.856,3.638,3.135,3.045,2.612583,-1.513131,7.19249,8.100335,8.899854,8.083586,8.444744,7.880059,6.349851,6.931968,4.437915,5.936536,1.615733,148180200000.0,296360400000.0,444540500000.0,592720700000.0,740900900000.0,889081100000.0,1037261000000.0,1185441000000.0,1333622000000.0,1481802000000.0,1629982000000.0,1778162000000.0,1966981000000.0,3965801000000.0,3423568000000.0,3537784000000.0,3763092000000.0,4052008000000.0,3957208000000.0,3940143000000.0,4348297000000.0,4163596000000.0,4525704000000.0,2.696468,1.440268,0.906794,0.5144261,0.491747,1.509495,1.732169,1.44566,0.1448779,3.066667,6.872574,5.946437,104547300000.0,87496870000.0,193484800000.0,173730900000.0,184031300000.0,199983100000.0,198027100000.0,224028000000.0,268408600000.0,295736200000.0,293913700000.0,322700100000.0,0
Liechtenstein,AAA,9.640537,9.136085,9.052059,8.903803,8.664479,8.383856,8.244777,9.305229,9.015766,8.23076,7.967091,15166520.0,30178280.0,43069980.0,57426630.0,71436510.0,85311680.0,99530300.0,113405900.0,127757700.0,142109500.0,156486300.0,170866400.0,184078600.0,2451328000.0,4784157000.0,7121938000.0,9459720000.0,11798670000.0,14171070000.0,16543470000.0,18915860000.0,21296170000.0,23686200000.0,26380140000.0,1421509000.0,2483890000.0,6657527000.0,6268515000.0,6237302000.0,6474309000.0,6692621000.0,6436467000.0,6405870000.0,7710380000.0,7361504000.0,7088856000.0,6816208000.0,6543559000.0,6270911000.0,5998263000.0,5725615000.0,5452966000.0,5180318000.0,4907670000.0,4635021000.0,4362373000.0,4089725000.0,3817076000.0,3544428000.0,3271780000.0,2999131000.0,2726483000.0,2453835000.0,2181187000.0,1908538000.0,1635890000.0,1363242000.0,1090593000.0,817944900.0,545296600.0,0


## Train Test Split 

In [82]:
# Features: every column except the class label and the raw rating text it
# was derived from. Target: kept as a one-column DataFrame.
_non_feature_columns = ['target', 'S&P Rating']
X = data.drop(columns=_non_feature_columns)
y = data.loc[:, ['target']]

In [83]:
# Hold out 40% of the countries for evaluation; the shared seed fixes the shuffle.
# NOTE(review): the split is not stratified on `y` - confirm that the smallest
# class is adequately represented on both sides.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=seed,
)

## Initial Modeling and Results

In [84]:
# Fit the four baseline classifiers and keep their hard predictions on both
# splits for the metric cells that follow.

# `y_train` is a single-column DataFrame; scikit-learn estimators expect a
# 1-D label vector, so flatten it once and reuse it for every fit.
y_train_labels = np.ravel(y_train)

# 1. Logistic Regression
# `multi_class` is intentionally not passed: the argument is deprecated in
# recent scikit-learn releases, and with the lbfgs solver the default already
# fits a multinomial model when there are more than two classes.
logistic = LogisticRegression(solver='lbfgs', random_state=seed)
logistic.fit(X_train, y_train_labels)
logistic_test_yhat = logistic.predict(X_test)
logistic_train_yhat = logistic.predict(X_train)


# 2. Ridge Classifier
ridge_classifier = RidgeClassifier(alpha = 0.5, random_state=seed)
ridge_classifier.fit(X_train, y_train_labels)
ridge_classifier_test_yhat = ridge_classifier.predict(X_test)
ridge_classifier_train_yhat = ridge_classifier.predict(X_train)

# 3. Gaussian Naive Bayes
naive_bayes = GaussianNB()
naive_bayes.fit(X_train, y_train_labels)
naive_bayes_test_yhat = naive_bayes.predict(X_test)
naive_bayes_train_yhat = naive_bayes.predict(X_train)

# 4. Support Vector Classifier
# probability=True additionally enables predict_proba on the fitted model.
svc = SVC(probability = True, random_state=seed)
svc.fit(X_train, y_train_labels)
svc_test_yhat = svc.predict(X_test)
svc_train_yhat = svc.predict(X_train)

In [85]:
# Multi-class AUC report for the four baseline models.
#
# The models are NOT re-fitted here: they were already fitted on
# (X_train, y_train) in the modelling cell, and fitting them a second time on
# the same data with the same seed only repeats the work.

# Binarize the output for multi-class AUC
y_train_binarized = label_binarize(y_train, classes=[0, 1, 2])  # Adjust class labels as needed
y_test_binarized = label_binarize(y_test, classes=[0, 1, 2])

_RULE = '-------------------------------------------------------------------------------'


def _report_auc(title, train_scores, test_scores):
    """Print and return the train/test AUC for one model.

    Parameters
    ----------
    title : str
        Heading printed above the two scores.
    train_scores, test_scores : array-like of shape (n_samples, n_classes)
        Per-class scores (probabilities or decision values) on each split.

    Returns
    -------
    tuple of float
        (train_auc, test_auc), each computed against the binarized labels.
    """
    train_auc = roc_auc_score(y_train_binarized, train_scores, multi_class='ovr')
    test_auc = roc_auc_score(y_test_binarized, test_scores, multi_class='ovr')
    print(cl(title, attrs=['bold']))
    print(cl('Train - AUC score is {:.4f}'.format(train_auc), attrs=['bold']))
    print(cl('Test - AUC score is {:.4f}'.format(test_auc), attrs=['bold']))
    return train_auc, test_auc


print(cl('AUC SCORE (Multi-Class):', attrs=['bold']))
print(_RULE)

# 1. Logistic Regression
logistic_train_auc, logistic_test_auc = _report_auc(
    'Logistic model:',
    logistic.predict_proba(X_train),
    logistic.predict_proba(X_test),
)

print(_RULE)

# 2. Ridge Classifier (no predict_proba, so use the decision scores)
ridge_train_scores = ridge_classifier.decision_function(X_train)  # Multiclass decision scores
ridge_test_scores = ridge_classifier.decision_function(X_test)
ridge_classifier_train_auc, ridge_classifier_test_auc = _report_auc(
    'Ridge model:',
    ridge_train_scores,
    ridge_test_scores,
)

print(_RULE)

# 3. Gaussian Naive Bayes
naive_bayes_train_auc, naive_bayes_test_auc = _report_auc(
    'Naive Bayes model:',
    naive_bayes.predict_proba(X_train),
    naive_bayes.predict_proba(X_test),
)

print(_RULE)

# 4. Support Vector Classifier
svc_train_auc, svc_test_auc = _report_auc(
    'SVC model:',
    svc.decision_function(X_train),  # Use decision_function
    svc.decision_function(X_test),
)



[1mAUC SCORE (Multi-Class):[0m
-------------------------------------------------------------------------------
[1mLogistic model:[0m
[1mTrain - AUC score is 0.9148[0m
[1mTest - AUC score is 0.8102[0m
-------------------------------------------------------------------------------
[1mRidge model:[0m
[1mTrain - AUC score is 1.0000[0m
[1mTest - AUC score is 0.4185[0m
-------------------------------------------------------------------------------
[1mNaive Bayes model:[0m
[1mTrain - AUC score is 0.6967[0m
[1mTest - AUC score is 0.7161[0m
-------------------------------------------------------------------------------
[1mSVC model:[0m
[1mTrain - AUC score is 0.7028[0m
[1mTest - AUC score is 0.5403[0m


In [86]:
# 1. Logistic Regression: per-class precision / recall / F1 on the test split.
print(cl('Logistic Model: Other Metrics:', attrs=['bold']))
# The report is built from `logistic_test_yhat`, test predictions stored in an
# earlier cell, so the model is not re-fitted here: a fresh fit would never
# reach the predictions that are being reported.
print('     ')
print(cl('Classification Report (Test):'))
# zero_division=0 states explicitly what is reported for a class that is never
# predicted, instead of relying on the notebook-wide UndefinedMetricWarning filter.
print(classification_report(y_test, logistic_test_yhat, zero_division=0))

[1mLogistic Model: Other Metrics:[0m
     
Classification Report (Test):[0m
              precision    recall  f1-score   support

           0       0.70      0.61      0.65        23
           1       0.75      0.67      0.71        27
           2       0.14      1.00      0.25         1

    accuracy                           0.65        51
   macro avg       0.53      0.76      0.54        51
weighted avg       0.72      0.65      0.67        51



In [87]:
# 2. Ridge Classifier: per-class precision / recall / F1 on the test split.
print(cl('Ridge Classifier: Other Metrics:', attrs=['bold']))
# The report is built from `ridge_classifier_test_yhat`, the test predictions
# stored when the model was fitted, so the model is not re-fitted here: a fresh
# fit would never reach the predictions that are being reported.
print('     ')
print(cl('Classification Report (Test):'))
# zero_division=0 states explicitly what is reported for a class that is never
# predicted, instead of relying on the notebook-wide UndefinedMetricWarning filter.
print(classification_report(y_test, ridge_classifier_test_yhat, zero_division=0))

[1mRidge Classifier: Other Metrics:[0m
     
Classification Report (Test):[0m
              precision    recall  f1-score   support

           0       0.62      0.35      0.44        23
           1       0.63      0.44      0.52        27
           2       0.00      0.00      0.00         1

    accuracy                           0.39        51
   macro avg       0.42      0.26      0.32        51
weighted avg       0.61      0.39      0.48        51



In [88]:
# 3. Gaussian Naive Bayes: per-class precision / recall / F1 on the test split.
print(cl('Gaussian Naive Bayes: Other Metrics:', attrs=['bold']))
# The report is built from `naive_bayes_test_yhat`, the test predictions stored
# when the model was fitted, so the model is not re-fitted here: a fresh fit
# would never reach the predictions that are being reported.
print('     ')
print(cl('Classification Report (Test):'))
# zero_division=0 states explicitly what is reported for a class that is never
# predicted, instead of relying on the notebook-wide UndefinedMetricWarning filter.
print(classification_report(y_test, naive_bayes_test_yhat, zero_division=0))

[1mGaussian Naive Bayes: Other Metrics:[0m
     
Classification Report (Test):[0m
              precision    recall  f1-score   support

           0       0.83      0.22      0.34        23
           1       0.57      0.85      0.69        27
           2       0.20      1.00      0.33         1

    accuracy                           0.57        51
   macro avg       0.54      0.69      0.45        51
weighted avg       0.68      0.57      0.53        51



In [89]:
# 4. Support Vector Classifier: per-class precision / recall / F1 on the test split.
print(cl('Support Vector Classifier: Other Metrics:', attrs=['bold']))
# The report is built from `svc_test_yhat`, the test predictions stored when the
# model was fitted, so the model is not re-fitted here: a fresh fit would never
# reach the predictions that are being reported.
print('     ')
print(cl('Classification Report (Test):'))
# zero_division=0 states explicitly what is reported for a class that is never
# predicted, instead of relying on the notebook-wide UndefinedMetricWarning filter.
print(classification_report(y_test, svc_test_yhat, zero_division=0))

[1mSupport Vector Classifier: Other Metrics:[0m
     
Classification Report (Test):[0m
              precision    recall  f1-score   support

           0       0.80      0.17      0.29        23
           1       0.57      0.96      0.71        27
           2       0.00      0.00      0.00         1

    accuracy                           0.59        51
   macro avg       0.46      0.38      0.33        51
weighted avg       0.66      0.59      0.51        51



In [90]:
# Gather every model's train and test AUC, always in the same model order
model_names = ['Logistic Regression', 'Ridge Classifier', 'Naive Bayes', 'SVC']
train_auc_scores = [
    logistic_train_auc,
    ridge_classifier_train_auc,
    naive_bayes_train_auc,
    svc_train_auc,
]
test_auc_scores = [
    logistic_test_auc,
    ridge_classifier_test_auc,
    naive_bayes_test_auc,
    svc_test_auc,
]

# Long-format table for plotting: all train rows first, then all test rows
auc_data = pd.DataFrame({
    'Model': model_names + model_names,
    'Dataset': [split for split in ('Train', 'Test') for _ in model_names],
    'AUC Score': [*train_auc_scores, *test_auc_scores],
})

# Grouped bars (one train/test pair per model) on the plotly_dark template
fig = px.bar(
    data_frame=auc_data,
    x='Model',
    y='AUC Score',
    color='Dataset',
    barmode='group',
    template='plotly_dark',
    title='Comparison of AUC Scores Across Models',
    labels={'AUC Score': 'AUC Score', 'Model': 'Classification Model'},
)

# Layout tweaks.
# NOTE(review): xaxis_title='Model' replaces the 'Classification Model' axis label
# passed to px.bar above — confirm which wording is intended.
fig.update_layout(
    legend_title='Dataset',
    xaxis_title='Model',
    yaxis_title='AUC Score',
    title_font_size=20,
)

# Render the figure
fig.show()


# Improved Modeling

## Feature Engineering 

Creation of relevant ratios regarding country ratings.

In [91]:
# Work on a copy so the engineered columns added in the cells below leave `data` unchanged
df = data.copy()

In [92]:
# Preview the first rows of the working copy before adding features
df.head()

Unnamed: 0_level_0,S&P Rating,unemployment_2000,unemployment_2014,unemployment_2015,unemployment_2016,unemployment_2017,unemployment_2018,unemployment_2019,unemployment_2020,unemployment_2021,unemployment_2022,unemployment_2023,current_account_balance_1990,current_account_balance_2000,current_account_balance_2014,current_account_balance_2015,current_account_balance_2016,current_account_balance_2017,current_account_balance_2018,current_account_balance_2019,current_account_balance_2020,current_account_balance_2021,current_account_balance_2022,current_account_balance_2023,exchange_rate_usd_1990,exchange_rate_usd_2000,exchange_rate_usd_2014,exchange_rate_usd_2015,exchange_rate_usd_2016,exchange_rate_usd_2017,exchange_rate_usd_2018,exchange_rate_usd_2019,exchange_rate_usd_2020,exchange_rate_usd_2021,exchange_rate_usd_2022,exchange_rate_usd_2023,gdp_1990,gdp_2000,gdp_2014,gdp_2015,gdp_2016,gdp_2017,gdp_2018,gdp_2019,gdp_2020,gdp_2021,gdp_2022,gdp_2023,inflation_1990,inflation_2000,inflation_2014,inflation_2015,inflation_2016,inflation_2017,inflation_2018,inflation_2019,inflation_2020,inflation_2021,inflation_2022,inflation_2023,total_reserves_1990,total_reserves_2000,total_reserves_2014,total_reserves_2015,total_reserves_2016,total_reserves_2017,total_reserves_2018,total_reserves_2019,total_reserves_2020,total_reserves_2021,total_reserves_2022,total_reserves_2023,target
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1
Australia,AAA,6.288,6.078,6.055,5.711,5.592,5.3,5.159,6.456,5.116,3.701,3.667,-4.987491,-3.721401,-3.054734,-4.229028,-3.44049,-2.722911,-2.233402,0.3501218,2.330931,3.137209,0.8453239,0.2547373,1.281057,1.724827,1.109363,1.33109,1.345214,1.304758,1.338412,1.438507,1.453085,1.331224,1.441664,1.505191,311426700000.0,416167800000.0,1468265000000.0,1351296000000.0,1206837000000.0,1325583000000.0,1427809000000.0,1392724000000.0,1328414000000.0,1556736000000.0,1690858000000.0,1728057000000.0,7.333022,4.457435,2.487923,1.508367,1.276991,1.948647,1.911401,1.610768,0.8469055,2.86391,6.594097,5.597015,19318740000.0,18821550000.0,53910330000.0,45406040000.0,52480790000.0,65653720000.0,53909530000.0,57994690000.0,42544630000.0,57877500000.0,56701900000.0,61703350000.0,0
Canada,AAA,6.829,7.023,6.945,7.038,6.426,5.837,5.69,9.657,7.527,5.28,5.366,-3.398803,2.483228,-2.318921,-3.513987,-3.093111,-2.803427,-2.385072,-1.951703,-2.010494,0.01277747,-0.352632,-0.729574,1.166774,1.485394,1.104747,1.278786,1.325615,1.297936,1.295818,1.326793,1.341153,1.253877,1.301555,1.349909,596075600000.0,744773400000.0,1805750000000.0,1556509000000.0,1527995000000.0,1649266000000.0,1725329000000.0,1743725000000.0,1655685000000.0,2007472000000.0,2161483000000.0,2142471000000.0,4.780477,2.71944,1.906636,1.125241,1.42876,1.596884,2.268226,1.949269,0.7169996,3.395193,6.802801,3.879002,23529520000.0,32427270000.0,74699960000.0,79753520000.0,82718110000.0,86677710000.0,83925600000.0,85297110000.0,90428140000.0,106615100000.0,106952400000.0,117550900000.0,0
Denmark,AAA,4.476,6.925,6.278,5.989,5.833,5.131,5.018,5.637,5.043,4.434,5.142,0.9923083,1.3788,8.886178,7.953938,7.078509,7.360245,6.244573,7.42527,7.23113,8.659145,11.56562,9.840846,6.188558,8.083144,5.612467,6.727907,6.731718,6.602893,6.314619,6.669447,6.542152,6.287113,7.076152,6.889703,138217700000.0,164043800000.0,352832600000.0,301758900000.0,312181800000.0,331610600000.0,355293400000.0,345401500000.0,355631000000.0,408378200000.0,401945600000.0,407091900000.0,2.641603,2.903282,0.5640205,0.4520342,0.25,1.147132,0.8136095,0.7581316,0.420712,1.853045,7.696567,3.305178,11225840000.0,15695950000.0,75391890000.0,65185090000.0,64215810000.0,75244450000.0,70942080000.0,66835550000.0,72823350000.0,82235840000.0,96072550000.0,109370800000.0,0
Germany,AAA,7.917,4.981,4.624,4.122,3.746,3.384,3.136,3.856,3.638,3.135,3.045,2.612583,-1.513131,7.19249,8.100335,8.899854,8.083586,8.444744,7.880059,6.349851,6.931968,4.437915,5.936536,1.615733,148180200000.0,296360400000.0,444540500000.0,592720700000.0,740900900000.0,889081100000.0,1037261000000.0,1185441000000.0,1333622000000.0,1481802000000.0,1629982000000.0,1778162000000.0,1966981000000.0,3965801000000.0,3423568000000.0,3537784000000.0,3763092000000.0,4052008000000.0,3957208000000.0,3940143000000.0,4348297000000.0,4163596000000.0,4525704000000.0,2.696468,1.440268,0.906794,0.5144261,0.491747,1.509495,1.732169,1.44566,0.1448779,3.066667,6.872574,5.946437,104547300000.0,87496870000.0,193484800000.0,173730900000.0,184031300000.0,199983100000.0,198027100000.0,224028000000.0,268408600000.0,295736200000.0,293913700000.0,322700100000.0,0
Liechtenstein,AAA,9.640537,9.136085,9.052059,8.903803,8.664479,8.383856,8.244777,9.305229,9.015766,8.23076,7.967091,15166520.0,30178280.0,43069980.0,57426630.0,71436510.0,85311680.0,99530300.0,113405900.0,127757700.0,142109500.0,156486300.0,170866400.0,184078600.0,2451328000.0,4784157000.0,7121938000.0,9459720000.0,11798670000.0,14171070000.0,16543470000.0,18915860000.0,21296170000.0,23686200000.0,26380140000.0,1421509000.0,2483890000.0,6657527000.0,6268515000.0,6237302000.0,6474309000.0,6692621000.0,6436467000.0,6405870000.0,7710380000.0,7361504000.0,7088856000.0,6816208000.0,6543559000.0,6270911000.0,5998263000.0,5725615000.0,5452966000.0,5180318000.0,4907670000.0,4635021000.0,4362373000.0,4089725000.0,3817076000.0,3544428000.0,3271780000.0,2999131000.0,2726483000.0,2453835000.0,2181187000.0,1908538000.0,1635890000.0,1363242000.0,1090593000.0,817944900.0,545296600.0,0


In [93]:
# List the column labels that the ratio cells below build their names from
df.columns

Index(['S&P Rating', 'unemployment_2000', 'unemployment_2014',
       'unemployment_2015', 'unemployment_2016', 'unemployment_2017',
       'unemployment_2018', 'unemployment_2019', 'unemployment_2020',
       'unemployment_2021', 'unemployment_2022', 'unemployment_2023',
       'current_account_balance_1990', 'current_account_balance_2000',
       'current_account_balance_2014', 'current_account_balance_2015',
       'current_account_balance_2016', 'current_account_balance_2017',
       'current_account_balance_2018', 'current_account_balance_2019',
       'current_account_balance_2020', 'current_account_balance_2021',
       'current_account_balance_2022', 'current_account_balance_2023',
       'exchange_rate_usd_1990', 'exchange_rate_usd_2000',
       'exchange_rate_usd_2014', 'exchange_rate_usd_2015',
       'exchange_rate_usd_2016', 'exchange_rate_usd_2017',
       'exchange_rate_usd_2018', 'exchange_rate_usd_2019',
       'exchange_rate_usd_2020', 'exchange_rate_usd_2021',
      

#### Current Account Balance to GDP Ratio

In [94]:
# Years covered by the dataset's per-year columns; the later ratio cells reuse this list
years = [1990, 2000, *range(2014, 2024)]

# Add one cab_to_gdp_<year> column per year: current-account balance divided by GDP.
# NOTE(review): the previewed rows show current_account_balance values such as -4.99,
# which may already be expressed as a share of GDP — confirm the units of this ratio.
for yr in years:
    balance_name = f'current_account_balance_{yr}'
    gdp_name = f'gdp_{yr}'
    # A year is skipped when either of its two source columns is absent
    if not {balance_name, gdp_name}.issubset(df.columns):
        continue
    df[f'cab_to_gdp_{yr}'] = df[balance_name].div(df[gdp_name])

#### Total reserves to GDP Ratio

In [95]:
# Add one reserves_to_gdp_<year> column per year: total reserves divided by GDP
for yr in years:
    source_cols = (f'total_reserves_{yr}', f'gdp_{yr}')
    # Only years with both source columns present produce a ratio
    if all(name in df.columns for name in source_cols):
        reserves, gdp = (df[name] for name in source_cols)
        df[f'reserves_to_gdp_{yr}'] = reserves.div(gdp)

#### GDP Growth Rate 

In [96]:
# Growth is only computed between consecutive calendar years, so the isolated
# 1990 and 2000 snapshots are left out.
growth_years = list(range(2014, 2024))

# Walk over (previous, current) pairs of adjacent years.
for previous, current in zip(growth_years, growth_years[1:]):
    gdp_prev = f'gdp_{previous}'
    gdp_curr = f'gdp_{current}'

    # Skip the pair when either GDP column is absent from df.
    if gdp_prev not in df.columns or gdp_curr not in df.columns:
        continue

    # Relative change: (current - previous) / previous.
    yearly_change = df[gdp_curr].sub(df[gdp_prev])
    df[f'gdp_growth_{previous}_{current}'] = yearly_change.div(df[gdp_prev])

#### Inflation to Unemployment Ratio

In [97]:
# Add one inflation_to_unemployment_<year> column per year: inflation divided
# by unemployment. Years lacking either input column are skipped by the guard.
for yr in years:
    numerator_name = f'inflation_{yr}'
    denominator_name = f'unemployment_{yr}'

    if numerator_name not in df.columns or denominator_name not in df.columns:
        continue

    df[f'inflation_to_unemployment_{yr}'] = df[numerator_name].div(df[denominator_name])

In [98]:
# Display df to inspect the ratio and growth columns added by the cells above.
df

Unnamed: 0_level_0,S&P Rating,unemployment_2000,unemployment_2014,unemployment_2015,unemployment_2016,unemployment_2017,unemployment_2018,unemployment_2019,unemployment_2020,unemployment_2021,unemployment_2022,unemployment_2023,current_account_balance_1990,current_account_balance_2000,current_account_balance_2014,current_account_balance_2015,current_account_balance_2016,current_account_balance_2017,current_account_balance_2018,current_account_balance_2019,current_account_balance_2020,current_account_balance_2021,current_account_balance_2022,current_account_balance_2023,exchange_rate_usd_1990,exchange_rate_usd_2000,exchange_rate_usd_2014,exchange_rate_usd_2015,exchange_rate_usd_2016,exchange_rate_usd_2017,exchange_rate_usd_2018,exchange_rate_usd_2019,exchange_rate_usd_2020,exchange_rate_usd_2021,exchange_rate_usd_2022,exchange_rate_usd_2023,gdp_1990,gdp_2000,gdp_2014,gdp_2015,gdp_2016,gdp_2017,gdp_2018,gdp_2019,gdp_2020,gdp_2021,gdp_2022,gdp_2023,inflation_1990,inflation_2000,inflation_2014,inflation_2015,inflation_2016,inflation_2017,inflation_2018,inflation_2019,inflation_2020,inflation_2021,inflation_2022,inflation_2023,total_reserves_1990,total_reserves_2000,total_reserves_2014,total_reserves_2015,total_reserves_2016,total_reserves_2017,total_reserves_2018,total_reserves_2019,total_reserves_2020,total_reserves_2021,total_reserves_2022,total_reserves_2023,target,cab_to_gdp_1990,cab_to_gdp_2000,cab_to_gdp_2014,cab_to_gdp_2015,cab_to_gdp_2016,cab_to_gdp_2017,cab_to_gdp_2018,cab_to_gdp_2019,cab_to_gdp_2020,cab_to_gdp_2021,cab_to_gdp_2022,cab_to_gdp_2023,reserves_to_gdp_1990,reserves_to_gdp_2000,reserves_to_gdp_2014,reserves_to_gdp_2015,reserves_to_gdp_2016,reserves_to_gdp_2017,reserves_to_gdp_2018,reserves_to_gdp_2019,reserves_to_gdp_2020,reserves_to_gdp_2021,reserves_to_gdp_2022,reserves_to_gdp_2023,gdp_growth_2014_2015,gdp_growth_2015_2016,gdp_growth_2016_2017,gdp_growth_2017_2018,gdp_growth_2018_2019,gdp_growth_2019_2020,gdp_growth_2020_2021,gdp_growth_2021_2022,g
dp_growth_2022_2023,inflation_to_unemployment_2000,inflation_to_unemployment_2014,inflation_to_unemployment_2015,inflation_to_unemployment_2016,inflation_to_unemployment_2017,inflation_to_unemployment_2018,inflation_to_unemployment_2019,inflation_to_unemployment_2020,inflation_to_unemployment_2021,inflation_to_unemployment_2022,inflation_to_unemployment_2023
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1
Australia,AAA,6.288000,6.078000,6.055000,5.711000,5.592000,5.300000,5.159000,6.456000,5.116000,3.701000,3.667000,-4.987491e+00,-3.721401e+00,-3.054734e+00,-4.229028e+00,-3.440490e+00,-2.722911e+00,-2.233402e+00,3.501218e-01,2.330931e+00,3.137209e+00,8.453239e-01,2.547373e-01,1.281057e+00,1.724827e+00,1.109363e+00,1.331090e+00,1.345214e+00,1.304758e+00,1.338412e+00,1.438507e+00,1.453085e+00,1.331224e+00,1.441664e+00,1.505191e+00,3.114267e+11,4.161678e+11,1.468265e+12,1.351296e+12,1.206837e+12,1.325583e+12,1.427809e+12,1.392724e+12,1.328414e+12,1.556736e+12,1.690858e+12,1.728057e+12,7.333022e+00,4.457435e+00,2.487923e+00,1.508367e+00,1.276991e+00,1.948647e+00,1.911401e+00,1.610768e+00,8.469055e-01,2.863910e+00,6.594097e+00,5.597015e+00,1.931874e+10,1.882155e+10,5.391033e+10,4.540604e+10,5.248079e+10,6.565372e+10,5.390953e+10,5.799469e+10,4.254463e+10,5.787750e+10,5.670190e+10,6.170335e+10,0,-1.601498e-11,-8.942069e-12,-2.080506e-12,-3.129608e-12,-2.850832e-12,-2.054124e-12,-1.564216e-12,2.513936e-13,1.754672e-12,2.015248e-12,4.999378e-13,1.474125e-13,0.062033,0.045226,0.036717,0.033602,0.043486,0.049528,0.037757,0.041641,0.032027,0.037179,0.033534,0.035707,-0.079665,-0.106904,0.098394,0.077118,-0.024573,-0.046176,0.171875,0.086156,0.022000,7.088796e-01,4.093325e-01,2.491109e-01,2.236020e-01,3.484706e-01,3.606417e-01,3.122248e-01,1.311812e-01,5.597948e-01,1.781707e+00,1.526320e+00
Canada,AAA,6.829000,7.023000,6.945000,7.038000,6.426000,5.837000,5.690000,9.657000,7.527000,5.280000,5.366000,-3.398803e+00,2.483228e+00,-2.318921e+00,-3.513987e+00,-3.093111e+00,-2.803427e+00,-2.385072e+00,-1.951703e+00,-2.010494e+00,1.277747e-02,-3.526320e-01,-7.295740e-01,1.166774e+00,1.485394e+00,1.104747e+00,1.278786e+00,1.325615e+00,1.297936e+00,1.295818e+00,1.326793e+00,1.341153e+00,1.253877e+00,1.301555e+00,1.349909e+00,5.960756e+11,7.447734e+11,1.805750e+12,1.556509e+12,1.527995e+12,1.649266e+12,1.725329e+12,1.743725e+12,1.655685e+12,2.007472e+12,2.161483e+12,2.142471e+12,4.780477e+00,2.719440e+00,1.906636e+00,1.125241e+00,1.428760e+00,1.596884e+00,2.268226e+00,1.949269e+00,7.169996e-01,3.395193e+00,6.802801e+00,3.879002e+00,2.352952e+10,3.242727e+10,7.469996e+10,7.975352e+10,8.271811e+10,8.667771e+10,8.392560e+10,8.529711e+10,9.042814e+10,1.066151e+11,1.069524e+11,1.175509e+11,0,-5.701967e-12,3.334206e-12,-1.284187e-12,-2.257608e-12,-2.024295e-12,-1.699803e-12,-1.382386e-12,-1.119272e-12,-1.214297e-12,6.364954e-15,-1.631435e-13,-3.405293e-13,0.039474,0.043540,0.041368,0.051239,0.054135,0.052555,0.048643,0.048917,0.054617,0.053109,0.049481,0.054867,-0.138026,-0.018319,0.079366,0.046120,0.010662,-0.050490,0.212472,0.076719,-0.008796,3.982194e-01,2.714845e-01,1.620218e-01,2.030065e-01,2.485036e-01,3.885944e-01,3.425780e-01,7.424662e-02,4.510686e-01,1.288409e+00,7.228851e-01
Denmark,AAA,4.476000,6.925000,6.278000,5.989000,5.833000,5.131000,5.018000,5.637000,5.043000,4.434000,5.142000,9.923083e-01,1.378800e+00,8.886178e+00,7.953938e+00,7.078509e+00,7.360245e+00,6.244573e+00,7.425270e+00,7.231130e+00,8.659145e+00,1.156562e+01,9.840846e+00,6.188558e+00,8.083144e+00,5.612467e+00,6.727907e+00,6.731718e+00,6.602893e+00,6.314619e+00,6.669447e+00,6.542152e+00,6.287113e+00,7.076152e+00,6.889703e+00,1.382177e+11,1.640438e+11,3.528326e+11,3.017589e+11,3.121818e+11,3.316106e+11,3.552934e+11,3.454015e+11,3.556310e+11,4.083782e+11,4.019456e+11,4.070919e+11,2.641603e+00,2.903282e+00,5.640205e-01,4.520342e-01,2.500000e-01,1.147132e+00,8.136095e-01,7.581316e-01,4.207120e-01,1.853045e+00,7.696567e+00,3.305178e+00,1.122584e+10,1.569595e+10,7.539189e+10,6.518509e+10,6.421581e+10,7.524445e+10,7.094208e+10,6.683555e+10,7.282335e+10,8.223584e+10,9.607255e+10,1.093708e+11,0,7.179312e-12,8.405071e-12,2.518525e-11,2.635858e-11,2.267431e-11,2.219545e-11,1.757582e-11,2.149750e-11,2.033324e-11,2.120374e-11,2.877410e-11,2.417352e-11,0.081219,0.095681,0.213676,0.216017,0.205700,0.226906,0.199672,0.193501,0.204772,0.201372,0.239019,0.268664,-0.144753,0.034541,0.062235,0.071417,-0.027842,0.029616,0.148320,-0.015752,0.012804,6.486332e-01,8.144701e-02,7.200289e-02,4.174320e-02,1.966625e-01,1.585674e-01,1.510824e-01,7.463402e-02,3.674490e-01,1.735807e+00,6.427807e-01
Germany,AAA,7.917000,4.981000,4.624000,4.122000,3.746000,3.384000,3.136000,3.856000,3.638000,3.135000,3.045000,2.612583e+00,-1.513131e+00,7.192490e+00,8.100335e+00,8.899854e+00,8.083586e+00,8.444744e+00,7.880059e+00,6.349851e+00,6.931968e+00,4.437915e+00,5.936536e+00,1.615733e+00,1.481802e+11,2.963604e+11,4.445405e+11,5.927207e+11,7.409009e+11,8.890811e+11,1.037261e+12,1.185441e+12,1.333622e+12,1.481802e+12,1.629982e+12,1.778162e+12,1.966981e+12,3.965801e+12,3.423568e+12,3.537784e+12,3.763092e+12,4.052008e+12,3.957208e+12,3.940143e+12,4.348297e+12,4.163596e+12,4.525704e+12,2.696468e+00,1.440268e+00,9.067940e-01,5.144261e-01,4.917470e-01,1.509495e+00,1.732169e+00,1.445660e+00,1.448779e-01,3.066667e+00,6.872574e+00,5.946437e+00,1.045473e+11,8.749687e+10,1.934848e+11,1.737309e+11,1.840313e+11,1.999831e+11,1.980271e+11,2.240280e+11,2.684086e+11,2.957362e+11,2.939137e+11,3.227001e+11,0,1.469260e-12,-7.692658e-13,1.813629e-12,2.366050e-12,2.515658e-12,2.148123e-12,2.084088e-12,1.991318e-12,1.611579e-12,1.594180e-12,1.065885e-12,1.311738e-12,0.058795,0.044483,0.048788,0.050746,0.052019,0.053143,0.048871,0.056613,0.068122,0.068012,0.070591,0.071304,-0.136727,0.033362,0.063686,0.076776,-0.023396,-0.004313,0.103589,-0.042477,0.086970,1.819210e-01,1.820506e-01,1.112513e-01,1.192982e-01,4.029618e-01,5.118702e-01,4.609884e-01,3.757208e-02,8.429540e-01,2.192209e+00,1.952853e+00
Liechtenstein,AAA,9.640537,9.136085,9.052059,8.903803,8.664479,8.383856,8.244777,9.305229,9.015766,8.230760,7.967091,1.516652e+07,3.017828e+07,4.306998e+07,5.742663e+07,7.143651e+07,8.531168e+07,9.953030e+07,1.134059e+08,1.277577e+08,1.421095e+08,1.564863e+08,1.708664e+08,1.840786e+08,2.451328e+09,4.784157e+09,7.121938e+09,9.459720e+09,1.179867e+10,1.417107e+10,1.654347e+10,1.891586e+10,2.129617e+10,2.368620e+10,2.638014e+10,1.421509e+09,2.483890e+09,6.657527e+09,6.268515e+09,6.237302e+09,6.474309e+09,6.692621e+09,6.436467e+09,6.405870e+09,7.710380e+09,7.361504e+09,7.088856e+09,6.816208e+09,6.543559e+09,6.270911e+09,5.998263e+09,5.725615e+09,5.452966e+09,5.180318e+09,4.907670e+09,4.635021e+09,4.362373e+09,4.089725e+09,3.817076e+09,3.544428e+09,3.271780e+09,2.999131e+09,2.726483e+09,2.453835e+09,2.181187e+09,1.908538e+09,1.635890e+09,1.363242e+09,1.090593e+09,8.179449e+08,5.452966e+08,0,1.066931e-02,1.214961e-02,6.469366e-03,9.161122e-03,1.145311e-02,1.317696e-02,1.487165e-02,1.761927e-02,1.994384e-02,1.843093e-02,2.125738e-02,2.410352e-02,2.493426,1.317200,0.450487,0.434949,0.393413,0.336899,0.285171,0.254160,0.212811,0.141445,0.111111,0.076923,-0.058432,-0.004979,0.037998,0.033720,-0.038274,-0.004754,0.203643,-0.045248,-0.037037,6.787547e+08,6.863893e+08,6.626408e+08,6.430527e+08,6.293473e+08,6.178920e+08,5.952459e+08,4.981093e+08,4.838605e+08,4.968830e+08,4.791054e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Sri Lanka,SD,7.740000,4.157000,4.519000,4.243000,4.046000,4.318000,4.670000,5.365000,5.258000,6.330000,6.360000,-3.713648e+00,-6.390297e+00,-2.408481e+00,-2.211361e+00,-1.979723e+00,-2.446586e+00,-2.962153e+00,-2.070888e+00,-1.408137e+00,-3.706463e+00,-1.953545e+00,1.847993e+00,4.006292e+01,7.700512e+01,1.305647e+02,1.358569e+02,1.455817e+02,1.524464e+02,1.624649e+02,1.787449e+02,1.855926e+02,1.987643e+02,3.226327e+02,3.275065e+02,8.032551e+09,1.633081e+10,8.252854e+10,8.514096e+10,8.801228e+10,9.437624e+10,9.449387e+10,8.901498e+10,8.430430e+10,8.860932e+10,7.414487e+10,8.435686e+10,2.149525e+01,6.176276e+00,3.179002e+00,3.768368e+00,3.958888e+00,7.704138e+00,2.135038e+00,3.528394e+00,6.153945e+00,7.014781e+00,4.972110e+01,1.654117e+01,4.470305e+08,1.131355e+09,8.210750e+09,7.302097e+09,6.008199e+09,7.959048e+09,6.920826e+09,7.648305e+09,5.663994e+09,3.136992e+09,2.352744e+09,1.568496e+09,2,-4.623249e-10,-3.913030e-10,-2.918362e-11,-2.597294e-11,-2.249372e-11,-2.592375e-11,-3.134757e-11,-2.326449e-11,-1.670303e-11,-4.182927e-11,-2.634768e-11,2.190685e-11,0.055652,0.069277,0.099490,0.085765,0.068265,0.084333,0.073241,0.085922,0.067185,0.035402,0.031732,0.018594,0.031655,0.033724,0.072308,0.001246,-0.057981,-0.052920,0.051065,-0.163239,0.137730,7.979685e-01,7.647347e-01,8.338942e-01,9.330399e-01,1.904137e+00,4.944506e-01,7.555447e-01,1.147054e+00,1.334116e+00,7.854834e+00,2.600814e+00
Ukraine,SD,11.707000,9.270000,9.140000,9.350000,9.500000,8.799000,8.194000,9.475000,9.834000,8.411791,6.989581,5.567372e+00,4.145163e+00,-3.442597e+00,5.531085e+00,-1.998803e+00,-3.098389e+00,-4.914009e+00,-2.679957e+00,3.362966e+00,-1.943275e+00,4.923775e+00,-5.401186e+00,1.952346e-02,5.440233e+00,1.188666e+01,2.184470e+01,2.555133e+01,2.659661e+01,2.720049e+01,2.584559e+01,2.695752e+01,2.728619e+01,3.234230e+01,3.657381e+01,8.139356e+10,3.237508e+10,1.335039e+11,9.103097e+10,9.335587e+10,1.120905e+11,1.308911e+11,1.538830e+11,1.566177e+11,1.997659e+11,1.619895e+11,1.787570e+11,8.937851e+10,2.820310e+01,1.207186e+01,4.869986e+01,1.391271e+01,1.443832e+01,1.095186e+01,7.886717e+00,2.732492e+00,9.363139e+00,2.018364e+01,1.284902e+01,7.386012e+08,1.477202e+09,7.538805e+09,1.330088e+10,1.553726e+10,1.881093e+10,2.081790e+10,2.531700e+10,2.913754e+10,3.096667e+10,2.850593e+10,4.051011e+10,2,6.840065e-11,1.280356e-10,-2.578649e-11,6.076047e-11,-2.141058e-11,-2.764185e-11,-3.754273e-11,-1.741555e-11,2.147245e-11,-9.727763e-12,3.039564e-11,-3.021524e-11,0.009074,0.045628,0.056469,0.146114,0.166430,0.167819,0.159047,0.164521,0.186042,0.155015,0.175974,0.226621,-0.318140,0.025540,0.200680,0.167727,0.175657,0.017771,0.275500,-0.189103,0.103510,2.409080e+00,1.302250e+00,5.328213e+00,1.487990e+00,1.519823e+00,1.244671e+00,9.624991e-01,2.883897e-01,9.521191e-01,2.399446e+00,1.838311e+00
Lebanon,D,8.594000,8.796000,9.270000,9.760000,10.236000,10.741000,11.301000,13.235000,12.621000,11.599000,11.565000,-1.033289e+00,-1.363158e+01,-2.622987e+01,-1.710774e+01,-2.047802e+01,-2.288220e+01,-2.434704e+01,-2.182809e+01,-8.761842e+00,-1.969781e+01,-3.460623e+01,-2.810367e+01,6.950892e+02,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.507500e+03,1.419243e+09,2.838485e+09,1.726036e+10,4.809521e+10,4.992934e+10,5.114731e+10,5.302768e+10,5.490152e+10,5.160596e+10,3.171213e+10,2.313194e+10,2.099242e+10,1.574432e+10,1.049621e+10,5.248105e+09,1.854604e+00,-3.749145e+00,-7.833596e-01,4.321352e+00,6.076989e+00,3.005389e+00,8.486433e+01,1.547561e+02,1.712055e+02,2.213416e+02,4.210389e+09,8.474638e+09,5.066886e+10,4.853139e+10,5.390551e+10,5.541153e+10,5.238061e+10,5.221348e+10,4.244040e+10,3.523922e+10,3.251288e+10,2.167525e+10,2,-3.640281e-10,-7.897618e-10,-5.453737e-10,-3.426390e-10,-4.003734e-10,-4.315143e-10,-4.434675e-10,-4.229762e-10,-2.762931e-10,-8.515417e-10,-1.648510e-09,-1.785004e-09,1.483323,0.490988,1.053512,0.972001,1.053927,1.044955,0.954083,1.011772,1.338302,1.523401,1.548791,1.376703,0.038135,0.024394,0.036764,0.035337,-0.060027,-0.385495,-0.270565,-0.092492,-0.250000,6.106709e+08,2.108463e-01,-4.044385e-01,-8.026226e-02,4.221720e-01,5.657750e-01,2.659401e-01,6.412114e+00,1.226179e+01,1.476037e+01,1.913892e+01
Puerto Rico,D,10.080000,13.900000,12.000000,11.800000,10.800000,9.200000,8.300000,8.890000,7.900000,6.000000,5.962000,1.224157e+09,2.448314e+09,3.672470e+09,4.896627e+09,6.120784e+09,7.344941e+09,8.569097e+09,9.793254e+09,1.101741e+10,1.224157e+10,1.346572e+10,1.468988e+10,1.591404e+10,1.713819e+10,1.836235e+10,1.958651e+10,2.081066e+10,2.203482e+10,2.325898e+10,2.448314e+10,2.570729e+10,2.693145e+10,2.815561e+10,2.937976e+10,3.060392e+10,6.170180e+10,1.020000e+11,1.030000e+11,1.040000e+11,1.030000e+11,1.010000e+11,1.050000e+11,1.030000e+11,1.060000e+11,1.140000e+11,1.180000e+11,1.134615e+11,1.089231e+11,1.043846e+11,9.984615e+10,9.530769e+10,9.076923e+10,8.623077e+10,8.169231e+10,7.715385e+10,7.261538e+10,6.807692e+10,6.353846e+10,5.900000e+10,5.446154e+10,4.992308e+10,4.538462e+10,4.084615e+10,3.630769e+10,3.176923e+10,2.723077e+10,2.269231e+10,1.815385e+10,1.361538e+10,9.076923e+09,2,4.000000e-02,3.967977e-02,3.600461e-02,4.754007e-02,5.885369e-02,7.131010e-02,8.484255e-02,9.326909e-02,1.069652e-01,1.154865e-01,1.181204e-01,1.244905e-01,1.927858,0.882657,0.489442,0.440627,0.392751,0.352502,0.314547,0.259341,0.220314,0.171263,0.119433,0.076923,0.009804,0.009709,-0.009615,-0.019417,0.039604,-0.019048,0.029126,0.075472,0.035088,1.080586e+10,7.509685e+09,8.320513e+09,8.076923e+09,8.404558e+09,9.372910e+09,9.842447e+09,8.678723e+09,9.191821e+09,1.134615e+10,1.065724e+10


## Correlation maps

In [99]:
# Remove the textual rating so that only numeric columns remain for the
# correlation analysis below (presumably `target` is its numeric encoding -
# verify against the cell that creates `target`).
# errors='ignore' keeps this cell idempotent: re-running it after the column
# has already been removed no longer raises a KeyError.
df = df.drop(columns='S&P Rating', errors='ignore')

In [100]:
# Unemployment columns to compare with the label (the series starts at 2000).
variables_to_compare = [
    f'unemployment_{yr}' for yr in (2000, *range(2014, 2024))
]

# The label goes first so it becomes the first row/column of the heatmap.
columns_to_use = ['target', *variables_to_compare]

# Correlation matrix of the selection; only magnitudes are plotted.
df_corr_matrix = df.loc[:, columns_to_use].corr()
df_corr_matrix_abs = df_corr_matrix.abs()

fig = px.imshow(
    df_corr_matrix_abs,
    title="Correlation Heatmap: Target vs Unemployment Variables",
    color_continuous_scale='RdBu_r',
    template='plotly_dark',
    width=1000,
    height=600,
)
fig.show()


In [None]:
# Current-account-balance columns to compare with the label.
variables_to_compare = [
    f'current_account_balance_{yr}' for yr in (1990, 2000, *range(2014, 2024))
]

# The label goes first so it becomes the first row/column of the heatmap.
columns_to_use = ['target', *variables_to_compare]

# Correlation matrix of the selection; only magnitudes are plotted.
df_corr_matrix = df.loc[:, columns_to_use].corr()
df_corr_matrix_abs = df_corr_matrix.abs()

fig = px.imshow(
    df_corr_matrix_abs,
    title="Correlation Heatmap: Target vs Current Account Balance Variables",
    color_continuous_scale='RdBu_r',
    template='plotly_dark',
    width=1000,
    height=600,
)
fig.show()


In [102]:
# Exchange-rate (vs USD) columns to compare with the label.
variables_to_compare = [
    f'exchange_rate_usd_{yr}' for yr in (1990, 2000, *range(2014, 2024))
]

# The label goes first so it becomes the first row/column of the heatmap.
columns_to_use = ['target', *variables_to_compare]

# Correlation matrix of the selection; only magnitudes are plotted.
df_corr_matrix = df.loc[:, columns_to_use].corr()
df_corr_matrix_abs = df_corr_matrix.abs()

fig = px.imshow(
    df_corr_matrix_abs,
    title="Correlation Heatmap: Target vs Exchange Rate Variables",
    color_continuous_scale='RdBu_r',
    template='plotly_dark',
    width=1000,
    height=600,
)
fig.show()


In [None]:
# Inflation columns to compare with the label.
variables_to_compare = [
    f'inflation_{yr}' for yr in (1990, 2000, *range(2014, 2024))
]

# The label goes first so it becomes the first row/column of the heatmap.
columns_to_use = ['target', *variables_to_compare]

# Correlation matrix of the selection; only magnitudes are plotted.
df_corr_matrix = df.loc[:, columns_to_use].corr()
df_corr_matrix_abs = df_corr_matrix.abs()

fig = px.imshow(
    df_corr_matrix_abs,
    title="Correlation Heatmap: Target vs Inflation Variables",
    color_continuous_scale='RdBu_r',
    template='plotly_dark',
    width=1000,
    height=600,
)
fig.show()


In [None]:
# GDP columns to compare with the label.
variables_to_compare = [
    f'gdp_{yr}' for yr in (1990, 2000, *range(2014, 2024))
]

# The label goes first so it becomes the first row/column of the heatmap.
columns_to_use = ['target', *variables_to_compare]

# Correlation matrix of the selection; only magnitudes are plotted.
df_corr_matrix = df.loc[:, columns_to_use].corr()
df_corr_matrix_abs = df_corr_matrix.abs()

fig = px.imshow(
    df_corr_matrix_abs,
    title="Correlation Heatmap: Target vs GDP Variables",
    color_continuous_scale='RdBu_r',
    template='plotly_dark',
    width=1000,
    height=600,
)
fig.show()


In [105]:
# Total-reserves columns to compare with the label.
variables_to_compare = [
    f'total_reserves_{yr}' for yr in (1990, 2000, *range(2014, 2024))
]

# The label goes first so it becomes the first row/column of the heatmap.
columns_to_use = ['target', *variables_to_compare]

# Correlation matrix of the selection; only magnitudes are plotted.
df_corr_matrix = df.loc[:, columns_to_use].corr()
df_corr_matrix_abs = df_corr_matrix.abs()

fig = px.imshow(
    df_corr_matrix_abs,
    title="Correlation Heatmap: Target vs Total Reserves Variables",
    color_continuous_scale='RdBu_r',
    template='plotly_dark',
    width=1000,
    height=600,
)
fig.show()


In [106]:
# Cross-indicator selection: the 2023 value of several indicators plus the
# 1990 value of GDP and total reserves.
variables_to_compare = [
    'inflation_2023',
    'gdp_1990',
    'gdp_2023',
    'unemployment_2023',
    'current_account_balance_2023',
    'total_reserves_1990',
    'total_reserves_2023',
]

# The label goes first so it becomes the first row/column of the heatmap.
columns_to_use = ['target', *variables_to_compare]

# Correlation matrix of the selection; only magnitudes are plotted.
df_corr_matrix = df.loc[:, columns_to_use].corr()
df_corr_matrix_abs = df_corr_matrix.abs()

fig = px.imshow(
    df_corr_matrix_abs,
    title="Correlation Heatmap: Target vs 2023 (and 1990) Variables",
    color_continuous_scale='RdBu_r',
    template='plotly_dark',
    width=1000,
    height=600,
)
fig.show()


In [107]:
# Engineered current-account-balance-to-GDP ratio columns to compare with the label.
variables_to_compare = [
    f'cab_to_gdp_{yr}' for yr in (1990, 2000, *range(2014, 2024))
]

# The label goes first so it becomes the first row/column of the heatmap.
columns_to_use = ['target', *variables_to_compare]

# Correlation matrix of the selection; only magnitudes are plotted.
df_corr_matrix = df.loc[:, columns_to_use].corr()
df_corr_matrix_abs = df_corr_matrix.abs()

fig = px.imshow(
    df_corr_matrix_abs,
    title="Correlation Heatmap: Target vs Current Account Balance to GDP Ratio",
    color_continuous_scale='RdBu_r',
    template='plotly_dark',
    width=1000,
    height=600,
)
fig.show()


In [108]:
# Engineered reserves-to-GDP ratio columns to compare with the label.
variables_to_compare = [
    f'reserves_to_gdp_{yr}' for yr in (1990, 2000, *range(2014, 2024))
]

# The label goes first so it becomes the first row/column of the heatmap.
columns_to_use = ['target', *variables_to_compare]

# Correlation matrix of the selection; only magnitudes are plotted.
df_corr_matrix = df.loc[:, columns_to_use].corr()
df_corr_matrix_abs = df_corr_matrix.abs()

fig = px.imshow(
    df_corr_matrix_abs,
    title="Correlation Heatmap: Target vs Reserves to GDP Ratio",
    color_continuous_scale='RdBu_r',
    template='plotly_dark',
    width=1000,
    height=600,
)
fig.show()


In [109]:
# Engineered year-over-year GDP growth columns (2014->2015 up to 2022->2023).
variables_to_compare = [
    f'gdp_growth_{start}_{start + 1}' for start in range(2014, 2023)
]

# The label goes first so it becomes the first row/column of the heatmap.
columns_to_use = ['target', *variables_to_compare]

# Correlation matrix of the selection; only magnitudes are plotted.
df_corr_matrix = df.loc[:, columns_to_use].corr()
df_corr_matrix_abs = df_corr_matrix.abs()

fig = px.imshow(
    df_corr_matrix_abs,
    title="Correlation Heatmap: Target vs GDP Growth",
    color_continuous_scale='RdBu_r',
    template='plotly_dark',
    width=1000,
    height=600,
)
fig.show()


In [111]:
# Engineered inflation-to-unemployment ratio columns (the series starts at 2000).
variables_to_compare = [
    f'inflation_to_unemployment_{yr}' for yr in (2000, *range(2014, 2024))
]

# The label goes first so it becomes the first row/column of the heatmap.
columns_to_use = ['target', *variables_to_compare]

# Correlation matrix of the selection; only magnitudes are plotted.
df_corr_matrix = df.loc[:, columns_to_use].corr()
df_corr_matrix_abs = df_corr_matrix.abs()

fig = px.imshow(
    df_corr_matrix_abs,
    title="Correlation Heatmap: Target vs Inflation to Unemployment Ratio",
    color_continuous_scale='RdBu_r',
    template='plotly_dark',
    width=1000,
    height=600,
)
fig.show()


In [110]:
# Print every column label on one line (the default Index repr is wrapped
# across many lines).
column_list = list(df.columns)
print(column_list)

['unemployment_2000', 'unemployment_2014', 'unemployment_2015', 'unemployment_2016', 'unemployment_2017', 'unemployment_2018', 'unemployment_2019', 'unemployment_2020', 'unemployment_2021', 'unemployment_2022', 'unemployment_2023', 'current_account_balance_1990', 'current_account_balance_2000', 'current_account_balance_2014', 'current_account_balance_2015', 'current_account_balance_2016', 'current_account_balance_2017', 'current_account_balance_2018', 'current_account_balance_2019', 'current_account_balance_2020', 'current_account_balance_2021', 'current_account_balance_2022', 'current_account_balance_2023', 'exchange_rate_usd_1990', 'exchange_rate_usd_2000', 'exchange_rate_usd_2014', 'exchange_rate_usd_2015', 'exchange_rate_usd_2016', 'exchange_rate_usd_2017', 'exchange_rate_usd_2018', 'exchange_rate_usd_2019', 'exchange_rate_usd_2020', 'exchange_rate_usd_2021', 'exchange_rate_usd_2022', 'exchange_rate_usd_2023', 'gdp_1990', 'gdp_2000', 'gdp_2014', 'gdp_2015', 'gdp_2016', 'gdp_2017', 

## Feature Selection 

Our goal is to remove redundant and irrelevant variables from our dataframe, both to reduce noise and to avoid wasting computational power. 
Because many variables have an absolute correlation with the target below 0.3, we set a low threshold for irrelevance. 

In [122]:
# Cut-offs used by the feature-selection cells below; both are compared
# against absolute correlation values.
# Feature pairs correlated above this value are treated as redundant.
redundancy_threshold = 0.95
# Columns correlated with the target below this value are discarded.
irrelevance_threshold = 0.05

In [123]:
# Step 1 - relevance filter.
# Absolute correlation of every column with the label.
correlations_with_target = df.corr()['target'].abs()

# Keep the columns that reach the threshold; a NaN correlation never passes
# the comparison, so such columns are left out.
relevant_vars = [
    name
    for name, strength in correlations_with_target.items()
    if strength >= irrelevance_threshold
]

# Restrict the frame to the retained columns.
filtered_df = df.loc[:, relevant_vars]

In [124]:
# Step 2 - redundancy filter.
# Among features that are highly correlated with EACH OTHER (absolute
# correlation above redundancy_threshold), keep only the one that is most
# correlated with the target.
correlation_matrix = filtered_df.corr().abs()

# Zero the diagonal so a variable is never reported as redundant with itself.
# Work on an explicit copy: writing through `.values` fails when pandas
# Copy-on-Write is enabled, because the returned array is read-only.
corr_values = correlation_matrix.to_numpy(copy=True)
np.fill_diagonal(corr_values, 0)
correlation_matrix = pd.DataFrame(
    corr_values,
    index=correlation_matrix.index,
    columns=correlation_matrix.columns,
)

# The label is not a candidate feature. Leaving it in the comparison would
# discard any feature whose correlation with the label exceeds the redundancy
# threshold, i.e. exactly the most predictive ones.
feature_cols = [name for name in correlation_matrix.columns if name != 'target']

# Keep track of variables to drop
variables_to_drop = set()

for col in feature_cols:
    if col in variables_to_drop:
        continue

    # Features highly correlated with the current one.
    pairwise = correlation_matrix.loc[feature_cols, col]
    redundant_vars = pairwise[pairwise > redundancy_threshold].index.tolist()

    for redundant_var in redundant_vars:
        # A variable that is already discarded must not knock out another one.
        if redundant_var == col or redundant_var in variables_to_drop:
            continue

        # Of the two, keep the one more correlated with the target.
        if correlations_with_target[col] >= correlations_with_target[redundant_var]:
            variables_to_drop.add(redundant_var)
        else:
            variables_to_drop.add(col)
            # `col` is gone, so it must not eliminate any further variables.
            break

# Final list of variables after removing irrelevant and redundant variables
final_vars = [var for var in filtered_df.columns if var not in variables_to_drop]

# Create the final dataframe
final_df = df[final_vars]


In [125]:
# Display the frame restricted to the columns kept by feature selection.
final_df

Unnamed: 0_level_0,unemployment_2000,unemployment_2022,exchange_rate_usd_1990,exchange_rate_usd_2023,gdp_1990,gdp_2014,inflation_1990,total_reserves_1990,total_reserves_2000,total_reserves_2021,target,cab_to_gdp_2014,cab_to_gdp_2018,reserves_to_gdp_1990,reserves_to_gdp_2000,reserves_to_gdp_2017,gdp_growth_2014_2015,gdp_growth_2015_2016,gdp_growth_2016_2017,gdp_growth_2017_2018,gdp_growth_2018_2019,gdp_growth_2019_2020,gdp_growth_2020_2021,gdp_growth_2021_2022,inflation_to_unemployment_2023
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Australia,6.288000,3.701000,1.281057e+00,1.505191e+00,3.114267e+11,1.468265e+12,7.333022e+00,1.931874e+10,1.882155e+10,5.787750e+10,0,-2.080506e-12,-1.564216e-12,0.062033,0.045226,0.049528,-0.079665,-0.106904,0.098394,0.077118,-0.024573,-0.046176,0.171875,0.086156,1.526320e+00
Canada,6.829000,5.280000,1.166774e+00,1.349909e+00,5.960756e+11,1.805750e+12,4.780477e+00,2.352952e+10,3.242727e+10,1.066151e+11,0,-1.284187e-12,-1.382386e-12,0.039474,0.043540,0.052555,-0.138026,-0.018319,0.079366,0.046120,0.010662,-0.050490,0.212472,0.076719,7.228851e-01
Denmark,4.476000,4.434000,6.188558e+00,6.889703e+00,1.382177e+11,3.528326e+11,2.641603e+00,1.122584e+10,1.569595e+10,8.223584e+10,0,2.518525e-11,1.757582e-11,0.081219,0.095681,0.226906,-0.144753,0.034541,0.062235,0.071417,-0.027842,0.029616,0.148320,-0.015752,6.427807e-01
Germany,7.917000,3.135000,1.615733e+00,1.629982e+12,1.778162e+12,3.965801e+12,2.696468e+00,1.045473e+11,8.749687e+10,2.957362e+11,0,1.813629e-12,2.084088e-12,0.058795,0.044483,0.053143,-0.136727,0.033362,0.063686,0.076776,-0.023396,-0.004313,0.103589,-0.042477,1.952853e+00
Liechtenstein,9.640537,8.230760,1.840786e+08,2.638014e+10,1.421509e+09,6.657527e+09,6.816208e+09,3.544428e+09,3.271780e+09,1.090593e+09,0,6.469366e-03,1.487165e-02,2.493426,1.317200,0.336899,-0.058432,-0.004979,0.037998,0.033720,-0.038274,-0.004754,0.203643,-0.045248,4.791054e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Sri Lanka,7.740000,6.330000,4.006292e+01,3.275065e+02,8.032551e+09,8.252854e+10,2.149525e+01,4.470305e+08,1.131355e+09,3.136992e+09,2,-2.918362e-11,-3.134757e-11,0.055652,0.069277,0.084333,0.031655,0.033724,0.072308,0.001246,-0.057981,-0.052920,0.051065,-0.163239,2.600814e+00
Ukraine,11.707000,8.411791,1.952346e-02,3.657381e+01,8.139356e+10,1.335039e+11,8.937851e+10,7.386012e+08,1.477202e+09,3.096667e+10,2,-2.578649e-11,-3.754273e-11,0.009074,0.045628,0.167819,-0.318140,0.025540,0.200680,0.167727,0.175657,0.017771,0.275500,-0.189103,1.838311e+00
Lebanon,8.594000,11.599000,6.950892e+02,1.419243e+09,2.838485e+09,4.809521e+10,1.049621e+10,4.210389e+09,8.474638e+09,3.523922e+10,2,-5.453737e-10,-4.434675e-10,1.483323,0.490988,1.044955,0.038135,0.024394,0.036764,0.035337,-0.060027,-0.385495,-0.270565,-0.092492,1.913892e+01
Puerto Rico,10.080000,6.000000,1.591404e+10,2.937976e+10,3.060392e+10,1.020000e+11,1.134615e+11,5.900000e+10,5.446154e+10,1.815385e+10,2,3.600461e-02,8.484255e-02,1.927858,0.882657,0.352502,0.009804,0.009709,-0.009615,-0.019417,0.039604,-0.019048,0.029126,0.075472,1.065724e+10
