In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import matplotlib.pylab as plt
%matplotlib inline
from sklearn import preprocessing
from sklearn import model_selection
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, GRU
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from scikeras.wrappers import KerasClassifier, KerasRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.utils import resample
import pickle

# Multiclass classification models

In this notebook we will create classification models to label the years as per their economic activity. According to bibliography, the world has experienced 4 recessions (continious decrease of GDP) and 4 downturns (low GDP growth) in the last 70 years. we will label them accordingly:

1. Pre-recession: 1 year before each recession
2. Recession-peak: 1975, 1982, 1991, 2009, 2020
3. Recovery: 2 years after each crisis
4. Global-downturn: 1958, 1998, 2001, 2012
5. Expansion: all other years

## IMF dataset

This is the dataset containing global data for 1980-2027.

In [10]:
pd.reset_option('all')
orignal_imf = pd.read_csv(r'C:\Users\menno\IH-Labs\IH-final-project\01_CSV files\imf_clean.csv')
orignal_imf

  pd.reset_option('all')
  pd.reset_option('all')
: boolean
    use_inf_as_null had been deprecated and will be removed in a future
    version. Use `use_inf_as_na` instead.

  pd.reset_option('all')


Unnamed: 0,WEO Subject Code,Country,Subject Descriptor,Subject Notes,Units,Scale,Country/Series-specific Notes,1980,1981,1982,...,2020,2021,2022,2023,2024,2025,2026,2027,2028,Estimates Start After
0,NGDP_R,Albania,"Gross domestic product, constant prices",Expressed in billions of national currency uni...,National currency,Billions,Source: IMF Staff Estimates. Official national...,311.514,329.270,338.819,...,808.617,877.475,910.293,930.774,961.862,994.565000,1028.380000,1063.345000,1099.499,2021.0
1,NGDP_RPCH,Albania,"Gross domestic product, constant prices",Annual percentages of constant price GDP are y...,Percent change,,"See notes for: Gross domestic product, consta...",2.684,5.700,2.900,...,-3.482,8.516,3.740,2.250,3.340,3.400000,3.400000,3.400000,3.4,2021.0
2,NGDP,Albania,"Gross domestic product, current prices",Expressed in billions of national currency uni...,National currency,Billions,Source: IMF Staff Estimates. Official national...,18.489,19.126,19.698,...,1644.077,1889.839,2073.967,2189.584,2310.315,2432.559000,2562.478000,2699.746000,2844.763,2021.0
3,NGDPD,Albania,"Gross domestic product, current prices",Values are based upon GDP in national currency...,U.S. dollars,Billions,"See notes for: Gross domestic product, curren...",1.946,2.229,2.296,...,15.161,18.310,18.509,20.177,20.847,21.932000,23.086000,24.217000,25.46,2021.0
4,PPPGDP,Albania,"Gross domestic product, current prices",These data form the basis for the country weig...,Purchasing power parity; international dollars,Billions,"See notes for: Gross domestic product, curren...",5.759,6.663,7.280,...,40.706,46.156,51.237,54.392,57.454,60.536000,63.760000,67.126000,70.716,2021.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4032,D_NGDPD,Sub-Saharan Africa,"External debt, total",,Percent of GDP,,,18.910,21.068,21.790,...,43.741,40.808,40.457,40.263,38.599,37.759225,37.759225,37.759225,37.75922500000001,
4033,DS,Sub-Saharan Africa,"External debt, total debt service",,U.S. dollars,Billions,,10.493,10.300,11.741,...,109.426,115.685,107.838,144.712,124.867,42.584200,42.584200,42.584200,42.5842,
4034,DS_NGDPD,Sub-Saharan Africa,"External debt, total debt service",,Percent of GDP,,,3.350,3.426,3.512,...,6.384,6.553,5.702,7.172,5.697,6.293650,6.293650,6.293650,6.29365,
4035,DSI,Sub-Saharan Africa,"External debt, total debt service, interest",,U.S. dollars,Billions,,3.301,3.541,3.144,...,18.412,19.246,18.701,22.367,24.774,7.437100,7.437100,7.437100,7.437099999999999,


### Preparing the dataset

We need to rearrange the dataset to make it suitable for the classification

We can drop all the categorical columns because they only give information about the indicators

We will use data until 2020 for the model and then try to label the years 2021-2027 

In [11]:
imf = orignal_imf.select_dtypes(np.number)
imf

Unnamed: 0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
0,311.514,329.270,338.819,342.546,349.397,344.156,363.428,360.521,355.474,390.310,...,820.653,837.786,808.617,877.475,910.293,930.774,961.862,994.565000,1028.380000,1063.345000
1,2.684,5.700,2.900,1.100,2.000,-1.500,5.600,-0.800,-1.400,9.800,...,4.019,2.088,-3.482,8.516,3.740,2.250,3.340,3.400000,3.400000,3.400000
2,18.489,19.126,19.698,19.900,19.645,20.065,20.692,20.531,20.238,22.228,...,1636.730,1691.903,1644.077,1889.839,2073.967,2189.584,2310.315,2432.559000,2562.478000,2699.746000
3,1.946,2.229,2.296,2.319,2.290,2.339,2.587,2.566,2.530,2.779,...,15.157,15.399,15.161,18.310,18.509,20.177,20.847,21.932000,23.086000,24.217000
4,5.759,6.663,7.280,7.649,8.083,8.214,8.848,8.994,9.181,10.476,...,40.061,41.631,40.706,46.156,51.237,54.392,57.454,60.536000,63.760000,67.126000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4032,18.910,21.068,21.790,29.216,38.230,41.151,41.954,38.561,39.358,40.059,...,39.648,41.100,43.741,40.808,40.457,40.263,38.599,37.759225,37.759225,37.759225
4033,10.493,10.300,11.741,13.934,15.031,16.869,17.096,17.030,17.986,16.911,...,113.886,113.445,109.426,115.685,107.838,144.712,124.867,42.584200,42.584200,42.584200
4034,3.350,3.426,3.512,5.457,7.971,8.582,7.108,6.115,6.620,5.699,...,7.211,6.637,6.384,6.553,5.702,7.172,5.697,6.293650,6.293650,6.293650
4035,3.301,3.541,3.144,3.457,3.234,3.887,4.041,4.385,4.650,4.894,...,17.928,18.718,18.412,19.246,18.701,22.367,24.774,7.437100,7.437100,7.437100


In [12]:
imf = imf.transpose().reset_index()
imf

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036
0,1980,311.514,2.684,18.489,1.946,5.759,5.935,116584.542,5557.555,6919.411,...,0.39,0.706,-6.298,4.972,60.018,18.91,10.493,3.35,3.301,1.04
1,1981,329.27,5.7,19.126,2.229,6.663,5.809,120786.154,5757.844,7016.154,...,-0.524,0.197,-9.892,-8.517,66.126,21.068,10.3,3.426,3.541,1.054
2,1982,338.819,2.9,19.698,2.296,7.28,5.814,121689.947,5800.928,7074.574,...,-1.243,-0.558,-11.627,-2.061,76.556,21.79,11.741,3.512,3.144,0.929
3,1983,342.546,1.1,19.9,2.319,7.649,5.809,120446.718,5741.664,6997.238,...,-0.884,0.627,-9.151,-0.207,80.005,29.216,13.934,5.457,3.457,1.109
4,1984,349.397,2.0,19.645,2.29,8.083,5.623,120297.849,5734.567,6763.886,...,-0.549,-1.062,-1.789,-0.087,83.24,38.23,15.031,7.971,3.234,1.307
5,1985,344.156,-1.5,20.065,2.339,8.214,5.83,116082.037,5533.6,6767.916,...,-0.534,-0.585,2.562,0.082,88.998,41.151,16.869,8.582,3.887,1.721
6,1986,363.428,5.6,20.692,2.587,8.848,5.694,120235.598,5731.6,6845.794,...,-0.354,0.477,-1.595,0.665,99.509,41.954,17.096,7.108,4.041,1.569
7,1987,360.521,-0.8,20.531,2.566,8.994,5.695,116915.396,5573.326,6657.957,...,-0.856,-0.387,-3.681,3.869,111.007,38.561,17.03,6.115,4.385,1.567
8,1988,355.474,-1.4,20.238,2.53,9.181,5.693,113123.997,5392.591,6440.366,...,-0.712,-0.08,-2.942,0.105,110.67,39.358,17.986,6.62,4.65,1.542
9,1989,390.31,9.8,22.228,2.779,10.476,5.695,120916.021,5764.035,6886.273,...,-2.175,0.353,-2.026,2.634,119.238,40.059,16.911,5.699,4.894,1.593


In [13]:
columns = ['year']

for i in range(len(orignal_imf)):
    column_name = (orignal_imf['Country'][i]+'_'+orignal_imf['Subject Descriptor'][i])
    column_name = str(column_name).lower().replace(' ','_')
    columns.append(column_name)

len(columns)

4038

In [14]:
imf.columns= columns
imf.head()

Unnamed: 0,year,"albania_gross_domestic_product,_constant_prices","albania_gross_domestic_product,_constant_prices.1","albania_gross_domestic_product,_current_prices","albania_gross_domestic_product,_current_prices.1","albania_gross_domestic_product,_current_prices.2","albania_gross_domestic_product,_deflator","albania_gross_domestic_product_per_capita,_constant_prices","albania_gross_domestic_product_per_capita,_constant_prices.1","albania_gross_domestic_product_per_capita,_current_prices",...,"sub-saharan_africa_direct_investment,_net","sub-saharan_africa_portfolio_investment,_net","sub-saharan_africa_other_investment,_net",sub-saharan_africa_change_in_reserves,"sub-saharan_africa_external_debt,_total","sub-saharan_africa_external_debt,_total.1","sub-saharan_africa_external_debt,_total_debt_service","sub-saharan_africa_external_debt,_total_debt_service.1","sub-saharan_africa_external_debt,_total_debt_service,_interest","sub-saharan_africa_external_debt,_total_debt_service,_interest.1"
0,1980,311.514,2.684,18.489,1.946,5.759,5.935,116584.542,5557.555,6919.411,...,0.39,0.706,-6.298,4.972,60.018,18.91,10.493,3.35,3.301,1.04
1,1981,329.27,5.7,19.126,2.229,6.663,5.809,120786.154,5757.844,7016.154,...,-0.524,0.197,-9.892,-8.517,66.126,21.068,10.3,3.426,3.541,1.054
2,1982,338.819,2.9,19.698,2.296,7.28,5.814,121689.947,5800.928,7074.574,...,-1.243,-0.558,-11.627,-2.061,76.556,21.79,11.741,3.512,3.144,0.929
3,1983,342.546,1.1,19.9,2.319,7.649,5.809,120446.718,5741.664,6997.238,...,-0.884,0.627,-9.151,-0.207,80.005,29.216,13.934,5.457,3.457,1.109
4,1984,349.397,2.0,19.645,2.29,8.083,5.623,120297.849,5734.567,6763.886,...,-0.549,-1.062,-1.789,-0.087,83.24,38.23,15.031,7.971,3.234,1.307


In [15]:
imf.tail(10)

Unnamed: 0,year,"albania_gross_domestic_product,_constant_prices","albania_gross_domestic_product,_constant_prices.1","albania_gross_domestic_product,_current_prices","albania_gross_domestic_product,_current_prices.1","albania_gross_domestic_product,_current_prices.2","albania_gross_domestic_product,_deflator","albania_gross_domestic_product_per_capita,_constant_prices","albania_gross_domestic_product_per_capita,_constant_prices.1","albania_gross_domestic_product_per_capita,_current_prices",...,"sub-saharan_africa_direct_investment,_net","sub-saharan_africa_portfolio_investment,_net","sub-saharan_africa_other_investment,_net",sub-saharan_africa_change_in_reserves,"sub-saharan_africa_external_debt,_total","sub-saharan_africa_external_debt,_total.1","sub-saharan_africa_external_debt,_total_debt_service","sub-saharan_africa_external_debt,_total_debt_service.1","sub-saharan_africa_external_debt,_total_debt_service,_interest","sub-saharan_africa_external_debt,_total_debt_service,_interest.1"
38,2018,820.653,4.019,1636.73,15.157,40.061,199.442,284678.591,13570.554,567769.706,...,-20.88,-4.538,-19.148,4.889,650.966,39.648,113.886,7.211,17.928,1.024
39,2019,837.786,2.088,1691.903,15.399,41.631,201.949,290805.66,13862.63,587280.293,...,-29.242,-18.766,-10.964,6.315,696.589,41.1,113.445,6.637,18.718,1.06
40,2020,808.617,-3.482,1644.077,15.161,40.706,203.32,280984.503,13394.458,571296.57,...,-9.88,2.192,-3.712,-9.341,719.517,43.741,109.426,6.384,18.412,1.102
41,2021,877.475,8.516,1889.839,18.31,46.156,215.372,305428.182,14559.682,657808.053,...,-69.909,45.146,-15.884,22.51,743.404,40.808,115.685,6.553,19.246,1.022
42,2022,910.293,3.74,2073.967,18.509,51.237,227.835,317575.901,15138.76,723549.683,...,-25.857,5.766,-5.926,-8.55,770.647,40.457,107.838,5.702,18.701,0.93
43,2023,930.774,2.25,2189.584,20.177,54.392,235.243,325625.597,15522.487,766012.356,...,-36.654,3.732,-8.387,-2.488,803.678,40.263,144.712,7.172,22.367,1.063
44,2024,961.862,3.34,2310.315,20.847,57.454,240.192,337536.034,16090.254,810734.5,...,-43.849,2.335,-17.517,8.203,847.68,38.599,124.867,5.697,24.774,1.097
45,2025,994.565,3.4,2432.559,21.932,60.536,244.585,350141.324,16691.145,856393.802,...,-13.399125,-4.806475,1.8585,4.41975,247.09835,37.759225,42.5842,6.29365,7.4371,1.190375
46,2026,1028.38,3.4,2562.478,23.086,63.76,249.176,363275.178,17317.233,905194.658,...,-13.399125,-4.806475,1.8585,4.41975,247.09835,37.759225,42.5842,6.29365,7.4371,1.190375
47,2027,1063.345,3.4,2699.746,24.217,67.126,253.892,376989.41,17970.987,957144.768,...,-13.399125,-4.806475,1.8585,4.41975,247.09835,37.759225,42.5842,6.29365,7.4371,1.190375


In [16]:
imf.to_csv('imf_formatted.csv')

In [17]:
imf_to_predict = imf[42:48]
imf = imf[:42]
imf.tail()

Unnamed: 0,year,"albania_gross_domestic_product,_constant_prices","albania_gross_domestic_product,_constant_prices.1","albania_gross_domestic_product,_current_prices","albania_gross_domestic_product,_current_prices.1","albania_gross_domestic_product,_current_prices.2","albania_gross_domestic_product,_deflator","albania_gross_domestic_product_per_capita,_constant_prices","albania_gross_domestic_product_per_capita,_constant_prices.1","albania_gross_domestic_product_per_capita,_current_prices",...,"sub-saharan_africa_direct_investment,_net","sub-saharan_africa_portfolio_investment,_net","sub-saharan_africa_other_investment,_net",sub-saharan_africa_change_in_reserves,"sub-saharan_africa_external_debt,_total","sub-saharan_africa_external_debt,_total.1","sub-saharan_africa_external_debt,_total_debt_service","sub-saharan_africa_external_debt,_total_debt_service.1","sub-saharan_africa_external_debt,_total_debt_service,_interest","sub-saharan_africa_external_debt,_total_debt_service,_interest.1"
37,2017,788.943,3.802,1550.645,13.053,37.609,196.547,273542.434,13039.696,537640.3,...,-37.338,-23.979,0.352,16.014,602.808,36.5,83.705,5.03,13.811,0.855
38,2018,820.653,4.019,1636.73,15.157,40.061,199.442,284678.591,13570.554,567769.706,...,-20.88,-4.538,-19.148,4.889,650.966,39.648,113.886,7.211,17.928,1.024
39,2019,837.786,2.088,1691.903,15.399,41.631,201.949,290805.66,13862.63,587280.293,...,-29.242,-18.766,-10.964,6.315,696.589,41.1,113.445,6.637,18.718,1.06
40,2020,808.617,-3.482,1644.077,15.161,40.706,203.32,280984.503,13394.458,571296.57,...,-9.88,2.192,-3.712,-9.341,719.517,43.741,109.426,6.384,18.412,1.102
41,2021,877.475,8.516,1889.839,18.31,46.156,215.372,305428.182,14559.682,657808.053,...,-69.909,45.146,-15.884,22.51,743.404,40.808,115.685,6.553,19.246,1.022


In [18]:
imf['year'] = imf['year'].astype(np.number)

crisis_years = [1975, 1982, 1991, 2009, 2020]
recovery_years = [1976, 1977, 1983, 1984, 1992, 1993, 2010, 2011]
downturn_years = [1958, 1998, 2001, 2012]

labels = []
for i in range(len(imf)):
    if imf['year'][i] in crisis_years:
        labels.append('recession')

    elif imf.loc[i]['year'] in recovery_years:
        labels.append('recovery')

    elif imf.loc[i]['year'] in recovery_years:
        labels.append('recovery')

    elif imf.loc[i]['year'] in downturn_years:
        labels.append('downturn')
        
    else:
        labels.append('expansion')

imf['year_label'] = labels

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  imf['year'] = imf['year'].astype(np.number)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  imf['year_label'] = labels


In [19]:
imf[['year', 'year_label']]

Unnamed: 0,year,year_label
0,1980.0,expansion
1,1981.0,expansion
2,1982.0,recession
3,1983.0,recovery
4,1984.0,recovery
5,1985.0,expansion
6,1986.0,expansion
7,1987.0,expansion
8,1988.0,expansion
9,1989.0,expansion


In [20]:
imf

Unnamed: 0,year,"albania_gross_domestic_product,_constant_prices","albania_gross_domestic_product,_constant_prices.1","albania_gross_domestic_product,_current_prices","albania_gross_domestic_product,_current_prices.1","albania_gross_domestic_product,_current_prices.2","albania_gross_domestic_product,_deflator","albania_gross_domestic_product_per_capita,_constant_prices","albania_gross_domestic_product_per_capita,_constant_prices.1","albania_gross_domestic_product_per_capita,_current_prices",...,"sub-saharan_africa_portfolio_investment,_net","sub-saharan_africa_other_investment,_net",sub-saharan_africa_change_in_reserves,"sub-saharan_africa_external_debt,_total","sub-saharan_africa_external_debt,_total.1","sub-saharan_africa_external_debt,_total_debt_service","sub-saharan_africa_external_debt,_total_debt_service.1","sub-saharan_africa_external_debt,_total_debt_service,_interest","sub-saharan_africa_external_debt,_total_debt_service,_interest.1",year_label
0,1980.0,311.514,2.684,18.489,1.946,5.759,5.935,116584.542,5557.555,6919.411,...,0.706,-6.298,4.972,60.018,18.91,10.493,3.35,3.301,1.04,expansion
1,1981.0,329.27,5.7,19.126,2.229,6.663,5.809,120786.154,5757.844,7016.154,...,0.197,-9.892,-8.517,66.126,21.068,10.3,3.426,3.541,1.054,expansion
2,1982.0,338.819,2.9,19.698,2.296,7.28,5.814,121689.947,5800.928,7074.574,...,-0.558,-11.627,-2.061,76.556,21.79,11.741,3.512,3.144,0.929,recession
3,1983.0,342.546,1.1,19.9,2.319,7.649,5.809,120446.718,5741.664,6997.238,...,0.627,-9.151,-0.207,80.005,29.216,13.934,5.457,3.457,1.109,recovery
4,1984.0,349.397,2.0,19.645,2.29,8.083,5.623,120297.849,5734.567,6763.886,...,-1.062,-1.789,-0.087,83.24,38.23,15.031,7.971,3.234,1.307,recovery
5,1985.0,344.156,-1.5,20.065,2.339,8.214,5.83,116082.037,5533.6,6767.916,...,-0.585,2.562,0.082,88.998,41.151,16.869,8.582,3.887,1.721,expansion
6,1986.0,363.428,5.6,20.692,2.587,8.848,5.694,120235.598,5731.6,6845.794,...,0.477,-1.595,0.665,99.509,41.954,17.096,7.108,4.041,1.569,expansion
7,1987.0,360.521,-0.8,20.531,2.566,8.994,5.695,116915.396,5573.326,6657.957,...,-0.387,-3.681,3.869,111.007,38.561,17.03,6.115,4.385,1.567,expansion
8,1988.0,355.474,-1.4,20.238,2.53,9.181,5.693,113123.997,5392.591,6440.366,...,-0.08,-2.942,0.105,110.67,39.358,17.986,6.62,4.65,1.542,expansion
9,1989.0,390.31,9.8,22.228,2.779,10.476,5.695,120916.021,5764.035,6886.273,...,0.353,-2.026,2.634,119.238,40.059,16.911,5.699,4.894,1.593,expansion


### Model

In [21]:
X = imf.drop(['year', 'year_label'],axis=1)
X.shape

(42, 4037)

In [22]:
Y = imf['year_label']

encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = pd.DataFrame(encoder.transform(Y))

In [23]:
encoded_Y.value_counts()

1    29
3     6
2     4
0     3
dtype: int64

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, encoded_Y, test_size=0.3, shuffle=False)

scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [26]:
y_test

Unnamed: 0,0
29,2
30,3
31,3
32,0
33,1
34,1
35,1
36,1
37,1
38,1


In [27]:
y_train

Unnamed: 0,0
0,1
1,1
2,2
3,3
4,3
5,1
6,1
7,1
8,1
9,1


In [29]:
# Initialize the model
model = Sequential()

# Add layers to the model
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))  # Use the shape of the training data for input shape
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(4, activation='softmax'))  # Softmax for multiclass classification

# Compile the model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    X_train,
    to_categorical(y_train),
    epochs=20,
    batch_size=6
)

# Evaluate the model
evaluation = model.evaluate(X_test, to_categorical(y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [30]:
predictions = model.predict(X_test)
print(np.argmax(predictions, axis=1))

predictions

[1 1 1 1 1 1 1 1 1 1 1 1 1]


array([[5.1252634e-15, 1.0000000e+00, 2.1251322e-22, 6.9331122e-17],
       [2.2636111e-26, 1.0000000e+00, 4.2047603e-32, 7.3939212e-26],
       [1.5029098e-28, 1.0000000e+00, 2.4975808e-35, 8.6088485e-29],
       [4.0866684e-30, 1.0000000e+00, 4.7850397e-38, 9.2066692e-31],
       [1.1381428e-31, 1.0000000e+00, 0.0000000e+00, 2.3765755e-32],
       [1.2454781e-32, 1.0000000e+00, 0.0000000e+00, 9.8014436e-34],
       [4.3298201e-31, 1.0000000e+00, 0.0000000e+00, 2.0132135e-33],
       [2.5340323e-33, 1.0000000e+00, 0.0000000e+00, 1.2787576e-35],
       [3.8922284e-38, 1.0000000e+00, 0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00, 0.0000000e+00]],
      dtype=float32)

The model accuracy is acceptable, however the high accuracy is only achieved because of the imbalanced data.

The model is predicting every year as expansion

We will try up- and downsampling

In [31]:
all_data = imf
all_data

Unnamed: 0,year,"albania_gross_domestic_product,_constant_prices","albania_gross_domestic_product,_constant_prices.1","albania_gross_domestic_product,_current_prices","albania_gross_domestic_product,_current_prices.1","albania_gross_domestic_product,_current_prices.2","albania_gross_domestic_product,_deflator","albania_gross_domestic_product_per_capita,_constant_prices","albania_gross_domestic_product_per_capita,_constant_prices.1","albania_gross_domestic_product_per_capita,_current_prices",...,"sub-saharan_africa_portfolio_investment,_net","sub-saharan_africa_other_investment,_net",sub-saharan_africa_change_in_reserves,"sub-saharan_africa_external_debt,_total","sub-saharan_africa_external_debt,_total.1","sub-saharan_africa_external_debt,_total_debt_service","sub-saharan_africa_external_debt,_total_debt_service.1","sub-saharan_africa_external_debt,_total_debt_service,_interest","sub-saharan_africa_external_debt,_total_debt_service,_interest.1",year_label
0,1980.0,311.514,2.684,18.489,1.946,5.759,5.935,116584.542,5557.555,6919.411,...,0.706,-6.298,4.972,60.018,18.91,10.493,3.35,3.301,1.04,expansion
1,1981.0,329.27,5.7,19.126,2.229,6.663,5.809,120786.154,5757.844,7016.154,...,0.197,-9.892,-8.517,66.126,21.068,10.3,3.426,3.541,1.054,expansion
2,1982.0,338.819,2.9,19.698,2.296,7.28,5.814,121689.947,5800.928,7074.574,...,-0.558,-11.627,-2.061,76.556,21.79,11.741,3.512,3.144,0.929,recession
3,1983.0,342.546,1.1,19.9,2.319,7.649,5.809,120446.718,5741.664,6997.238,...,0.627,-9.151,-0.207,80.005,29.216,13.934,5.457,3.457,1.109,recovery
4,1984.0,349.397,2.0,19.645,2.29,8.083,5.623,120297.849,5734.567,6763.886,...,-1.062,-1.789,-0.087,83.24,38.23,15.031,7.971,3.234,1.307,recovery
5,1985.0,344.156,-1.5,20.065,2.339,8.214,5.83,116082.037,5533.6,6767.916,...,-0.585,2.562,0.082,88.998,41.151,16.869,8.582,3.887,1.721,expansion
6,1986.0,363.428,5.6,20.692,2.587,8.848,5.694,120235.598,5731.6,6845.794,...,0.477,-1.595,0.665,99.509,41.954,17.096,7.108,4.041,1.569,expansion
7,1987.0,360.521,-0.8,20.531,2.566,8.994,5.695,116915.396,5573.326,6657.957,...,-0.387,-3.681,3.869,111.007,38.561,17.03,6.115,4.385,1.567,expansion
8,1988.0,355.474,-1.4,20.238,2.53,9.181,5.693,113123.997,5392.591,6440.366,...,-0.08,-2.942,0.105,110.67,39.358,17.986,6.62,4.65,1.542,expansion
9,1989.0,390.31,9.8,22.228,2.779,10.476,5.695,120916.021,5764.035,6886.273,...,0.353,-2.026,2.634,119.238,40.059,16.911,5.699,4.894,1.593,expansion


### Downsampling

In [32]:
other_categories = all_data[all_data['year_label']!='expansion']
other_categories['year_label'].value_counts()

recovery     6
recession    4
downturn     3
Name: year_label, dtype: int64

In [33]:
category_expansion = all_data[all_data['year_label']=='expansion']
category_expansion_undersampled = resample(category_expansion, replace=False, n_samples = 6)
category_expansion_undersampled

Unnamed: 0,year,"albania_gross_domestic_product,_constant_prices","albania_gross_domestic_product,_constant_prices.1","albania_gross_domestic_product,_current_prices","albania_gross_domestic_product,_current_prices.1","albania_gross_domestic_product,_current_prices.2","albania_gross_domestic_product,_deflator","albania_gross_domestic_product_per_capita,_constant_prices","albania_gross_domestic_product_per_capita,_constant_prices.1","albania_gross_domestic_product_per_capita,_current_prices",...,"sub-saharan_africa_portfolio_investment,_net","sub-saharan_africa_other_investment,_net",sub-saharan_africa_change_in_reserves,"sub-saharan_africa_external_debt,_total","sub-saharan_africa_external_debt,_total.1","sub-saharan_africa_external_debt,_total_debt_service","sub-saharan_africa_external_debt,_total_debt_service.1","sub-saharan_africa_external_debt,_total_debt_service,_interest","sub-saharan_africa_external_debt,_total_debt_service,_interest.1",year_label
38,2018.0,820.653,4.019,1636.73,15.157,40.061,199.442,284678.591,13570.554,567769.706,...,-4.538,-19.148,4.889,650.966,39.648,113.886,7.211,17.928,1.024,expansion
8,1988.0,355.474,-1.4,20.238,2.53,9.181,5.693,113123.997,5392.591,6440.366,...,-0.08,-2.942,0.105,110.67,39.358,17.986,6.62,4.65,1.542,expansion
36,2016.0,760.044,3.315,1472.479,11.862,34.736,193.736,264262.007,12597.301,511970.59,...,-16.554,-9.04,-4.803,511.268,34.709,82.343,5.471,13.143,0.87,expansion
1,1981.0,329.27,5.7,19.126,2.229,6.663,5.809,120786.154,5757.844,7016.154,...,0.197,-9.892,-8.517,66.126,21.068,10.3,3.426,3.541,1.054,expansion
17,1997.0,297.833,-10.924,331.324,2.259,9.697,111.245,94601.804,4509.643,105239.731,...,-6.436,0.262,7.451,212.177,38.238,38.525,7.178,8.275,1.441,expansion
33,2013.0,707.14,1.002,1350.053,12.784,30.604,190.917,244254.867,11643.566,466324.606,...,-21.349,-10.531,0.067,383.841,22.985,69.861,4.352,8.767,0.504,expansion


In [34]:
imf_undersampled = pd.concat([other_categories, category_expansion_undersampled], axis=0)
imf_undersampled.shape

(19, 4039)

In [35]:
X_undersampled = imf_undersampled.drop(['year', 'year_label'],axis=1)

Y_undersampled = imf_undersampled['year_label']

encoder = LabelEncoder()
encoder.fit(Y_undersampled)
encoded_Y_undersampled = pd.DataFrame(encoder.transform(Y_undersampled))

X_train_under, X_test_under, y_train_under, y_test_under = train_test_split(X_undersampled, encoded_Y_undersampled,
                                                                            test_size=0.3)


scaler = preprocessing.StandardScaler()
X_train_under = scaler.fit_transform(X_train_under)
X_test_under = scaler.transform(X_test_under)

In [36]:
X_undersampled.shape

(19, 4037)

In [37]:
# Initialize the model
model_under = Sequential()

# Add layers to the model
model_under.add(Dense(128, activation='relu', input_shape=(X_train_under.shape[1],)))  # Use the shape of the training data for input shape
model_under.add(Dense(64, activation='relu'))
model_under.add(Dense(4, activation='softmax'))  # Softmax for multiclass classification

# Compile the model
model_under.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
history_under = model_under.fit(
    X_train_under,
    to_categorical(y_train_under),
    epochs=20,
    batch_size=6
)

# Evaluate the model
evaluation_under = model_under.evaluate(X_test_under, to_categorical(y_test_under))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [38]:
predictions = model_under.predict(X_test_under)
print(np.argmax(predictions, axis=1))

predictions

[2 0 0 1 0 0]


array([[3.1631300e-01, 1.2056049e-05, 6.8366820e-01, 6.8101667e-06],
       [9.1298860e-01, 4.3744300e-03, 8.2267433e-02, 3.6958937e-04],
       [9.9612314e-01, 3.0920994e-03, 3.7838920e-04, 4.0627460e-04],
       [3.0553002e-07, 9.9999905e-01, 6.1048604e-07, 2.5810767e-08],
       [9.9980336e-01, 6.6100103e-05, 5.3941307e-07, 1.3000630e-04],
       [9.9942809e-01, 2.8045694e-04, 2.2834173e-04, 6.3028929e-05]],
      dtype=float32)

In [39]:
y_test_under

Unnamed: 0,0
8,2
5,3
4,3
13,1
17,1
7,0


## Upsampling

In [40]:
category_expansion = all_data[all_data['year_label']=='expansion']
category_recession = all_data[all_data['year_label']=='recession']
category_recovery = all_data[all_data['year_label']=='recovery']
category_downturn = all_data[all_data['year_label']=='downturn']

category_recession_oversampled = resample(category_recession,replace=True, n_samples = len(category_expansion))
category_recovery_oversampled = resample(category_recovery,replace=True, n_samples = len(category_expansion))
category_downturn_oversampled = resample(category_downturn,replace=True, n_samples = len(category_expansion))

imf_upsampled= pd.concat([category_expansion,
                          category_recession_oversampled,
                          category_recovery_oversampled,
                          category_downturn_oversampled], axis=0)
imf_upsampled.shape

(116, 4039)

In [41]:
X_updersampled = imf_upsampled.drop(['year', 'year_label'],axis=1)

Y_upsampled = imf_upsampled['year_label']

encoder = LabelEncoder()
encoder.fit(Y_upsampled)
encoded_Y_upsampled = pd.DataFrame(encoder.transform(Y_upsampled))

X_train_up, X_test_up, y_train_up, y_test_up = train_test_split(X_updersampled, encoded_Y_upsampled,
                                                                            test_size=0.3)


scaler = preprocessing.StandardScaler()
X_train_under = scaler.fit_transform(X_train_up)
X_test_under = scaler.transform(X_test_up)

In [42]:
X_updersampled.shape

(116, 4037)

In [43]:
Y_upsampled

0     expansion
1     expansion
5     expansion
6     expansion
7     expansion
        ...    
32     downturn
18     downturn
18     downturn
21     downturn
32     downturn
Name: year_label, Length: 116, dtype: object

1: expansion

2: recession, 

3: recovery, 

0: downturn

In [44]:
encoded_Y_upsampled

Unnamed: 0,0
0,1
1,1
2,1
3,1
4,1
...,...
111,0
112,0
113,0
114,0


In [45]:
# Initialize the model
model_up = Sequential()

# Add layers to the model
model_up.add(Dense(128, activation='relu', input_shape=(X_train_up.shape[1],)))
model_up.add(Dense(128, activation='relu'))
model_up.add(Dense(32, activation='relu'))
model_up.add(Dense(16, activation='relu'))
model_up.add(Dense(8, activation='relu'))
model_up.add(Dense(4, activation='softmax'))  # Softmax for multiclass classification

# Compile the model
model_up.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
history_up = model_up.fit(
    X_train_up,
    to_categorical(y_train_up),
    epochs=20,
    batch_size=6
)

# Evaluate the model
evaluation_up = model_up.evaluate(X_test_up, to_categorical(y_test_up))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [46]:
predictions = model_up.predict(X_test_up)
print(np.argmax(predictions, axis=1))

predictions

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


array([[0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 , 0.24308121, 0.24072267],
       [0.24964733, 0.2665488 ,

In [47]:
# 1: expansion, 2: recession, 3: recovery, 0: downturn

### Conclusions

None of the models was able to predict the categories with accuracy. Even with upsampling, almost  every category is being missed.

The next database can be better if we focus on % data instead of data in dollars will have better results, because the effect of growth trend and currency change throughout the years.

In [48]:
orignal_imf['Scale'].unique()

array(['Billions', 'None', 'Units', 'Millions'], dtype=object)

# World Bank Data

In [49]:
wb_gdp = pd.read_csv(r'C:\Users\menno\IH-Labs\IH-final-project\01_CSV files\wb_gdp_predictions.csv')
wb_gdp

Unnamed: 0,WLD_gdp_per_capita_constant_us$,WLD_gdp_per_capita_%_growth,WLD_gdp_%_growth,ARG_gdp_per_capita_constant_us$,ARG_gdp_per_capita_%_growth,ARG_gdp_%_growth,AUS_gdp_per_capita_constant_us$,AUS_gdp_per_capita_%_growth,AUS_gdp_%_growth,BRA_gdp_per_capita_constant_us$,...,TUR_gdp_per_capita_constant_us$,TUR_gdp_per_capita_%_growth,TUR_gdp_%_growth,GBR_gdp_per_capita_constant_us$,GBR_gdp_per_capita_%_growth,GBR_gdp_%_growth,USA_gdp_per_capita_constant_us$,USA_gdp_per_capita_%_growth,USA_gdp_%_growth,time
0,3694.968086,2.406823,3.790081,7687.517338,3.740903,5.427843,20040.523110,0.463671,2.482656,2717.001315,...,2468.308533,-1.507613,1.156069,15198.153420,1.899262,2.677119,19253.547330,0.618121,2.300000,YR1961
1,3823.614985,3.481678,5.314851,7498.840061,-2.454333,-0.852022,19810.518080,-1.147700,1.294611,2811.627110,...,2535.943615,2.740139,5.571429,15235.923700,0.248519,1.102910,20116.235120,4.480669,6.100000,YR1962
2,3937.998747,2.991508,5.184483,6986.564802,-6.831393,-5.308197,20642.259970,4.198486,6.216107,2746.513434,...,2692.213168,6.162186,9.066306,15859.449110,4.092469,4.874384,20701.269950,2.908272,4.400000,YR1963
3,4109.998137,4.367685,6.557806,7572.231166,8.382751,10.130298,21653.977840,4.901197,6.980061,2758.862972,...,2766.383072,2.754979,5.459057,16628.576080,4.849645,5.533659,21599.818710,4.340549,5.800000,YR1964
4,4249.735255,3.399931,5.549061,8241.529337,8.838850,10.569433,22503.623610,3.923740,5.980438,2746.257726,...,2775.433757,0.327167,2.823530,16876.017360,1.488048,2.142177,22696.678580,5.078098,6.400000,YR1965
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,10499.647110,-4.042395,-3.072225,11341.268370,-10.812611,-9.943235,58115.930670,-1.278358,-0.050885,8255.567064,...,12072.399440,1.147944,1.940032,42098.602930,-11.355676,-11.030858,58451.606720,-3.700953,-2.767803,YR2020
60,11037.294040,5.120619,6.023813,12402.490790,9.357176,10.398249,59339.684350,2.105711,2.236212,8621.730590,...,13341.556340,10.512880,11.353496,45334.157000,7.685657,7.597471,61829.845630,5.779548,5.945485,YR2021
61,11287.148500,2.263729,3.080322,12932.469530,4.273164,5.243044,60797.958650,2.457503,3.617589,8831.126713,...,13990.746870,4.865928,5.565860,47232.271810,4.186942,4.101621,62866.714390,1.676971,2.061593,YR2022
62,11443.973299,1.912327,3.370353,12894.976521,1.306230,2.616845,61531.376965,1.973410,3.453485,8878.670912,...,14520.695582,2.946106,4.835036,47621.031746,2.040600,2.388049,63588.890780,1.937953,2.783232,YR2023


In [50]:
wb_gdp ['time'] = wb_gdp ['time'].str.replace('YR', '')

In [51]:
# Dropping 2022,2023 and 2024
wb_gdp ['time'] = wb_gdp ['time'].str.replace('YR', '')
wb_gdp_to_predict = wb_gdp[-5:]
wb_gdp = wb_gdp[:-2]
wb_gdp

Unnamed: 0,WLD_gdp_per_capita_constant_us$,WLD_gdp_per_capita_%_growth,WLD_gdp_%_growth,ARG_gdp_per_capita_constant_us$,ARG_gdp_per_capita_%_growth,ARG_gdp_%_growth,AUS_gdp_per_capita_constant_us$,AUS_gdp_per_capita_%_growth,AUS_gdp_%_growth,BRA_gdp_per_capita_constant_us$,...,TUR_gdp_per_capita_constant_us$,TUR_gdp_per_capita_%_growth,TUR_gdp_%_growth,GBR_gdp_per_capita_constant_us$,GBR_gdp_per_capita_%_growth,GBR_gdp_%_growth,USA_gdp_per_capita_constant_us$,USA_gdp_per_capita_%_growth,USA_gdp_%_growth,time
0,3694.968086,2.406823,3.790081,7687.517338,3.740903,5.427843,20040.52311,0.463671,2.482656,2717.001315,...,2468.308533,-1.507613,1.156069,15198.15342,1.899262,2.677119,19253.54733,0.618121,2.300000,1961
1,3823.614985,3.481678,5.314851,7498.840061,-2.454333,-0.852022,19810.51808,-1.147700,1.294611,2811.627110,...,2535.943615,2.740139,5.571429,15235.92370,0.248519,1.102910,20116.23512,4.480669,6.100000,1962
2,3937.998747,2.991508,5.184483,6986.564802,-6.831393,-5.308197,20642.25997,4.198486,6.216107,2746.513434,...,2692.213168,6.162186,9.066306,15859.44911,4.092469,4.874384,20701.26995,2.908272,4.400000,1963
3,4109.998137,4.367685,6.557806,7572.231166,8.382751,10.130298,21653.97784,4.901197,6.980061,2758.862972,...,2766.383072,2.754979,5.459057,16628.57608,4.849645,5.533659,21599.81871,4.340549,5.800000,1964
4,4249.735255,3.399931,5.549061,8241.529337,8.838850,10.569433,22503.62361,3.923740,5.980438,2746.257726,...,2775.433757,0.327167,2.823530,16876.01736,1.488048,2.142177,22696.67858,5.078098,6.400000,1965
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,10777.870450,2.161637,3.288050,13105.397160,-3.601610,-2.617396,58479.50355,1.348283,2.883045,8553.864918,...,11938.745440,2.085158,2.979885,47006.14293,1.090624,1.705021,59607.39366,2.404868,2.945385,2018
58,10941.964490,1.522509,2.594550,12716.224200,-2.969562,-2.000861,58868.48063,0.665151,2.171396,8592.210218,...,11935.387870,-0.028123,0.783610,47491.59456,1.032741,1.604309,60698.01130,1.829668,2.294439,2019
59,10499.647110,-4.042395,-3.072225,11341.268370,-10.812611,-9.943235,58115.93067,-1.278358,-0.050885,8255.567064,...,12072.399440,1.147944,1.940032,42098.60293,-11.355676,-11.030858,58451.60672,-3.700953,-2.767803,2020
60,11037.294040,5.120619,6.023813,12402.490790,9.357176,10.398249,59339.68435,2.105711,2.236212,8621.730590,...,13341.556340,10.512880,11.353496,45334.15700,7.685657,7.597471,61829.84563,5.779548,5.945485,2021


In [52]:
wb_gdp_to_predict

Unnamed: 0,WLD_gdp_per_capita_constant_us$,WLD_gdp_per_capita_%_growth,WLD_gdp_%_growth,ARG_gdp_per_capita_constant_us$,ARG_gdp_per_capita_%_growth,ARG_gdp_%_growth,AUS_gdp_per_capita_constant_us$,AUS_gdp_per_capita_%_growth,AUS_gdp_%_growth,BRA_gdp_per_capita_constant_us$,...,TUR_gdp_per_capita_constant_us$,TUR_gdp_per_capita_%_growth,TUR_gdp_%_growth,GBR_gdp_per_capita_constant_us$,GBR_gdp_per_capita_%_growth,GBR_gdp_%_growth,USA_gdp_per_capita_constant_us$,USA_gdp_per_capita_%_growth,USA_gdp_%_growth,time
59,10499.64711,-4.042395,-3.072225,11341.26837,-10.812611,-9.943235,58115.93067,-1.278358,-0.050885,8255.567064,...,12072.39944,1.147944,1.940032,42098.60293,-11.355676,-11.030858,58451.60672,-3.700953,-2.767803,2020
60,11037.29404,5.120619,6.023813,12402.49079,9.357176,10.398249,59339.68435,2.105711,2.236212,8621.73059,...,13341.55634,10.51288,11.353496,45334.157,7.685657,7.597471,61829.84563,5.779548,5.945485,2021
61,11287.1485,2.263729,3.080322,12932.46953,4.273164,5.243044,60797.95865,2.457503,3.617589,8831.126713,...,13990.74687,4.865928,5.56586,47232.27181,4.186942,4.101621,62866.71439,1.676971,2.061593,2022
62,11443.973299,1.912327,3.370353,12894.976521,1.30623,2.616845,61531.376965,1.97341,3.453485,8878.670912,...,14520.695582,2.946106,4.835036,47621.031746,2.0406,2.388049,63588.89078,1.937953,2.783232,2023
63,11602.010641,1.866738,3.437995,12859.314127,1.034007,2.370431,62267.007641,1.880169,3.405138,8925.260331,...,15073.439631,2.957,4.839809,48006.381258,1.92502,2.317215,64311.296217,1.977023,2.938613,2024


In [53]:
# Label the years

wb_gdp ['time'] = wb_gdp ['time'].str.replace('YR', '')

pre_recession_years = ['1974', '1981','1990', '2008' ]
crisis_years = ['1975', '1982', '1991', '2009', '2020']
recovery_years = ['1976', '1977', '1983', '1984', '1992', '1993', '2010', '2011']
downturn_years = ['1958', '1998', '2001', '2012']

labels = []
for i in range(len(wb_gdp)):
    if wb_gdp['time'][i] in pre_recession_years:
        labels.append('pre_recession')
    
    elif wb_gdp ['time'][i] in crisis_years:
        labels.append('recession')

    elif wb_gdp['time'][i] in recovery_years:
        labels.append('recovery')

    elif wb_gdp ['time'][i] in downturn_years:
        labels.append('downturn')
        
    else:
        labels.append('expansion')

wb_gdp['year_label'] = labels

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wb_gdp ['time'] = wb_gdp ['time'].str.replace('YR', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wb_gdp['year_label'] = labels


In [55]:
X = wb_gdp.drop(['time', 'year_label'],axis=1)

Y = wb_gdp['year_label']

encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = pd.DataFrame(encoder.transform(Y))
X_train, X_test, y_train, y_test = train_test_split(X, encoded_Y, test_size=0.3, shuffle=True)

scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X.shape)

(62, 60)


In [56]:
# Initialize the model
model = Sequential()

# Add layers to the model
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))  # Use the shape of the training data for input shape
model.add(Dense(32, activation='relu'))
model.add(Dense(5, activation='softmax'))  # Softmax for multiclass classification

# Compile the model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    X_train,
    to_categorical(y_train),
    epochs=20,
    batch_size=12
)

# Evaluate the model
evaluation = model.evaluate(X_test, to_categorical(y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [57]:
predictions = model.predict(X_test)
print(np.argmax(predictions, axis=1))

predictions

[1 1 1 2 1 1 1 1 1 3 1 1 1 1 4 1 1 1 1]


array([[1.09932154e-01, 7.84246087e-01, 1.81435030e-02, 6.60199746e-02,
        2.16582920e-02],
       [7.94254392e-02, 7.71988928e-01, 3.04319952e-02, 2.24464647e-02,
        9.57071409e-02],
       [8.05252173e-04, 8.27541113e-01, 8.73545092e-03, 9.38391531e-05,
        1.62824348e-01],
       [6.89342245e-02, 1.12591185e-01, 5.92465460e-01, 9.17273909e-02,
        1.34281754e-01],
       [1.06512338e-01, 6.28693163e-01, 6.60878867e-02, 1.40491590e-01,
        5.82150221e-02],
       [1.20776273e-01, 7.94539154e-01, 1.77664310e-02, 2.23571584e-02,
        4.45609353e-02],
       [7.42822047e-03, 6.91080451e-01, 7.24207088e-02, 3.29717877e-03,
        2.25773469e-01],
       [1.71880156e-03, 9.73036826e-01, 7.11360574e-03, 5.62313828e-04,
        1.75683629e-02],
       [1.11841954e-01, 5.70273340e-01, 1.54723957e-01, 6.29113019e-02,
        1.00249380e-01],
       [2.50637978e-01, 2.12064028e-01, 1.55117854e-01, 3.23863506e-01,
        5.83166592e-02],
       [1.37302518e-01, 3.5969

Again, it can only predict mostly the expansion years

In [58]:
columns_percentage = ['time', 'year_label']

for column in wb_gdp.columns:
    if '%' in column:
        columns_percentage.append(column)

        
columns_percentage

['time',
 'year_label',
 'WLD_gdp_per_capita_%_growth',
 'WLD_gdp_%_growth',
 'ARG_gdp_per_capita_%_growth',
 'ARG_gdp_%_growth',
 'AUS_gdp_per_capita_%_growth',
 'AUS_gdp_%_growth',
 'BRA_gdp_per_capita_%_growth',
 'BRA_gdp_%_growth',
 'CAN_gdp_per_capita_%_growth',
 'CAN_gdp_%_growth',
 'CHN_gdp_per_capita_%_growth',
 'CHN_gdp_%_growth',
 'FRA_gdp_per_capita_%_growth',
 'FRA_gdp_%_growth',
 'DEU_gdp_per_capita_%_growth',
 'DEU_gdp_%_growth',
 'IND_gdp_per_capita_%_growth',
 'IND_gdp_%_growth',
 'IDN_gdp_per_capita_%_growth',
 'IDN_gdp_%_growth',
 'ITA_gdp_per_capita_%_growth',
 'ITA_gdp_%_growth',
 'JPN_gdp_per_capita_%_growth',
 'JPN_gdp_%_growth',
 'MEX_gdp_per_capita_%_growth',
 'MEX_gdp_%_growth',
 'RUS_gdp_per_capita_%_growth',
 'RUS_gdp_%_growth',
 'SAU_gdp_per_capita_%_growth',
 'SAU_gdp_%_growth',
 'ZAF_gdp_per_capita_%_growth',
 'ZAF_gdp_%_growth',
 'KOR_gdp_per_capita_%_growth',
 'KOR_gdp_%_growth',
 'TUR_gdp_per_capita_%_growth',
 'TUR_gdp_%_growth',
 'GBR_gdp_per_capita_%

In [59]:
gdp_percentage = wb_gdp[columns_percentage]
gdp_percentage

Unnamed: 0,time,year_label,WLD_gdp_per_capita_%_growth,WLD_gdp_%_growth,ARG_gdp_per_capita_%_growth,ARG_gdp_%_growth,AUS_gdp_per_capita_%_growth,AUS_gdp_%_growth,BRA_gdp_per_capita_%_growth,BRA_gdp_%_growth,...,ZAF_gdp_per_capita_%_growth,ZAF_gdp_%_growth,KOR_gdp_per_capita_%_growth,KOR_gdp_%_growth,TUR_gdp_per_capita_%_growth,TUR_gdp_%_growth,GBR_gdp_per_capita_%_growth,GBR_gdp_%_growth,USA_gdp_per_capita_%_growth,USA_gdp_%_growth
0,1961,expansion,2.406823,3.790081,3.740903,5.427843,0.463671,2.482656,5.374304,8.600000,...,0.977924,3.844734,3.809555,6.935993,-1.507613,1.156069,1.899262,2.677119,0.618121,2.300000
1,1962,expansion,3.481678,5.314851,-2.454333,-0.852022,-1.147700,1.294611,3.482729,6.600000,...,3.061899,6.177931,0.966642,3.895273,2.740139,5.571429,0.248519,1.102910,4.480669,6.100000
2,1963,expansion,2.991508,5.184483,-6.831393,-5.308197,4.198486,6.216107,-2.315872,0.600000,...,4.165498,7.373709,6.026426,9.020568,6.162186,9.066306,4.092469,4.874384,2.908272,4.400000
3,1964,expansion,4.367685,6.557806,8.382751,10.130298,4.901197,6.980061,0.449644,3.400000,...,4.685237,7.939609,6.647770,9.473825,2.754979,5.459057,4.849645,5.533659,4.340549,5.800000
4,1965,expansion,3.399931,5.549061,8.838850,10.569433,3.923740,5.980438,-0.456900,2.400000,...,2.892205,6.122798,4.624622,7.318434,0.327167,2.823530,1.488048,2.142177,5.078098,6.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,2018,expansion,2.161637,3.288050,-3.601610,-2.617396,1.348283,2.883045,0.978938,1.783667,...,0.285736,1.522329,2.462246,2.907404,2.085158,2.979885,1.090624,1.705021,2.404868,2.945385
58,2019,expansion,1.522509,2.594550,-2.969562,-2.000861,0.665151,2.171396,0.448280,1.220778,...,-0.987175,0.303453,1.888915,2.243978,-0.028123,0.783610,1.032741,1.604309,1.829668,2.294439
59,2020,recession,-4.042395,-3.072225,-10.812611,-9.943235,-1.278358,-0.050885,-3.918004,-3.276759,...,-7.481093,-6.342471,-0.846212,-0.709415,1.147944,1.940032,-11.355676,-11.030858,-3.700953,-2.767803
60,2021,expansion,5.120619,6.023813,9.357176,10.398249,2.105711,2.236212,4.435353,4.988850,...,3.870315,4.913097,4.329207,4.145324,10.512880,11.353496,7.685657,7.597471,5.779548,5.945485


In [61]:
X = gdp_percentage.drop(['time', 'year_label'], axis=1)
Y = gdp_percentage['year_label']

encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = pd.DataFrame(encoder.transform(Y))

# Setting shuffle to True
X_train, X_test, y_train, y_test = train_test_split(X, encoded_Y, test_size=0.35, shuffle=True)

scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X.shape)

(62, 40)


In [62]:
# Initialize the model
model = Sequential()

# Add layers to the model
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))  # Use the shape of the training data for input shape
model.add(Dense(128, activation='relu'))
model.add(Dense(5, activation='softmax'))  # Softmax for multiclass classification

# Compile the model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    X_train,
    to_categorical(y_train),
    epochs=20,
    batch_size=6
)

# Evaluate the model
evaluation = model.evaluate(X_test, to_categorical(y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [63]:
predictions = model.predict(X_test)
print(np.argmax(predictions, axis=1))

predictions

[0 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 3 1]


array([[4.56403911e-01, 1.55938193e-01, 1.31527215e-01, 1.93145543e-01,
        6.29851222e-02],
       [8.62709479e-04, 9.90636766e-01, 1.04307442e-03, 3.03722220e-04,
        7.15367822e-03],
       [8.25757310e-02, 6.47515059e-01, 7.04010017e-03, 3.25608104e-02,
        2.30308324e-01],
       [4.63934839e-02, 4.24570948e-01, 1.67800829e-01, 1.58974245e-01,
        2.02260509e-01],
       [1.17904937e-03, 9.92462993e-01, 2.79898057e-03, 3.99113633e-04,
        3.15986341e-03],
       [1.09234662e-03, 9.19591129e-01, 1.92064885e-03, 1.98757602e-03,
        7.54082203e-02],
       [9.73951630e-03, 6.08289540e-01, 6.93899486e-03, 1.06827188e-02,
        3.64349186e-01],
       [3.26761277e-04, 9.98232186e-01, 3.88967310e-04, 7.06729406e-05,
        9.81420861e-04],
       [4.11237702e-02, 9.06250000e-01, 2.44246274e-02, 8.87712650e-03,
        1.93244331e-02],
       [2.25298731e-08, 9.99999523e-01, 8.84063045e-09, 7.36793349e-10,
        4.80900269e-07],
       [4.90464754e-06, 9.9998

In [64]:
y_test

Unnamed: 0,0
41,1
39,1
24,1
13,2
46,1
49,4
26,1
43,1
28,1
60,1


In [65]:
# Removing the columns in dollar seems to have improved the predictions
# Let's upsample again and downsample again


category_expansion = gdp_percentage[gdp_percentage['year_label']=='expansion']
category_pre_recession = gdp_percentage[gdp_percentage['year_label']=='pre_recession']
category_recession = gdp_percentage[gdp_percentage['year_label']=='recession']
category_recovery = gdp_percentage[gdp_percentage['year_label']=='recovery']
category_downturn = gdp_percentage[gdp_percentage['year_label']=='downturn']

category_pre_recession_oversampled = resample(category_pre_recession,replace=True, n_samples = len(category_expansion))
category_recession_oversampled = resample(category_recession,replace=True, n_samples = len(category_expansion))
category_recovery_oversampled = resample(category_recovery,replace=True, n_samples = len(category_expansion))
category_downturn_oversampled = resample(category_downturn,replace=True, n_samples = len(category_expansion))

gdp_percentage_upsampled = pd.concat([category_expansion,
                                      category_pre_recession,
                                      category_recession_oversampled,
                                      category_recovery_oversampled,
                                      category_downturn_oversampled], axis=0)

gdp_percentage_upsampled.shape

(172, 42)

In [67]:
X = gdp_percentage_upsampled.drop(['time', 'year_label'], axis=1)
Y = gdp_percentage_upsampled['year_label']

encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = pd.DataFrame(encoder.transform(Y))

X_train, X_test, y_train, y_test = train_test_split(X, encoded_Y, test_size=0.35, shuffle=True)

scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(X.shape)

(172, 40)


In [68]:
# 1: Expansion, 2: pre_recession , 3:recession, 4:recovery , 0: downturn

In [70]:
# Initialize the model
model = Sequential()

# Add layers to the model
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))  # Use the shape of the training data for input shape
model.add(Dense(128, activation='relu'))
model.add(Dense(5, activation='softmax'))  # Softmax for multiclass classification

# Compile the model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    X_train,
    to_categorical(y_train),
    epochs=20,
    batch_size=6
)

# Evaluate the model
evaluation = model.evaluate(X_test, to_categorical(y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [71]:
predictions = model.predict(X_test)
print(np.argmax(predictions, axis=1))

[3 4 1 1 0 4 0 1 0 4 3 1 3 3 0 4 0 4 3 1 4 1 4 1 0 1 0 4 3 1 4 0 4 4 4 3 1
 3 4 4 3 3 4 3 3 4 3 4 4 1 3 0 0 1 4 0 4 1 1 0 1]


In [72]:
predictions_array =np.argmax(predictions, axis=1)
y_test_df = pd.DataFrame(y_test).reset_index(drop=True)
y_test_df.columns= ['y_test']
predictions_df = pd.DataFrame(predictions_array)
predictions_df.columns= ['predicted_y']
predictions_df = pd.concat([predictions_df,y_test_df], axis=1)
pd.set_option('display.max_rows',None)
predictions_df

Unnamed: 0,predicted_y,y_test
0,3,3
1,4,4
2,1,1
3,1,1
4,0,0
5,4,4
6,0,0
7,1,1
8,0,0
9,4,4


This model has a much better accuracy! Will be used for predictions

In [73]:
model_upsampled = model 
scaler_upsampled  = scaler

filename = 'model_gdp_upsampled.sav'
pickle.dump(model, open(filename, 'wb'))

filename= 'scaler_gdp_upsample.sav'
pickle.dump(scaler, open(filename, 'wb'))

INFO:tensorflow:Assets written to: C:\Users\menno\AppData\Local\Temp\tmp93zdbci1\assets


INFO:tensorflow:Assets written to: C:\Users\menno\AppData\Local\Temp\tmp93zdbci1\assets


In [74]:
gdp_percentage['year_label'].value_counts()

expansion        42
recovery          8
recession         5
pre_recession     4
downturn          3
Name: year_label, dtype: int64

In [75]:
category_expansion = gdp_percentage[gdp_percentage['year_label']=='expansion']
other_categories = gdp_percentage[gdp_percentage['year_label']!='expansion']
category_category_expansion_undersampled = resample(category_expansion,replace=True, n_samples = 8)

gdp_percentage_undersampled = pd.concat([other_categories, category_category_expansion_undersampled], axis=0)

gdp_percentage_undersampled.shape

(28, 42)

In [77]:
# Initialize the model
model = Sequential()

# Add layers to the model
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))  # Use the shape of the training data for input shape
model.add(Dense(128, activation='relu'))
model.add(Dense(5, activation='softmax'))  # Softmax for multiclass classification

# Compile the model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    X_train,
    to_categorical(y_train),
    epochs=20,
    batch_size=6
)

# Evaluate the model
evaluation = model.evaluate(X_test, to_categorical(y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [78]:
# Initialize the model
model = Sequential()

# Add layers to the model
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))  # Use the shape of the training data for input shape
model.add(Dense(128, activation='relu'))
model.add(Dense(5, activation='softmax'))  # Softmax for multiclass classification

# Compile the model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    X_train,
    to_categorical(y_train),
    epochs=20,
    batch_size=6
)

# Evaluate the model
evaluation = model.evaluate(X_test, to_categorical(y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [79]:
predictions = model.predict(X_test)
print(np.argmax(predictions, axis=1))

[3 4 1 0 0 4 0 1 0 4 3 1 3 3 0 4 0 4 3 1 4 1 4 1 0 1 0 4 3 1 4 0 4 4 4 3 1
 3 4 4 3 3 4 3 3 4 3 4 4 1 3 0 0 1 4 0 4 1 1 0 1]


In [80]:
y_test

Unnamed: 0,0
82,3
88,4
28,1
27,1
158,0
95,4
168,0
8,1
170,0
117,4


In [81]:
y_train

Unnamed: 0,0
118,4
105,4
156,0
32,1
45,2
43,2
152,0
20,1
137,0
116,4


In [82]:
wb_gdp_to_predict

Unnamed: 0,WLD_gdp_per_capita_constant_us$,WLD_gdp_per_capita_%_growth,WLD_gdp_%_growth,ARG_gdp_per_capita_constant_us$,ARG_gdp_per_capita_%_growth,ARG_gdp_%_growth,AUS_gdp_per_capita_constant_us$,AUS_gdp_per_capita_%_growth,AUS_gdp_%_growth,BRA_gdp_per_capita_constant_us$,...,TUR_gdp_per_capita_constant_us$,TUR_gdp_per_capita_%_growth,TUR_gdp_%_growth,GBR_gdp_per_capita_constant_us$,GBR_gdp_per_capita_%_growth,GBR_gdp_%_growth,USA_gdp_per_capita_constant_us$,USA_gdp_per_capita_%_growth,USA_gdp_%_growth,time
59,10499.64711,-4.042395,-3.072225,11341.26837,-10.812611,-9.943235,58115.93067,-1.278358,-0.050885,8255.567064,...,12072.39944,1.147944,1.940032,42098.60293,-11.355676,-11.030858,58451.60672,-3.700953,-2.767803,2020
60,11037.29404,5.120619,6.023813,12402.49079,9.357176,10.398249,59339.68435,2.105711,2.236212,8621.73059,...,13341.55634,10.51288,11.353496,45334.157,7.685657,7.597471,61829.84563,5.779548,5.945485,2021
61,11287.1485,2.263729,3.080322,12932.46953,4.273164,5.243044,60797.95865,2.457503,3.617589,8831.126713,...,13990.74687,4.865928,5.56586,47232.27181,4.186942,4.101621,62866.71439,1.676971,2.061593,2022
62,11443.973299,1.912327,3.370353,12894.976521,1.30623,2.616845,61531.376965,1.97341,3.453485,8878.670912,...,14520.695582,2.946106,4.835036,47621.031746,2.0406,2.388049,63588.89078,1.937953,2.783232,2023
63,11602.010641,1.866738,3.437995,12859.314127,1.034007,2.370431,62267.007641,1.880169,3.405138,8925.260331,...,15073.439631,2.957,4.839809,48006.381258,1.92502,2.317215,64311.296217,1.977023,2.938613,2024


In [83]:
X = wb_gdp_to_predict.drop('time', axis=1)
X = wb_gdp_to_predict.drop([column for column in wb_gdp_to_predict.columns if '%' not in column], axis=1)
X

Unnamed: 0,WLD_gdp_per_capita_%_growth,WLD_gdp_%_growth,ARG_gdp_per_capita_%_growth,ARG_gdp_%_growth,AUS_gdp_per_capita_%_growth,AUS_gdp_%_growth,BRA_gdp_per_capita_%_growth,BRA_gdp_%_growth,CAN_gdp_per_capita_%_growth,CAN_gdp_%_growth,...,ZAF_gdp_per_capita_%_growth,ZAF_gdp_%_growth,KOR_gdp_per_capita_%_growth,KOR_gdp_%_growth,TUR_gdp_per_capita_%_growth,TUR_gdp_%_growth,GBR_gdp_per_capita_%_growth,GBR_gdp_%_growth,USA_gdp_per_capita_%_growth,USA_gdp_%_growth
59,-4.042395,-3.072225,-10.812611,-9.943235,-1.278358,-0.050885,-3.918004,-3.276759,-6.088212,-5.074358,...,-7.481093,-6.342471,-0.846212,-0.709415,1.147944,1.940032,-11.355676,-11.030858,-3.700953,-2.767803
60,5.120619,6.023813,9.357176,10.398249,2.105711,2.236212,4.435353,4.98885,4.409275,5.011801,...,3.870315,4.913097,4.329207,4.145324,10.51288,11.353496,7.685657,7.597471,5.779548,5.945485
61,2.263729,3.080322,4.273164,5.243044,2.457503,3.617589,2.428702,2.900531,1.52799,3.396202,...,1.187663,2.042299,2.792482,2.560537,4.865928,5.56586,4.186942,4.101621,1.676971,2.061593
62,1.912327,3.370353,1.30623,2.616845,1.97341,3.453485,2.177225,3.299039,1.586507,3.01687,...,0.896477,2.481249,4.739591,4.921079,2.946106,4.835036,2.0406,2.388049,1.937953,2.783232
63,1.866738,3.437995,1.034007,2.370431,1.880169,3.405138,2.067424,3.500259,1.600916,2.897294,...,0.780184,2.659726,5.45842,6.040775,2.957,4.839809,1.92502,2.317215,1.977023,2.938613


In [84]:
# Loading the model

filename = 'model_gdp_upsampled.sav'
loaded_model  = pickle.load(open(filename, 'rb'))

filename= 'scaler_gdp_upsample.sav'
loaded_scaler = pickle.load(open(filename, 'rb'))

X_scaled = loaded_scaler.transform(X)

predictions = loaded_model.predict(X_scaled)
print(np.argmax(predictions, axis=1))

[3 1 1 1 1]


In [None]:
# 1: Expansion, 2: pre_recession , 3:recession, 4:recovery , 0: downturn

# Conclusion:

#### This model has predicted expansion for 2023 and 2024

# World Bank: data from 1981

In [85]:
pd.set_option('display.max_rows', 10)
wb_1981 = pd.read_csv(r'C:\Users\menno\IH-Labs\IH-final-project\01_CSV files\wb_from_1981_predictions.csv')
wb_1981

Unnamed: 0,time,WLD_gdp_per_capita_constant_us$,WLD_gdp_per_capita_%_growth,WLD_consumer_price_index_%,WLD_unemployment_%_of_total_labor_force,WLD_government_expense_%_gdp,WLD_industry_value_added_us$,WLD_gdp_%_growth,ARG_gdp_per_capita_constant_us$,ARG_gdp_per_capita_%_growth,...,USA_consumer_price_index_%,USA_unemployment_%_of_total_labor_force,USA_government_debt_total_local_currency,USA_government_debt_total_%_of_gdp,USA_government_expense_local_currency,USA_government_expense_%_gdp,USA_current_account_balance_us$,USA_industry_value_added_us$,USA_net_trade_goods_services_us$,USA_gdp_%_growth
0,YR1981,5952.588435,0.180175,12.442437,5.996166,26.291669,26.291669,1.952000,9585.206772,-6.676587,...,10.334715,5.837531,1.011451e+13,67.911073,6.736300e+11,21.004721,4.810000e+09,21.004721,-1.568000e+10,2.537719
1,YR1982,5864.556722,-1.478881,10.221727,5.996166,26.291669,26.291669,0.303379,9366.170380,-2.285150,...,6.131427,5.837531,1.011451e+13,67.911073,7.511200e+11,22.463140,-1.160700e+10,22.463140,-2.353700e+10,-1.802887
2,YR1983,5912.911415,0.824524,8.669272,5.996166,26.291669,26.291669,2.624594,9622.347319,2.735130,...,3.212435,5.837531,1.011451e+13,67.911073,8.286600e+11,22.802733,-4.422200e+10,22.802733,-5.713500e+10,4.583913
3,YR1984,6085.186623,2.913543,8.080320,5.996166,26.291669,26.291669,4.709696,9624.404556,0.021380,...,4.300535,5.837531,1.011451e+13,67.911073,8.668900e+11,21.470359,-9.900800e+10,21.470359,-1.082770e+11,7.236633
4,YR1985,6203.021121,1.936416,6.807567,5.996166,26.291669,26.291669,3.720727,8987.933706,-6.613093,...,3.545644,5.837531,1.011451e+13,67.911073,9.621600e+11,22.174802,-1.244550e+11,22.174802,-1.211020e+11,4.169656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39,YR2020,10499.647110,-4.042395,1.928955,6.898273,33.696136,33.696136,-3.072225,11341.268370,-10.812611,...,1.233584,8.050000,2.658520e+13,126.232494,6.780400e+12,32.194915,-6.197020e+11,32.194915,-6.539900e+11,-2.767803
40,YR2021,11037.294040,5.120619,3.475403,6.201871,32.167331,32.167331,6.023813,12402.490790,9.357176,...,4.697859,5.350000,2.806530e+13,120.374017,7.310240e+12,31.354124,-8.463540e+11,31.354124,-8.450500e+11,5.945485
41,YR2022,11287.148500,2.263729,8.271804,5.772710,26.291669,26.291669,3.080322,12932.469530,4.273164,...,8.002800,3.611000,1.011451e+13,67.911073,2.542650e+12,22.077180,-9.438000e+11,22.077180,-9.453230e+11,2.061593
42,YR2023,11465.810221,1.560105,7.363968,5.869994,26.291669,26.291669,2.988789,12911.282795,1.564507,...,5.363330,4.254237,1.011451e+13,67.911073,2.595302e+12,22.115447,-9.629594e+11,22.115447,-9.675857e+11,2.557817


In [86]:
wb_1981 ['time'] = wb_1981 ['time'].str.replace('YR', '')
percentage_columns_1981 = ['time']
for column in wb_1981.columns:
    if "%" in column:
        percentage_columns_1981.append(column)

wb_1981  = wb_1981[percentage_columns_1981]
wb_1981_to_predict = wb_1981[-5:]
wb_1981 = wb_1981[:-2]

In [87]:
wb_1981

Unnamed: 0,time,WLD_gdp_per_capita_%_growth,WLD_consumer_price_index_%,WLD_unemployment_%_of_total_labor_force,WLD_government_expense_%_gdp,WLD_gdp_%_growth,ARG_gdp_per_capita_%_growth,ARG_unemployment_%_of_total_labor_force,ARG_industry_value_added_%_growth,ARG_gdp_%_growth,...,GBR_government_debt_total_%_of_gdp,GBR_government_expense_%_gdp,GBR_industry_value_added_%_growth,GBR_gdp_%_growth,USA_gdp_per_capita_%_growth,USA_consumer_price_index_%,USA_unemployment_%_of_total_labor_force,USA_government_debt_total_%_of_gdp,USA_government_expense_%_gdp,USA_gdp_%_growth
0,1981,0.180175,12.442437,5.996166,26.291669,1.952000,-6.676587,10.770125,-10.663334,-5.189789,...,107.078720,37.484438,0.784630,-0.787744,1.536320,10.334715,5.837531,67.911073,21.004721,2.537719
1,1982,-1.478881,10.221727,5.996166,26.291669,0.303379,-2.285150,10.770125,-3.209381,-0.735659,...,107.078720,37.671549,0.784630,1.994891,-2.734570,6.131427,5.837531,67.911073,22.463140,-1.802887
2,1983,0.824524,8.669272,5.996166,26.291669,2.624594,2.735130,10.770125,5.377688,4.349093,...,107.078720,36.881620,0.784630,4.221856,3.631979,3.212435,5.837531,67.911073,22.802733,4.583913
3,1984,2.913543,8.080320,5.996166,26.291669,4.709696,0.021380,10.770125,0.674286,1.570739,...,107.078720,36.819563,0.784630,2.269105,6.312168,4.300535,5.837531,67.911073,21.470359,7.236633
4,1985,1.936416,6.807567,5.996166,26.291669,3.720727,-6.613093,10.770125,-8.651012,-5.189024,...,107.078720,35.932758,0.784630,4.147415,3.250656,3.545644,5.837531,67.911073,22.174802,4.169656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37,2018,2.161637,2.438737,5.699076,26.301876,3.288050,-3.601610,9.220000,-3.049315,-2.617396,...,157.803663,37.428398,1.794476,1.705021,2.404868,2.442583,3.900000,99.060976,22.320573,2.945385
38,2019,1.522509,2.206073,5.543161,26.944593,2.594550,-2.969562,9.840000,-4.740755,-2.000861,...,160.020247,36.611912,2.089229,1.604309,1.829668,1.812210,3.670000,100.805042,22.726136,2.294439
39,2020,-4.042395,1.928955,6.898273,33.696136,-3.072225,-10.812611,11.460000,-9.284819,-9.943235,...,195.392289,47.557208,-3.141194,-11.030858,-3.700953,1.233584,8.050000,126.232494,32.194915,-2.767803
40,2021,5.120619,3.475403,6.201871,32.167331,6.023813,9.357176,8.740000,15.264071,10.398249,...,186.481994,43.961668,9.287127,7.597471,5.779548,4.697859,5.350000,120.374017,31.354124,5.945485


In [88]:
wb_1981_to_predict

Unnamed: 0,time,WLD_gdp_per_capita_%_growth,WLD_consumer_price_index_%,WLD_unemployment_%_of_total_labor_force,WLD_government_expense_%_gdp,WLD_gdp_%_growth,ARG_gdp_per_capita_%_growth,ARG_unemployment_%_of_total_labor_force,ARG_industry_value_added_%_growth,ARG_gdp_%_growth,...,GBR_government_debt_total_%_of_gdp,GBR_government_expense_%_gdp,GBR_industry_value_added_%_growth,GBR_gdp_%_growth,USA_gdp_per_capita_%_growth,USA_consumer_price_index_%,USA_unemployment_%_of_total_labor_force,USA_government_debt_total_%_of_gdp,USA_government_expense_%_gdp,USA_gdp_%_growth
39,2020,-4.042395,1.928955,6.898273,33.696136,-3.072225,-10.812611,11.46,-9.284819,-9.943235,...,195.392289,47.557208,-3.141194,-11.030858,-3.700953,1.233584,8.05,126.232494,32.194915,-2.767803
40,2021,5.120619,3.475403,6.201871,32.167331,6.023813,9.357176,8.74,15.264071,10.398249,...,186.481994,43.961668,9.287127,7.597471,5.779548,4.697859,5.35,120.374017,31.354124,5.945485
41,2022,2.263729,8.271804,5.77271,26.291669,3.080322,4.273164,6.491,5.958342,5.243044,...,107.07872,37.3784,-0.475587,4.101621,1.676971,8.0028,3.611,67.911073,22.07718,2.061593
42,2023,1.560105,7.363968,5.869994,26.291669,2.988789,1.564507,7.104069,2.345115,2.716573,...,107.07872,37.386082,0.738147,2.104794,1.705986,5.36333,4.254237,67.911073,22.115447,2.557817
43,2024,1.584147,6.701702,5.923616,26.291669,2.992479,1.068194,7.616798,1.756325,2.254729,...,107.07872,37.391986,0.781849,2.148387,1.708398,4.075682,4.700754,67.911073,22.14132,2.625491


In [89]:
# Label the years

pre_crisis_years = ['1974', '1981', '1990', '2008','2019']
crisis_years = ['1975', '1982', '1991', '2009', '2020']
recovery_years = ['1976', '1977', '1983', '1984', '1992', '1993', '2010', '2011']
downturn_years = ['1958', '1998', '2001', '2012']

labels = []

for i in range(len(wb_1981)):
    
    if wb_1981['time'][i] in pre_crisis_years:
        labels.append('pre_recession')
        
    elif wb_1981['time'][i] in crisis_years:
        labels.append('recession')

    elif wb_1981['time'][i] in recovery_years:
        labels.append('recovery')

    elif wb_1981['time'][i] in downturn_years:
        labels.append('downturn')
        
    else:
        labels.append('expansion')

wb_1981['year_label']= labels
wb_1981

Unnamed: 0,time,WLD_gdp_per_capita_%_growth,WLD_consumer_price_index_%,WLD_unemployment_%_of_total_labor_force,WLD_government_expense_%_gdp,WLD_gdp_%_growth,ARG_gdp_per_capita_%_growth,ARG_unemployment_%_of_total_labor_force,ARG_industry_value_added_%_growth,ARG_gdp_%_growth,...,GBR_government_expense_%_gdp,GBR_industry_value_added_%_growth,GBR_gdp_%_growth,USA_gdp_per_capita_%_growth,USA_consumer_price_index_%,USA_unemployment_%_of_total_labor_force,USA_government_debt_total_%_of_gdp,USA_government_expense_%_gdp,USA_gdp_%_growth,year_label
0,1981,0.180175,12.442437,5.996166,26.291669,1.952000,-6.676587,10.770125,-10.663334,-5.189789,...,37.484438,0.784630,-0.787744,1.536320,10.334715,5.837531,67.911073,21.004721,2.537719,pre_recession
1,1982,-1.478881,10.221727,5.996166,26.291669,0.303379,-2.285150,10.770125,-3.209381,-0.735659,...,37.671549,0.784630,1.994891,-2.734570,6.131427,5.837531,67.911073,22.463140,-1.802887,recession
2,1983,0.824524,8.669272,5.996166,26.291669,2.624594,2.735130,10.770125,5.377688,4.349093,...,36.881620,0.784630,4.221856,3.631979,3.212435,5.837531,67.911073,22.802733,4.583913,recovery
3,1984,2.913543,8.080320,5.996166,26.291669,4.709696,0.021380,10.770125,0.674286,1.570739,...,36.819563,0.784630,2.269105,6.312168,4.300535,5.837531,67.911073,21.470359,7.236633,recovery
4,1985,1.936416,6.807567,5.996166,26.291669,3.720727,-6.613093,10.770125,-8.651012,-5.189024,...,35.932758,0.784630,4.147415,3.250656,3.545644,5.837531,67.911073,22.174802,4.169656,expansion
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37,2018,2.161637,2.438737,5.699076,26.301876,3.288050,-3.601610,9.220000,-3.049315,-2.617396,...,37.428398,1.794476,1.705021,2.404868,2.442583,3.900000,99.060976,22.320573,2.945385,expansion
38,2019,1.522509,2.206073,5.543161,26.944593,2.594550,-2.969562,9.840000,-4.740755,-2.000861,...,36.611912,2.089229,1.604309,1.829668,1.812210,3.670000,100.805042,22.726136,2.294439,pre_recession
39,2020,-4.042395,1.928955,6.898273,33.696136,-3.072225,-10.812611,11.460000,-9.284819,-9.943235,...,47.557208,-3.141194,-11.030858,-3.700953,1.233584,8.050000,126.232494,32.194915,-2.767803,recession
40,2021,5.120619,3.475403,6.201871,32.167331,6.023813,9.357176,8.740000,15.264071,10.398249,...,43.961668,9.287127,7.597471,5.779548,4.697859,5.350000,120.374017,31.354124,5.945485,expansion


In [91]:
# Separate the data by categories
category_expansion = wb_1981[wb_1981['year_label'] == 'expansion']
category_pre_recession = wb_1981[wb_1981['year_label'] == 'pre_recession']
category_recession = wb_1981[wb_1981['year_label'] == 'recession']
category_recovery = wb_1981[wb_1981['year_label'] == 'recovery']
category_downturn = wb_1981[wb_1981['year_label'] == 'downturn']

# Oversample the minority classes to match the majority class
category_pre_recession_oversampled = resample(category_pre_recession, replace=True, n_samples=len(category_expansion))
category_recession_oversampled = resample(category_recession, replace=True, n_samples=len(category_expansion))
category_recovery_oversampled = resample(category_recovery, replace=True, n_samples=len(category_expansion))
category_downturn_oversampled = resample(category_downturn, replace=True, n_samples=len(category_expansion))

# Combine the original and oversampled data
wb_1981_upsampled = pd.concat([
    category_expansion,
    category_pre_recession_oversampled,
    category_recession_oversampled,
    category_recovery_oversampled,
    category_downturn_oversampled
], axis=0)

# Split the data into features and target variables
X = wb_1981_upsampled.drop(['time', 'year_label'], axis=1)
Y = wb_1981_upsampled['year_label']

# Encode the target variable
encoder = LabelEncoder()
encoded_Y = pd.DataFrame(encoder.fit_transform(Y))

# Split the data into training and test sets (with shuffling)
X_train, X_test, y_train, y_test = train_test_split(X, encoded_Y, test_size=0.35, shuffle=True)

# Standardize the features
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Print the shape of the feature matrix
print(X.shape)

(125, 109)


In [92]:
encoded_Y

Unnamed: 0,0
0,1
1,1
2,1
3,1
4,1
...,...
120,0
121,0
122,0
123,0


In [93]:
# 1: Expansion, 2: pre_recession , 3:recession, 4:recovery , 0: downturn

In [94]:
# Initialize the model
model = Sequential()

# Add layers to the model
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))  # Use the shape of the training data for input shape
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(5, activation='softmax'))  # Softmax for multiclass classification

# Compile the model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    X_train,
    to_categorical(y_train),
    epochs=50,
    batch_size=6
)

# Evaluate the model
evaluation = model.evaluate(X_test, to_categorical(y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [95]:
predictions = model.predict(X_test)
print(np.argmax(predictions, axis=1))

[0 1 2 0 1 2 4 4 4 2 0 3 3 4 4 1 4 4 3 3 4 2 0 2 0 0 2 2 3 2 4 3 0 4 4 2 1
 4 0 0 0 2 3 3]


In [96]:
y_test

Unnamed: 0,0
102,0
23,1
29,2
115,0
21,1
...,...
113,0
118,0
41,2
71,3


In [97]:
model_1981_upsampled = model 
scaler_1981_upsampled  = scaler

filename = 'model_1981_upsampled.sav'
pickle.dump(model_1981_upsampled, open(filename, 'wb'))

filename= 'scaler_1981_upsampled.sav'
pickle.dump(scaler_1981_upsampled, open(filename, 'wb'))


INFO:tensorflow:Assets written to: C:\Users\menno\AppData\Local\Temp\tmp9ir8k3wz\assets


INFO:tensorflow:Assets written to: C:\Users\menno\AppData\Local\Temp\tmp9ir8k3wz\assets


In [98]:
X = wb_1981_to_predict.drop(['time'], axis=1)
X

Unnamed: 0,WLD_gdp_per_capita_%_growth,WLD_consumer_price_index_%,WLD_unemployment_%_of_total_labor_force,WLD_government_expense_%_gdp,WLD_gdp_%_growth,ARG_gdp_per_capita_%_growth,ARG_unemployment_%_of_total_labor_force,ARG_industry_value_added_%_growth,ARG_gdp_%_growth,AUS_gdp_per_capita_%_growth,...,GBR_government_debt_total_%_of_gdp,GBR_government_expense_%_gdp,GBR_industry_value_added_%_growth,GBR_gdp_%_growth,USA_gdp_per_capita_%_growth,USA_consumer_price_index_%,USA_unemployment_%_of_total_labor_force,USA_government_debt_total_%_of_gdp,USA_government_expense_%_gdp,USA_gdp_%_growth
39,-4.042395,1.928955,6.898273,33.696136,-3.072225,-10.812611,11.46,-9.284819,-9.943235,-1.278358,...,195.392289,47.557208,-3.141194,-11.030858,-3.700953,1.233584,8.05,126.232494,32.194915,-2.767803
40,5.120619,3.475403,6.201871,32.167331,6.023813,9.357176,8.74,15.264071,10.398249,2.105711,...,186.481994,43.961668,9.287127,7.597471,5.779548,4.697859,5.35,120.374017,31.354124,5.945485
41,2.263729,8.271804,5.77271,26.291669,3.080322,4.273164,6.491,5.958342,5.243044,2.457503,...,107.07872,37.3784,-0.475587,4.101621,1.676971,8.0028,3.611,67.911073,22.07718,2.061593
42,1.560105,7.363968,5.869994,26.291669,2.988789,1.564507,7.104069,2.345115,2.716573,1.807538,...,107.07872,37.386082,0.738147,2.104794,1.705986,5.36333,4.254237,67.911073,22.115447,2.557817
43,1.584147,6.701702,5.923616,26.291669,2.992479,1.068194,7.616798,1.756325,2.254729,1.679026,...,107.07872,37.391986,0.781849,2.148387,1.708398,4.075682,4.700754,67.911073,22.14132,2.625491


In [99]:
# load the model

filename = 'model_1981_upsampled.sav'
loaded_model = pickle.load(open(filename, 'rb'))

filename= 'scaler_1981_upsampled.sav'
loaded_scaler = pickle.load(open(filename, 'rb'))

X_scaled = loaded_scaler.transform(X)

predictions = loaded_model.predict(X_scaled)
print(np.argmax(predictions, axis=1))

[3 1 2 2 2]


In [None]:
# 1: Expansion, 2: pre_recession , 3:recession, 4:recovery , 0: downturn

# Conclusion

### Both models predicted pre-recession for 2023 and 2024.