In [34]:
# Import the required variables
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt, style # for ploting
import seaborn as sns

In [6]:
def warn(*args, **kwargs): pass
import warnings
warnings.warn = warn

In [13]:
def num_plot(df, col, title, symb):
    fig, ax = plt.subplots(2, 1, sharex=True, figsize=(8,5),gridspec_kw={"height_ratios": (.2, .8)})
    ax[0].set_title(title,fontsize=18)
    sns.boxplot(x=col, data=df, ax=ax[0])
    ax[0].set(yticks=[])
    sns.histplot(x=col, data=df, ax=ax[1])
    ax[1].set_xlabel(col, fontsize=16)
    plt.axvline(df[col].mean(), color='darkgreen', linewidth=2.2, label='mean=' + str(np.round(df[col].mean(),1)) + symb)
    plt.axvline(df[col].median(), color='red', linewidth=2.2, label='median='+ str(np.round(df[col].median(),1)) + symb)
    plt.axvline(df[col].mode()[0], color='purple', linewidth=2.2, label='mode='+ str(df[col].mode()[0]) + symb)
    plt.legend(bbox_to_anchor=(1, 1.03), ncol=1, fontsize=17, fancybox=True, shadow=True, frameon=True)
    plt.tight_layout()
    plt.show()

In [14]:
sns.set_style("whitegrid")
sns.despine()
plt.style.use("seaborn-whitegrid")
plt.rc("figure", autolayout=True)
plt.rc("axes", labelweight="bold", labelsize="large", titleweight="bold", titlesize=14, titlepad=10)

<Figure size 432x288 with 0 Axes>

In [37]:
# Import the required variables
import pandas as pd
import numpy as np

# Read the data
Res_Nonhydro_Capita = pd.read_excel ("../data/processed/Res_Nonhydro_Capita.xlsx", index_col = 0)
SDGs_Ranks_2021 = pd.read_excel ("../data/processed/SDGs_Ranks_2021.xlsx", index_col = 0)
SDGs_Scores_2021 = pd.read_excel ("../data/processed/SDGs_Scores_2021.xlsx", index_col = 0)

# Only consider the 2021 year
Res_Nonhydro_Capita_2021 = Res_Nonhydro_Capita[[2021]]
Res_Nonhydro_Capita_2021= Res_Nonhydro_Capita_2021.rename(columns={ 2021: 'Res_Nonhydro_Capita_2021'})

# Do not consider the first column
SDGs_Ranks_2021 = SDGs_Ranks_2021.iloc[:,1:]
SDGs_Scores_2021 = SDGs_Scores_2021.iloc[:,1:]

# Append the target to the Datasets
SDGs_Ranks_2021 = pd.concat([SDGs_Ranks_2021, Res_Nonhydro_Capita_2021], axis=1)
SDGs_Scores_2021 = pd.concat([SDGs_Scores_2021, Res_Nonhydro_Capita_2021], axis=1)

# Predicciones - Modelo SVR SDGs_Scores_2021

In [29]:
# Load libraries
import numpy as np
from sklearn import datasets

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

from sklearn import metrics

from sklearn.model_selection import train_test_split 
# Set random seed
np.random.seed(0)

X = SDGs_Scores_2021.drop('Res_Nonhydro_Capita_2021',1)
y = SDGs_Scores_2021['Res_Nonhydro_Capita_2021']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

std_scale = StandardScaler().fit(X_train)
X_train_scaled = std_scale.transform(X_train)
X_test_scaled = std_scale.transform(X_test)

maxC = [0.01, 0.1, 0.3, 0.5, 1.0, 3, 5.0, 15, 30, 50, 75, 100] # Parametro de regularizacion
maxKernel = ["linear","poly","rbf"] # Tipo de kernel, probar varios
maxDegree = [2,3,4,5] # Cuantos grados queremos para el kernel polinomico
maxGamma = [0.001, 0.1, "auto", "scale", 1.0, 10.0, 30.0] # Coeficiente de regulaizacion para los kernels rbf, poly y sigmoid
maxEpsilon = [0.001, 0.1, 0.5, 1.0, 1.5]

n = 0
for Ci in maxC:
    for kerneli in maxKernel:
        for degreei in maxDegree:
            for gammai in maxGamma:
                for epsiloni in maxEpsilon:
                    svm_reg = SVR(kernel=kerneli, degree=degreei, C=Ci, epsilon = epsiloni, gamma= gammai)
                    svm_reg.fit(X_train_scaled, y_train)
                    predictions = svm_reg.predict(X_test_scaled)
                    if (n == 0): # The first time initialize
                        MAE_Best = metrics.mean_absolute_error(y_test, predictions)
                        maxC_Best = Ci
                        maxKernel_Best = kerneli
                        maxDegree_Best = degreei
                        maxGamma_Best = gammai
                        maxEpsilon_Best = epsiloni
                    else:
                        MAE_New = metrics.mean_absolute_error(y_test, predictions)
                        if (MAE_New < MAE_Best):
                            MAE_Best = MAE_New
                            maxC_Best = Ci
                            maxKernel_Best = kerneli
                            maxDegree_Best = degreei
                            maxGamma_Best = gammai
                            maxEpsilon_Best = epsiloni
                        else:
                            pass
    n +=1
print('MAE_Best', MAE_Best, 
        'C', maxC_Best, 
        'kernel', maxKernel_Best,
        'degree', maxDegree_Best,
        'gamma', maxGamma_Best,
        'epsilon',maxEpsilon_Best)


MAE_Best 861.3649635710564 C 75 kernel poly degree 5 gamma auto epsilon 0.001


In [38]:
from sklearn import metrics

svm_reg = SVR(kernel='poly', degree=5, C=75, epsilon = 0.001, gamma= 'auto')
svm_reg.fit(X_train_scaled, y_train)
predictions = svm_reg.predict(X_test_scaled)

print("Coeficiente determinación Train:", metrics.r2_score(y_train, svm_reg.predict(X_train_scaled)))
print("MAE Train:", metrics.mean_absolute_error(y_train, svm_reg.predict(X_train_scaled)))
print("MAPE Train:", metrics.mean_absolute_percentage_error(y_train, svm_reg.predict(X_train_scaled)))
print("MSE Train:", metrics.mean_squared_error(y_train, svm_reg.predict(X_train_scaled)))
print("RMSE Train:", np.sqrt(metrics.mean_squared_error(y_train, svm_reg.predict(X_train_scaled))))

print('-'*20)

print("Coeficiente determinación Test:", metrics.r2_score(y_test, predictions))
print("MAE Test:", metrics.mean_absolute_error(y_test, predictions))
print("MAPE Test:", metrics.mean_absolute_percentage_error(y_test, predictions))
print("MSE Test:", metrics.mean_squared_error(y_test, predictions))
print("RMSE Test:", np.sqrt(metrics.mean_squared_error(y_test, predictions)))


Coeficiente determinación Train: 0.6065170454548635
MAE Train: 296.0185196143277
MAPE Train: 3.572314917506691e+16
MSE Train: 258661.46977643334
RMSE Train: 508.5877208274236
--------------------
Coeficiente determinación Test: 0.0036455117215157262
MAE Test: 870.4226709347624
MAPE Test: 34.66293972823645
MSE Test: 9438692.707521591
RMSE Test: 3072.245548051391


# Predicciones - Modelo Decission Trees SDGs_Ranks_2021

In [32]:
# Load libraries
import numpy as np
from sklearn import datasets

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

from sklearn import metrics

from sklearn.model_selection import train_test_split 
# Set random seed
np.random.seed(0)

X = SDGs_Ranks_2021.drop('Res_Nonhydro_Capita_2021',1)
y = SDGs_Ranks_2021['Res_Nonhydro_Capita_2021']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

std_scale = StandardScaler().fit(X_train)
X_train_scaled = std_scale.transform(X_train)
X_test_scaled = std_scale.transform(X_test)

maxC = [0.01, 0.1, 0.3, 0.5, 1.0, 3, 5.0, 15, 30, 50, 75, 100] # Parametro de regularizacion
maxKernel = ["linear","poly","rbf"] # Tipo de kernel, probar varios
maxDegree = [2,3,4,5] # Cuantos grados queremos para el kernel polinomico
maxGamma = [0.001, 0.1, "auto", "scale", 1.0, 10.0, 30.0] # Coeficiente de regulaizacion para los kernels rbf, poly y sigmoid
maxEpsilon = [0.001, 0.1, 0.5, 1.0, 1.5]

n = 0
for Ci in maxC:
    for kerneli in maxKernel:
        for degreei in maxDegree:
            for gammai in maxGamma:
                for epsiloni in maxEpsilon:
                    svm_reg = SVR(kernel=kerneli, degree=degreei, C=Ci, epsilon = epsiloni, gamma= gammai)
                    svm_reg.fit(X_train_scaled, y_train)
                    predictions = svm_reg.predict(X_test_scaled)
                    if (n == 0): # The first time initialize
                        MAE_Best = metrics.mean_absolute_error(y_test, predictions)
                        maxC_Best = Ci
                        maxKernel_Best = kerneli
                        maxDegree_Best = degreei
                        maxGamma_Best = gammai
                        maxEpsilon_Best = epsiloni
                    else:
                        MAE_New = metrics.mean_absolute_error(y_test, predictions)
                        if (MAE_New < MAE_Best):
                            MAE_Best = MAE_New
                            maxC_Best = Ci
                            maxKernel_Best = kerneli
                            maxDegree_Best = degreei
                            maxGamma_Best = gammai
                            maxEpsilon_Best = epsiloni
                        else:
                            pass
    n +=1
print('MAE_Best', MAE_Best, 
        'C', maxC_Best, 
        'kernel', maxKernel_Best,
        'degree', maxDegree_Best,
        'gamma', maxGamma_Best,
        'epsilon',maxEpsilon_Best)

MAE_Best 859.7111983693998 C 100 kernel poly degree 5 gamma auto epsilon 0.1


In [43]:
from sklearn import metrics

svm_reg = SVR(kernel='poly', degree=5, C=100, epsilon = 0.1, gamma= 'auto')
svm_reg.fit(X_train_scaled, y_train)
predictions = svm_reg.predict(X_test_scaled)

print("Coeficiente determinación Train:", metrics.r2_score(y_train, svm_reg.predict(X_train_scaled)))
print("MAE Train:", metrics.mean_absolute_error(y_train, svm_reg.predict(X_train_scaled)))
print("MAPE Train:", metrics.mean_absolute_percentage_error(y_train, svm_reg.predict(X_train_scaled)))
print("MSE Train:", metrics.mean_squared_error(y_train, svm_reg.predict(X_train_scaled)))
print("RMSE Train:", np.sqrt(metrics.mean_squared_error(y_train, svm_reg.predict(X_train_scaled))))

print('-'*20)

print("Coeficiente determinación Test:", metrics.r2_score(y_test, predictions))
print("MAE Test:", metrics.mean_absolute_error(y_test, predictions))
print("MAPE Test:", metrics.mean_absolute_percentage_error(y_test, predictions))
print("MSE Test:", metrics.mean_squared_error(y_test, predictions))
print("RMSE Test:", np.sqrt(metrics.mean_squared_error(y_test, predictions)))

print('-'*20)


Coeficiente determinación Train: 0.6385358496074564
MAE Train: 276.61215277072716
MAPE Train: 3.779046786174788e+16
MSE Train: 237613.46541708976
RMSE Train: 487.45611640135337
--------------------
Coeficiente determinación Test: -0.004432682249981168
MAE Test: 859.7111983693998
MAPE Test: 42.124431988569135
MSE Test: 9515219.276554722
RMSE Test: 3084.674906137553
--------------------


# CONCLUSIÓN

# Los dos modelos resultan semejantes - NO SON BUENOS

SDGs_Scores_2021
* Coeficiente determinación Test: 0.10227851844619062
* MAE Test: 861.3649635710564
* MAPE Test: 51.95503962009982
* MSE Test: 8504319.7988376
* RMSE Test: 2916.2166927095113

SDGs_Ranks_2021
* Coeficiente determinación Test: -0.004432682249981168
* MAE Test: 859.7111983693998
* MAPE Test: 42.124431988569135
* MSE Test: 9515219.276554722
* RMSE Test: 3084.674906137553