## Cross validation scores

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from numpy import mean
from numpy import absolute
from numpy import sqrt
import numpy as np
import pandas as pd

from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix

# Regressors
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor

# Classifiers
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

seed =1

In [2]:
df = pd.read_csv('../LassoRegression/LassoForwardSelectionDataSetForModelling.csv')
#lassoFeatures = pd.read_csv('top10predictorsLasso_cv.csv')['0'].tolist()
forwardSelectionFeatures = pd.read_csv('../LassoRegression/ForwardSelectionVia5FoldCV.csv')["0"].tolist()

y_qualitative = df['AboveAverageLifeExpectancyByYear']
y_quantitative = df['Life expectancy at birth, total (years)']
#xLasso = df[np.intersect1d(df.columns, lassoFeatures)]
x = df[np.intersect1d(df.columns, forwardSelectionFeatures)]

forwardSelectionFeatures

['Adolescent fertility rate (births per 1,000 women ages 15-19)',
 'Arable land (% of land area)',
 'Arable land (hectares per person)',
 'CO2 emissions from solid fuel consumption (kt)',
 'GDP per capita (current US$)',
 'Merchandise exports by the reporting economy, residual (% of total merchandise exports)',
 'Merchandise trade (% of GDP)',
 'Permanent cropland (% of land area)',
 'Population density (people per sq. km of land area)',
 'Urban population (% of total)']

In [3]:
allScores = pd.DataFrame(columns=['Type','Model', 'Score'])

### Classifier Models

In [4]:
# KNN
knn = KNeighborsClassifier(n_neighbors=5)
cv_scores = cross_val_score(knn, x.values, y_qualitative, cv=10)
print(cv_scores)
print('cv_scores mean:{}'.format(np.mean(cv_scores)))
allScores.loc[allScores.shape[0]-1] = ['Classifier', 'KNN n = 5', np.mean(cv_scores)]

[0.73482428 0.7971246  0.89776358 0.87539936 0.81469649 0.84345048
 0.87220447 0.90734824 0.8514377  0.8544    ]
cv_scores mean:0.8448649201277956


In [5]:
# Logistic Regression
model = LogisticRegression(random_state=0, max_iter=500)
cv_scores = cross_val_score(model, x.values, y_qualitative, cv=10)
print(cv_scores)
print('cv_scores mean:{}'.format(np.mean(cv_scores)))
allScores.loc[allScores.shape[0]-1] = ['Classifier', 'Logistic Regression', np.mean(cv_scores)]

[0.75399361 0.81309904 0.90734824 0.87060703 0.80191693 0.88658147
 0.91693291 0.90734824 0.81789137 0.872     ]
cv_scores mean:0.8547718849840255


In [6]:
# Multi Layer Perceptron Classifier
model = MLPClassifier(solver='adam', activation='tanh', alpha=1e-2, hidden_layer_sizes=(5,2), random_state=1, max_iter=20000)
cv_scores = cross_val_score(model, x.values, y_qualitative, cv=10)
print(cv_scores)
print('cv_scores mean:{}'.format(np.mean(cv_scores)))
allScores.loc[allScores.shape[0]-1] = ['Classifier', 'Multi Layer Perceptron', np.mean(cv_scores)]

[0.72523962 0.81789137 0.82108626 0.87060703 0.84824281 0.82747604
 0.81948882 0.85942492 0.80511182 0.8       ]
cv_scores mean:0.8194568690095847


In [7]:
# Random Forest
model = RandomForestClassifier(
                      min_samples_leaf=50,
                      n_estimators=150,
                      bootstrap=True,
                      oob_score=True,
                      n_jobs=-1,
                      random_state=seed,
                      max_features='auto')
cv_scores = cross_val_score(model, x.values, y_qualitative, cv=10)
print(cv_scores)
print('cv_scores mean:{}'.format(np.mean(cv_scores)))
allScores.loc[allScores.shape[0]-1] = ['Classifier', 'Random Forest', np.mean(cv_scores)]

[0.77476038 0.88178914 0.9456869  0.91693291 0.83226837 0.8913738
 0.93610224 0.95047923 0.88658147 0.9152    ]
cv_scores mean:0.8931174440894569


## Regession models

In [8]:
# Multi Layer Perceptron Regression
model = MLPRegressor(solver='adam', activation='logistic', alpha=1e-2, hidden_layer_sizes=(10,5), random_state=1, max_iter=20000)
cv_scores = cross_val_score(model, x.values, y_quantitative, cv=10)
print(cv_scores)
print('cv_scores mean:{}'.format(np.mean(cv_scores)))
allScores.loc[allScores.shape[0]-1] = ['Regression', 'Multi Layer Perceptron', np.mean(cv_scores)]

[0.58777597 0.58482693 0.65293133 0.3130321  0.23677432 0.66932171
 0.66872994 0.64248121 0.65118879 0.67855471]
cv_scores mean:0.5685617010669904


In [9]:
# KNN Regression
model = KNeighborsRegressor(n_neighbors=12)
cv_scores = cross_val_score(model, x.values, y_quantitative, cv=10)
print(cv_scores)
print('cv_scores mean:{}'.format(np.mean(cv_scores)))
allScores.loc[allScores.shape[0]-1] = ['Regression', 'KNN n = 12', np.mean(cv_scores)]

[0.68849794 0.62008223 0.77418559 0.69573144 0.69523304 0.81441262
 0.79823056 0.81678857 0.76915823 0.82496906]
cv_scores mean:0.7497289274990149


In [10]:
# Multilinear Regression
model = LinearRegression()
cv_scores = cross_val_score(model, x.values, y_quantitative, cv=10)
print(cv_scores)
print('cv_scores mean:{}'.format(np.mean(cv_scores)))
allScores.loc[allScores.shape[0]-1] = ['Regression', 'Multilinear', np.mean(cv_scores)]

[0.70224966 0.77573482 0.78491146 0.73478651 0.69077621 0.75721927
 0.76366421 0.76961067 0.78292607 0.78225342]
cv_scores mean:0.7544132294183508


In [13]:
# Random Forest
model = RandomForestRegressor(n_estimators = 1000, random_state = 42)
cv_scores = cross_val_score(model, x.values, y_qualitative, cv=10)
print(cv_scores)
print('cv_scores mean:{}'.format(np.mean(cv_scores)))
allScores.loc[allScores.shape[0]-1] = ['Regression', 'Random Forest', np.mean(cv_scores)]

[0.87124308 0.87881636 0.94458374 0.90889649 0.82297952 0.83530298
 0.89597939 0.93031414 0.87354013 0.81780528]
cv_scores mean:0.877946108534279


In [14]:
display(allScores)

Unnamed: 0,Type,Model,Score
-1,Classifier,KNN n = 5,0.844865
0,Classifier,Logistic Regression,0.854772
1,Classifier,Multi Layer Perceptron,0.819457
2,Classifier,Random Forest,0.893117
3,Regression,Multi Layer Perceptron,0.568562
4,Regression,KNN n = 12,0.749729
5,Regression,Multilinear,0.754413
6,Regression,Random Forest,0.877946


In [None]:
allScores.to_csv("allScores.csv",index=False )