## Regression 
### Import required libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing  import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.svm import SVR, LinearSVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV

### Import data and Preprocessing

In [2]:
data = pd.read_csv('C:/Users/kdpat/Downloads/life-expectancy-who/led.csv')

##### Mean values for missing values group by Country
data_grouped1=data.groupby(['Country'])
data_imputed = data_grouped1.transform(lambda grp: grp.fillna(grp.mean()))

#### Target Column: remove record without values
data_imputed = data_imputed.dropna(axis=0, subset=['Lifeexpectancy'])
data_imputed[['Country']]=data[['Country']]
data_imputed[['Status']]=data[['Status']]


##### Mean values for missing valuesgroup by status
data_grouped2=data_imputed.groupby(['Status'])
data_imputed = data_grouped2.transform(lambda grp: grp.fillna(grp.mean()))
data_imputed[['Country']]=data[['Country']]
data_imputed[['Status']]=data[['Status']]


#### Binary Columns
data_imputed['Status'] = data_imputed['Status'].map({'Developing':1, 'Developed':0}).astype(int)
data_imputed.describe(include=['O'])

#### Vector Column
Country=pd.get_dummies(data_imputed['Country'],columns='Country',prefix='Country')
Year=pd.get_dummies(data_imputed['Year'],columns='Year',prefix='Year')
data_imputed=pd.concat([data_imputed, Country], axis=1)
data_imputed=pd.concat([data_imputed, Year], axis=1)

#### Drop original vector column
data_imputed.drop(['Country'],axis=1,inplace= True)
data_imputed.drop(['Year'],axis=1,inplace= True)

#### x and Y
x=data_imputed.copy()
x.drop(['Lifeexpectancy'],axis=1,inplace= True)
y=data_imputed['Lifeexpectancy']
#y=pd.DataFrame(y)


#split train-test data and scalling 
x_train, x_test, y_train, y_test = train_test_split(x,y,random_state=0)
x_tr, x_val, y_tr, y_val = train_test_split(x_train,y_train,random_state=0)
mn = MinMaxScaler()
x_train = pd.DataFrame(mn.fit_transform(x_train))
x_test = pd.DataFrame(mn.transform(x_test))
x_tr = pd.DataFrame(mn.fit_transform(x_tr))
x_val = pd.DataFrame(mn.transform(x_val))

## 1.  Bagging 
### 1.1 Bagging with SVM Kernel=rbf

In [93]:
lregSVRR = SVR(kernel='rbf', gamma=0.1, C=100)
bag_lregSVRR_grid = BaggingRegressor(lregSVRR, bootstrap=True, n_jobs=-1, random_state=0)
param_grid_bag_lreg = {"n_estimators": [2, 5, 10],
              "max_samples": [0.2, 0.5, 0.7, 1.0]
              }
bag_lregSVRR_grid = GridSearchCV(bag_lregSVRR_grid, param_grid_bag_lreg, cv = 5) 
bag_lregSVRR_grid.fit(x_train, y_train)

print('Bagging with SVM Kernel=rbf : Train score: {:.4f}'.format(bag_lregSVRR_grid.score(x_train, y_train)))
print('Bagging with SVM Kernel=rbf : Test score: {:.4f}'.format(bag_lregSVRR_grid.score(x_test, y_test)))

Bagging with SVM Kernel=rbf : Train score: 0.9897
Bagging with SVM Kernel=rbf : Test score: 0.9536


### 1.2 Bagging with Linear Regression

In [92]:
lreg = LinearRegression()
bag_lreg_grid = BaggingRegressor(lreg, bootstrap=True, n_jobs=-1, random_state=0)
param_grid_bag_lreg = {"n_estimators": [2, 5, 10],
              "max_samples": [0.2, 0.5, 0.7, 1.0]
              }
bag_lreg_grid = GridSearchCV(bag_lreg_grid, param_grid_bag_lreg, cv = 5) 
bag_lreg_grid.fit(x_train, y_train)

print('Bagging with Linear Regression : Train score: {:.4f}'.format(bag_lreg_grid.score(x_train, y_train)))
print('Bagging with Linear Regression : Test score: {:.4f}'.format(bag_lreg_grid.score(x_test, y_test)))

Bagging with Linear Regression : Train score: 0.9654
Bagging with Linear Regression : Test score: 0.9489


## 2. Pasting 
### 2.1 Pasting with SVM Kernel=rbf

In [96]:
lregSVRR = SVR(kernel='rbf', gamma=0.1, C=100)
pas_lregSVRR = BaggingRegressor(lregSVRR, bootstrap=False, n_jobs=-1, random_state=0)
#pas_lregSVRR.fit(x_train, y_train)
param_grid_pag_lregSVRR = {"n_estimators": [2, 5, 10],
              "max_samples": [0.2, 0.5, 0.7, 1.0]
              }
pas_lregSVRR_grid = GridSearchCV(pas_lregSVRR, param_grid_pag_lregSVRR, cv = 5) 
pas_lregSVRR_grid.fit(x_train, y_train)


print('Pasting with SVM Kernel=rbf: {:.4f}'.format(pas_lregSVRR_grid.score(x_train, y_train)))
print('Pasting with SVM Kernel=rbf: {:.4f}'.format(pas_lregSVRR_grid.score(x_test, y_test)))

Pasting with SVM Kernel=rbf: 0.9958
Pasting with SVM Kernel=rbf: 0.9578


### 2.2 Pasting with Linear Regression

In [98]:
lreg = LinearRegression()
pas_lreg = BaggingRegressor(lreg, bootstrap=False, n_jobs=-1, random_state=0)
param_grid_pag_lreg = {"n_estimators": [2, 5, 10],
              "max_samples": [0.2, 0.5, 0.7, 1.0]
              }
pas_lreg_grid = GridSearchCV(pas_lregSVRR, param_grid_pag_lreg, cv = 5) 
pas_lreg_grid.fit(x_train, y_train)
print('Pasting with Linear Regression: Train score: {:.4f}'.format(pas_lreg_grid.score(x_train, y_train)))
print('Pasting with Linear Regression: Test score: {:.4f}'.format(pas_lreg_grid.score(x_test, y_test)))

Pasting with Linear Regression: Train score: 0.9958
Pasting with Linear Regression: Test score: 0.9578


### 3 AdaBoosting 
### 3.1 AdaBoosting with SVM Kernel=rbf

In [99]:
lregSVRR = SVR(kernel='rbf', gamma=0.1, C=100)
ada_lregSVRR = AdaBoostRegressor(lregSVRR, loss='square', learning_rate=0.5, random_state=0)
ada_lregSVRR.fit(x_train, y_train)
print('AdaBoosting with SVM Kernel=rbf : Train score: {:.4f}'.format(ada_lregSVRR.score(x_train, y_train)))
print('AdaBoosting with SVM Kernel=rbf : Test score: {:.4f}'.format(ada_lregSVRR.score(x_test, y_test)))
y_pred = ada_lregSVRR.predict(x)

AdaBoosting with SVM Kernel=rbf : Train score: 0.9992
AdaBoosting with SVM Kernel=rbf : Test score: 0.9539


### 3.2 AdaBoosting with Linear Regression

In [100]:
lreg = LinearRegression()
ada_lreg = AdaBoostRegressor(lreg, loss='square', learning_rate=0.5, random_state=0)
ada_lreg.fit(x_train, y_train)
print('AdaBoosting with Linear Regression: Train score: {:.4f}'.format(ada_lreg.score(x_train, y_train)))
print('AdaBoosting with Linear Regression: Test score: {:.4f}'.format(ada_lreg.score(x_test, y_test)))

AdaBoosting with Linear Regression: Train score: 0.9394
AdaBoosting with Linear Regression: Test score: 0.9132


### 4. Gradient Boosting

In [103]:
gbrt = GradientBoostingRegressor(random_state=0,learning_rate=0.5,n_estimators=10)
gbrt.fit(x_train, y_train)

print("Gradient Boosting: Train score: {:.4f}".format(gbrt.score(x_train, y_train)))
print("Gradient Boosting: Test score: {:.4f}".format(gbrt.score(x_test, y_test)))

Gradient Boosting: Train score: 0.9413
Gradient Boosting: Test score: 0.9139


### 5. PCA

In [78]:
pca = PCA(.95)
x_train_pca =  pd.DataFrame(pca.fit_transform(x_train))
x_test_pca = pd.DataFrame(pca.transform(x_test))
x_tr_pca = pd.DataFrame(pca.fit_transform(x_tr))
x_val_pca = pd.DataFrame(pca.transform(x_val))

#print(x_train_pca.shape,x_test_pca.shape,x_tr_pca.shape,x_val_pca.shape)

#print("Variation explained",pca.explained_variance_ratio_)
print("No. of Features in Reduced Data",pca.n_components_)


No. of Features 169


### 5.1 Linear Regression

In [135]:
cvsl={'Model':[],'Mean_Score':[],'Std':[]}
Cross_validation_Score = pd.DataFrame(cvsl)
lreg = LinearRegression()
cslreg=cross_val_score(lreg,x_train_pca,y_train,cv=5)
Cross_validation_Score = Cross_validation_Score.append({'Model':'Linear Regression','Mean_Score':cslreg.mean(),'Std':cslreg.std() * 2}, ignore_index=True)
print("Avg Cross Validation Score: %0.6f (+/- %0.2f)" % (cslreg.mean(), cslreg.std() * 2))

Avg Cross Validation Score: 0.854794 (+/- 0.02)


### 5.2 Ridge

In [136]:
x_range = [0.01, 0.1, 1, 10, 100]
train_score_list = []
test_score_list = []

best_score=0
for alpha in x_range: 
    ridge = Ridge(alpha)
    ridge.fit(x_tr_pca,y_tr)
    train_score_list.append(ridge.score(x_tr_pca,y_tr))
    score=ridge.score(x_val_pca, y_val)
    test_score_list.append((score))
    if score > best_score:
        best_score = score
        best_parameters = {'alpha': alpha}

print("Best score: {:.4f}".format(best_score))
print("Best parameters: {}".format(best_parameters))

ridge = Ridge(1)
csridge=cross_val_score(ridge,x_train_pca,y_train,cv=5)
Cross_validation_Score = Cross_validation_Score.append({'Model':'Ridge','Mean_Score':csridge.mean(),'Std':csridge.std() * 2}, ignore_index=True)
print("Avg Cross Validation Score: %0.6f (+/- %0.2f)" % (csridge.mean(), csridge.std() * 2))

Best score: 0.8566
Best parameters: {'alpha': 1}
Avg Cross Validation Score: 0.856707 (+/- 0.02)


### 5.3 Lasso

In [137]:
x_range = [0.01, 0.1, 1, 10, 100]
train_score_list = []
test_score_list = []

best_score=0
for alpha in x_range: 
    lasso = Lasso(alpha)
    lasso.fit(x_tr_pca,y_tr)
    train_score_list.append(lasso.score(x_tr_pca,y_tr))
    score=lasso.score(x_val_pca, y_val)
    test_score_list.append((score))
    if score > best_score:
        best_score = score
        best_parameters = {'alpha': alpha}

print("Best score: {:.4f}".format(best_score))
print("Best parameters: {}".format(best_parameters))


lasso = Lasso(0.01)
cslasso=cross_val_score(lasso,x_train_pca,y_train,cv=5)
Cross_validation_Score = Cross_validation_Score.append({'Model':'Lasso','Mean_Score':cslasso.mean(),'Std':cslasso.std() * 2}, ignore_index=True)
print("Avg Cross Validation score: %0.6f (+/- %0.2f)" % (cslasso.mean(), cslasso.std() * 2))

Best score: 0.8443
Best parameters: {'alpha': 0.01}
Avg Cross Validation score: 0.842413 (+/- 0.03)


### 5.4 Polynomial Regression

In [138]:
lreg = LinearRegression()
poly = PolynomialFeatures(1)
X_train_poly = poly.fit_transform(x_train_pca)
cslregpoly=cross_val_score(lreg,X_train_poly,y_train,cv=5)
Cross_validation_Score = Cross_validation_Score.append({'Model':'Polynomial Regression','Mean_Score':cslregpoly.mean(),'Std':cslregpoly.std() * 2}, ignore_index=True)
print("Avg Cross Validation score: %0.6f (+/- %0.2f)" % (cslregpoly.mean(), cslregpoly.std() * 2))

Avg Cross Validation score: 0.854794 (+/- 0.02)


### 5.5 Linear SVR

In [139]:
x_range = [0.01, 0.1, 1, 10]
train_score_list = []
test_score_list = []

best_score=0
for c in x_range: 
    lregSVR = LinearSVR(C=c)
    lregSVR.fit(x_tr_pca,y_tr)
    train_score_list.append(lregSVR.score(x_tr_pca,y_tr))
    score=lregSVR.score(x_val_pca, y_val)
    test_score_list.append((score))
    if score > best_score:
        best_score = score
        best_parameters = {'C': c}

print("Best score: {:.4f}".format(best_score))
print("Best parameters: {}".format(best_parameters))


lregSVR = LinearSVR(C=1)
cslregSVR=cross_val_score(lregSVR,x_train_pca,y_train,cv=5)
Cross_validation_Score = Cross_validation_Score.append({'Model':'SVM Simple','Mean_Score':cslregSVR.mean(),'Std':cslregSVR.std() * 2}, ignore_index=True)
print("Avg Cross validation score: %0.6f (+/- %0.2f)" % (cslregSVR.mean(), cslregSVR.std() * 2))

Best score: 0.8386
Best parameters: {'C': 1}
Avg Cross validation score: 0.836388 (+/- 0.04)




### 5.6 SVM Simple

In [140]:
x_range = [0.01, 0.1, 1, 10]
train_score_list = []
test_score_list = []

best_score=0
for c in x_range: 
    lregSVRL = SVR(kernel='linear', C=c)
    lregSVRL.fit(x_tr_pca,y_tr)
    train_score_list.append(lregSVRL.score(x_tr_pca,y_tr))
    score=lregSVRL.score(x_val_pca, y_val)
    test_score_list.append((score))
    if score > best_score:
        best_score = score
        best_parameters = {'C': c}

print("Best score: {:.4f}".format(best_score))
print("Best parameters: {}".format(best_parameters))

lregSVRL = SVR(kernel='linear', C=1)
cslregSVRL=cross_val_score(lregSVRL,x_train_pca,y_train,cv=5)
Cross_validation_Score = Cross_validation_Score.append({'Model':'SVM Kernel=Linear','Mean_Score':cslregSVRL.mean(),'Std':cslregSVRL.std() * 2}, ignore_index=True)
print("Avg Cross validation score: %0.6f (+/- %0.2f)" % (cslregSVRL.mean(), cslregSVRL.std() * 2))

Best score: 0.8395
Best parameters: {'C': 1}
Avg Cross validation score: 0.836355 (+/- 0.04)


### 5.7 SVM Kernel=rbf

In [141]:
Gamma = [0.001, 0.01, 0.1, 1, 10, 100]
C = [0.001, 0.01, 0.1, 1, 10, 100]
train_score_list = []
test_score_list = []

best_score=0
for gamma in Gamma: 
    for c in C:
        lregSVRR = SVR(kernel='rbf', gamma=gamma, C=c)
        lregSVRR.fit(x_tr_pca,y_tr)
        train_score_list.append(lregSVRR.score(x_tr_pca,y_tr))
        score=lregSVRR.score(x_val_pca, y_val)
        test_score_list.append((score))
        if score > best_score:
            best_score = score
            best_parameters = {'gamma': gamma, 'C': c}

print("Best score: {:.4f}".format(best_score))
print("Best parameters: {}".format(best_parameters))


lregSVRR = SVR(kernel='rbf', gamma=0.1, C=100)
cslregSVRR=cross_val_score(lregSVRR,x_train_pca,y_train,cv=5)
Cross_validation_Score = Cross_validation_Score.append({'Model':'SVM Kernel=rbf','Mean_Score':cslregSVRR.mean(),'Std':cslregSVRR.std() * 2}, ignore_index=True)
print("Avg Cross validation score: %0.6f (+/- %0.2f)" % (cslregSVRR.mean(), cslregSVRR.std() * 2))

Best score: 0.9253
Best parameters: {'gamma': 0.1, 'C': 100}
Avg Cross validation score: 0.936589 (+/- 0.02)


### 5.8 SVM Kernel=poly

In [142]:
Degree = [2,3,4]
C = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
train_score_list = []
test_score_list = []

best_score=0
for degree in Degree: 
    for c in C:
        lregSVRP = SVR(kernel='poly', degree=degree, C=c, gamma='auto')
        lregSVRP.fit(x_tr_pca,y_tr)
        train_score_list.append(lregSVRP.score(x_tr_pca,y_tr))
        score=lregSVRP.score(x_val_pca, y_val)
        test_score_list.append((score))
        if score > best_score:
            best_score = score
            best_parameters = {'Degree': degree, 'C': c}

print("Best score: {:.4f}".format(best_score))
print("Best parameters: {}".format(best_parameters))

lregSVRP = SVR(kernel='poly', degree=2, C=1000, gamma='auto')
lregSVRP.fit(x_train_pca,y_train)
cslregSVRP=cross_val_score(lregSVRP,x_train_pca,y_train,cv=5)
Cross_validation_Score = Cross_validation_Score.append({'Model':'SVM Kernel=Poly','Mean_Score':cslregSVRP.mean(),'Std':cslregSVRP.std() * 2}, ignore_index=True)
print("Avg Cross validation score: %0.6f (+/- %0.2f)" % (cslregSVRP.mean(), cslregSVRP.std() * 2))

Best score: 0.1739
Best parameters: {'Degree': 2, 'C': 1000}
Avg Cross validation score: 0.183494 (+/- 0.09)


### 5.9 KNeighborsRegressor 

In [143]:
train_score_list = []
test_score_list = []
K=[1,2,3,4,5,6,7,8,9]
best_score=0
for k in range(1,10):
    knn_reg = KNeighborsRegressor(k)
    knn_reg.fit(x_tr_pca, y_tr)
    train_score_list.append(knn_reg.score(x_tr_pca, y_tr))
    score=knn_reg.score(x_val_pca, y_val)
    test_score_list.append((score))
    if score > best_score:
        best_score = score
        best_parameters = {'K': k}

print("Best score: {:.4f}".format(best_score))
print("Best parameters: {}".format(best_parameters))

knn_reg = KNeighborsRegressor(4)
csknn_reg=cross_val_score(knn_reg,x_train_pca,y_train,cv=5)
Cross_validation_Score = Cross_validation_Score.append({'Model':'KNeighborsRegressor','Mean_Score':csknn_reg.mean(),'Std':csknn_reg.std() * 2}, ignore_index=True)
print("Avg Cross validation score: %0.6f (+/- %0.2f)" % (csknn_reg.mean(), csknn_reg.std() * 2))

Best score: 0.6847
Best parameters: {'K': 9}
Avg Cross validation score: 0.606448 (+/- 0.06)


## 6 .Performance Comparison : PCA and Non PCA DataSet
### 6.1 Non PCA Resuts

In [74]:
cvsl={'Model':[],'Mean_Score':[],'Std':[]}
Non_PCA_Score = pd.DataFrame(cvsl)
Non_PCA_Score = Non_PCA_Score.append({'Model':'Linear Regression','Mean_Score':0.955076,'Std':0.013500}, ignore_index=True)
Non_PCA_Score = Non_PCA_Score.append({'Model':'Ridge','Mean_Score':0.954983,'Std':0.015913}, ignore_index=True)
Non_PCA_Score = Non_PCA_Score.append({'Model':'Lasso Regression','Mean_Score':0.894714,'Std':0.023043}, ignore_index=True)
Non_PCA_Score = Non_PCA_Score.append({'Model':'Polynomial Regression','Mean_Score':0.954351,'Std':0.012865}, ignore_index=True)
Non_PCA_Score = Non_PCA_Score.append({'Model':'SVM Simple','Mean_Score':0.885082,'Std':0.068014}, ignore_index=True)
Non_PCA_Score = Non_PCA_Score.append({'Model':'SVM Kernel=Linea','Mean_Score':0.915591,'Std':0.034254}, ignore_index=True)
Non_PCA_Score = Non_PCA_Score.append({'Model':'SVM Kernel=rbf','Mean_Score':0.957147,'Std':0.016217}, ignore_index=True)
Non_PCA_Score = Non_PCA_Score.append({'Model':'SVM Kernel=Poly','Mean_Score':0.822728,'Std':0.029408}, ignore_index=True)
Non_PCA_Score = Non_PCA_Score.append({'Model':'KNeighborsRegressor','Mean_Score':0.888586,'Std':0.026405}, ignore_index=True)
print(pd.DataFrame(Non_PCA_Score))

                   Model  Mean_Score       Std
0      Linear Regression    0.955076  0.013500
1                  Ridge    0.954983  0.015913
2       Lasso Regression    0.894714  0.023043
3  Polynomial Regression    0.954351  0.012865
4             SVM Simple    0.885082  0.068014
5       SVM Kernel=Linea    0.915591  0.034254
6         SVM Kernel=rbf    0.957147  0.016217
7        SVM Kernel=Poly    0.822728  0.029408
8    KNeighborsRegressor    0.888586  0.026405


### 6.2 PCA Resuts

In [144]:
Cross_validation_Score = Cross_validation_Score.drop_duplicates()
print(Cross_validation_Score)

                   Model  Mean_Score       Std
0      Linear Regression    0.854794  0.024134
1                  Ridge    0.856707  0.023984
2                  Lasso    0.842413  0.027182
3  Polynomial Regression    0.854794  0.024134
4             SVM Simple    0.836388  0.039319
5      SVM Kernel=Linear    0.836355  0.038256
6         SVM Kernel=rbf    0.936589  0.015297
7        SVM Kernel=Poly    0.183494  0.089108
8    KNeighborsRegressor    0.606448  0.062731


#### Based on above tables, Regression algorithm performs betters with non reduced(PCA) dataset

## 6. Deep Learning Model
###  6.1 with optimizer = SGD

In [48]:
from keras.models import Sequential
from keras.layers import Dense
import numpy

# fix random seed for reproducibility
numpy.random.seed(10)
model = Sequential()
model.add(Dense(50, input_dim=218, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal'))

In [49]:
model.compile(loss='mse', optimizer='sgd' , metrics = ['mse'])

In [50]:
model.fit(x_train, y_train, epochs = 100, batch_size = 20)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.callbacks.History at 0x262f9fbd708>

In [51]:
model.evaluate(x_test, y_test)



[4.32135035692017, 4.321350574493408]

In [52]:
from sklearn.metrics import r2_score, recall_score, precision_score

y_train_predict = model.predict(x_train)
y_test_predict = model.predict(x_test)
#print(pd.DataFrame(y_test_predict))
print('Train score: {:.4f}'.format(r2_score(y_train, y_train_predict)))
print('Test score: {:.4f}'.format(r2_score(y_test, y_test_predict)))

Train score: 0.9737
Test score: 0.9535


###  6.2 With optimizer = Adam

In [157]:
from keras.wrappers.scikit_learn import KerasRegressor
numpy.random.seed(10)

def create_model():
    #create model
    model = Sequential()
    model.add(Dense(50, input_dim=218, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    #compile model
    model.compile(loss='mse', optimizer='Adam' , metrics = ['mse'])
    return model

In [158]:
np.random.seed(10)

model = KerasRegressor(build_fn = create_model, verbose = 0)
param_grid = {'batch_size':[10,20] , 'epochs':[50,100]}
grid_search = GridSearchCV(estimator= model, param_grid = param_grid, cv = 5)
grid_search_result = grid_search.fit(x_train, y_train)

In [163]:
print(grid_search.best_params_)
y_train_predict = grid_search.predict(x_train)
y_test_predict = grid_search.predict(x_test)
#print(pd.DataFrame(y_test_predict))
print('Train score: {:.4f}'.format(r2_score(y_train, y_train_predict)))
print('Test score: {:.4f}'.format(r2_score(y_test, y_test_predict)))

{'batch_size': 10, 'epochs': 100}
Train score: 0.9811
Test score: 0.9562
