Companion notebook for Lecture 27. Demoing MLP (FNN) classification and regression.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn as skl

from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.datasets import load_diabetes, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LassoCV, RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the breast cancer data set: feature matrix X, class labels Y, and the feature names.

bc_data = load_breast_cancer()
X = bc_data['data']
Y = bc_data['target']

feature_names = bc_data['feature_names']

# Quick look at what was loaded (names, raw values, array shapes).
print('features: ', feature_names)
print('X_b: ', X)
print('Y_b: ', Y)
print(X.shape)
print(Y.shape)

features:  ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
X_b:  [[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]
Y_b:  [0 0 0 0 0 0 0

In [None]:
# Preprocessing: hold out 33% of the samples for testing, then standardize the features.

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.33, random_state=42
)

# The scaler is fitted on the training split only; the same transform is then
# applied to the test split.
X_scaler = StandardScaler()
X_train_s = X_scaler.fit_transform(X_train)
X_test_s = X_scaler.transform(X_test)

# Shift the class labels so the two classes are encoded as -1 / +1.
Y_train_s, Y_test_s = (2 * labels - 1 for labels in (Y_train, Y_test))



In [None]:
# Linear baselines: Lasso and Ridge, each choosing its penalty by 10-fold
# cross-validation over the same grid of 40 candidate values.
alpha_grid = np.linspace(1e-6, 0.05, 40)

bc_lassoCV = LassoCV(alphas=alpha_grid, cv=10, max_iter=50000)
bc_ridgeCV = RidgeCV(alphas=alpha_grid, cv=10)

bc_lassoCV.fit(X_train_s, Y_train_s)

# The Ridge fit is the last expression of the cell, so its result is what the notebook displays.
bc_ridgeCV.fit(X_train_s, Y_train_s)

In [None]:
# MSE of the linear baselines: the sign of each prediction is compared with the -1/+1 labels.

def _sign_mse(model, features, labels):
    """Return the MSE between `labels` and the sign of `model.predict(features)`."""
    return mean_squared_error(labels, np.sign(model.predict(features)))


ridge_train_MSE = _sign_mse(bc_ridgeCV, X_train_s, Y_train_s)
ridge_test_MSE = _sign_mse(bc_ridgeCV, X_test_s, Y_test_s)

lasso_train_MSE = _sign_mse(bc_lassoCV, X_train_s, Y_train_s)
lasso_test_MSE = _sign_mse(bc_lassoCV, X_test_s, Y_test_s)

# Training errors first, then test errors.
print('Ridge train MSE:', ridge_train_MSE)
print('Lasso train MSE:', lasso_train_MSE)
print('Ridge test MSE:', ridge_test_MSE)
print('Lasso test MSE:', lasso_test_MSE)


Ridge train MSE: 0.12598425196850394
Lasso train MSE: 0.10498687664041995
Ridge test MSE: 0.23404255319148937
Lasso test MSE: 0.1702127659574468


In [None]:
# Define four MLP classifiers that differ only in the width of their three hidden layers.

lmbd = 1e-3  # passed to every model as `alpha`
rng = 10 #42, 10, 13
actf = 'relu' #'relu', 'tanh'


def _make_bc_mlp(width):
    """Build an unfitted MLPClassifier with three hidden layers of `width` units each."""
    return MLPClassifier(
        hidden_layer_sizes=(width, width, width),
        activation=actf,
        alpha=lmbd,
        verbose=True,
        random_state=rng,
    )


bc_MLP_1, bc_MLP_2, bc_MLP_3, bc_MLP_4 = (
    _make_bc_mlp(width) for width in (5, 15, 30, 40)
)




In [None]:
# Train the four classifiers in order, printing a header before each model's training log.

for idx, clf in enumerate((bc_MLP_1, bc_MLP_2, bc_MLP_3, bc_MLP_4), start=1):
    print(f'Model {idx}:')
    clf.fit(X_train_s, Y_train_s)

# Evaluate the last fitted model so the notebook displays its repr.
bc_MLP_4

Model 1:
Iteration 1, loss = 1.15034338
Iteration 2, loss = 1.12599923
Iteration 3, loss = 1.10190364
Iteration 4, loss = 1.07852122
Iteration 5, loss = 1.05593219
Iteration 6, loss = 1.03422622
Iteration 7, loss = 1.01266376
Iteration 8, loss = 0.99187692
Iteration 9, loss = 0.97166798
Iteration 10, loss = 0.95227785
Iteration 11, loss = 0.93286051
Iteration 12, loss = 0.91433147
Iteration 13, loss = 0.89592748
Iteration 14, loss = 0.87842520
Iteration 15, loss = 0.86051520
Iteration 16, loss = 0.84333591
Iteration 17, loss = 0.82612073
Iteration 18, loss = 0.80966595
Iteration 19, loss = 0.79307935
Iteration 20, loss = 0.77675554
Iteration 21, loss = 0.76052199
Iteration 22, loss = 0.74517837
Iteration 23, loss = 0.72950695
Iteration 24, loss = 0.71364715
Iteration 25, loss = 0.69806881
Iteration 26, loss = 0.68307957
Iteration 27, loss = 0.66808311
Iteration 28, loss = 0.65286084
Iteration 29, loss = 0.63843119
Iteration 30, loss = 0.62351140
Iteration 31, loss = 0.60917506
Iteratio



Iteration 8, loss = 0.43716066
Iteration 9, loss = 0.41444938
Iteration 10, loss = 0.39345180
Iteration 11, loss = 0.37377429
Iteration 12, loss = 0.35546367
Iteration 13, loss = 0.33888028
Iteration 14, loss = 0.32260723
Iteration 15, loss = 0.30778943
Iteration 16, loss = 0.29403632
Iteration 17, loss = 0.28114035
Iteration 18, loss = 0.26890527
Iteration 19, loss = 0.25765574
Iteration 20, loss = 0.24710324
Iteration 21, loss = 0.23699072
Iteration 22, loss = 0.22758396
Iteration 23, loss = 0.21887016
Iteration 24, loss = 0.21069811
Iteration 25, loss = 0.20308573
Iteration 26, loss = 0.19576021
Iteration 27, loss = 0.18900191
Iteration 28, loss = 0.18270140
Iteration 29, loss = 0.17678372
Iteration 30, loss = 0.17115609
Iteration 31, loss = 0.16600163
Iteration 32, loss = 0.16097000
Iteration 33, loss = 0.15636838
Iteration 34, loss = 0.15185202
Iteration 35, loss = 0.14778115
Iteration 36, loss = 0.14376937
Iteration 37, loss = 0.14022890
Iteration 38, loss = 0.13690436
Iteration 




Iteration 8, loss = 0.42840358
Iteration 9, loss = 0.38829072
Iteration 10, loss = 0.35151866
Iteration 11, loss = 0.31973371
Iteration 12, loss = 0.29265466
Iteration 13, loss = 0.26829652
Iteration 14, loss = 0.24748001
Iteration 15, loss = 0.22904586
Iteration 16, loss = 0.21218322
Iteration 17, loss = 0.19740521
Iteration 18, loss = 0.18463161
Iteration 19, loss = 0.17256893
Iteration 20, loss = 0.16290315
Iteration 21, loss = 0.15358260
Iteration 22, loss = 0.14586219
Iteration 23, loss = 0.13844143
Iteration 24, loss = 0.13177889
Iteration 25, loss = 0.12591145
Iteration 26, loss = 0.12072640
Iteration 27, loss = 0.11587756
Iteration 28, loss = 0.11178105
Iteration 29, loss = 0.10773557
Iteration 30, loss = 0.10431319
Iteration 31, loss = 0.10078258
Iteration 32, loss = 0.09764282
Iteration 33, loss = 0.09496979
Iteration 34, loss = 0.09249034
Iteration 35, loss = 0.09002914
Iteration 36, loss = 0.08772399
Iteration 37, loss = 0.08567705
Iteration 38, loss = 0.08385018
Iteration



Iteration 28, loss = 0.07818734
Iteration 29, loss = 0.07632043
Iteration 30, loss = 0.07474834
Iteration 31, loss = 0.07322129
Iteration 32, loss = 0.07173424
Iteration 33, loss = 0.07022448
Iteration 34, loss = 0.06885063
Iteration 35, loss = 0.06773280
Iteration 36, loss = 0.06646934
Iteration 37, loss = 0.06528772
Iteration 38, loss = 0.06442722
Iteration 39, loss = 0.06320497
Iteration 40, loss = 0.06237217
Iteration 41, loss = 0.06133970
Iteration 42, loss = 0.06047640
Iteration 43, loss = 0.05962643
Iteration 44, loss = 0.05869853
Iteration 45, loss = 0.05787552
Iteration 46, loss = 0.05706745
Iteration 47, loss = 0.05632179
Iteration 48, loss = 0.05563183
Iteration 49, loss = 0.05480680
Iteration 50, loss = 0.05418150
Iteration 51, loss = 0.05347464
Iteration 52, loss = 0.05277377
Iteration 53, loss = 0.05209492
Iteration 54, loss = 0.05143932
Iteration 55, loss = 0.05080926
Iteration 56, loss = 0.05020777
Iteration 57, loss = 0.04966978
Iteration 58, loss = 0.04902069
Iteratio



MLPClassifier(activation='tanh', alpha=0.001, hidden_layer_sizes=(40, 40, 40),
              random_state=10, verbose=True)

In [None]:
# Train/test MSE of each classifier's predicted labels against the -1/+1 labels.

def _train_test_mse(clf):
    """Return (train MSE, test MSE) of `clf`'s predictions on the scaled splits."""
    train_mse = mean_squared_error(Y_train_s, clf.predict(X_train_s))
    test_mse = mean_squared_error(Y_test_s, clf.predict(X_test_s))
    return train_mse, test_mse


m1_train_MSE, m1_test_MSE = _train_test_mse(bc_MLP_1)
m2_train_MSE, m2_test_MSE = _train_test_mse(bc_MLP_2)
m3_train_MSE, m3_test_MSE = _train_test_mse(bc_MLP_3)
m4_train_MSE, m4_test_MSE = _train_test_mse(bc_MLP_4)

In [None]:
# Report all errors side by side: training errors first, then test errors,
# with the Ridge and Lasso baselines listed ahead of the MLP models.

train_report = (
    ('Ridge', ridge_train_MSE),
    ('Lasso', lasso_train_MSE),
    ('Model 1', m1_train_MSE),
    ('Model 2', m2_train_MSE),
    ('Model 3', m3_train_MSE),
    ('Model 4', m4_train_MSE),
)
test_report = (
    ('Ridge', ridge_test_MSE),
    ('Lasso', lasso_test_MSE),
    ('Model 1', m1_test_MSE),
    ('Model 2', m2_test_MSE),
    ('Model 3', m3_test_MSE),
    ('Model 4', m4_test_MSE),
)

for label, value in train_report:
    print(f'{label} train MSE:', value)

for label, value in test_report:
    print(f'{label} test MSE:', value)

Ridge train MSE: 0.12598425196850394
Lasso train MSE: 0.10498687664041995
Model 1 train MSE: 0.05249343832020997
Model 2 train MSE: 0.02099737532808399
Model 3 train MSE: 0.010498687664041995
Model 4 train MSE: 0.0
Ridge test MSE: 0.23404255319148937
Lasso test MSE: 0.1702127659574468
Model 1 test MSE: 0.06382978723404255
Model 2 test MSE: 0.0851063829787234
Model 3 test MSE: 0.10638297872340426
Model 4 test MSE: 0.10638297872340426


In [None]:
# Deep network: ten hidden layers of 40 units each.

deep_layers = (40,) * 10

bc_MLP_5 = MLPClassifier(
    hidden_layer_sizes=deep_layers,
    activation=actf,
    alpha=lmbd,
    verbose=True,
    random_state=rng,
)

In [None]:
print('Model deep:')
bc_MLP_5.fit(X_train_s, Y_train_s)

# Score the deep model's predicted labels on the training split, then on the test split.
m5_train_MSE, m5_test_MSE = (
    mean_squared_error(labels, bc_MLP_5.predict(features))
    for features, labels in ((X_train_s, Y_train_s), (X_test_s, Y_test_s))
)

Model deep:
Iteration 1, loss = 0.60045672
Iteration 2, loss = 0.44297765
Iteration 3, loss = 0.33393310
Iteration 4, loss = 0.25452213
Iteration 5, loss = 0.20151712
Iteration 6, loss = 0.16159676
Iteration 7, loss = 0.13523633
Iteration 8, loss = 0.11578063
Iteration 9, loss = 0.10433439
Iteration 10, loss = 0.09167084
Iteration 11, loss = 0.08477216
Iteration 12, loss = 0.07674046
Iteration 13, loss = 0.07419161
Iteration 14, loss = 0.07218267
Iteration 15, loss = 0.06803737
Iteration 16, loss = 0.06469357
Iteration 17, loss = 0.06150326
Iteration 18, loss = 0.05730405
Iteration 19, loss = 0.05451799
Iteration 20, loss = 0.05086591
Iteration 21, loss = 0.04909471
Iteration 22, loss = 0.04705828
Iteration 23, loss = 0.04623676
Iteration 24, loss = 0.04446524
Iteration 25, loss = 0.04275337
Iteration 26, loss = 0.04067046
Iteration 27, loss = 0.03997573
Iteration 28, loss = 0.03749039
Iteration 29, loss = 0.03635926
Iteration 30, loss = 0.03540536
Iteration 31, loss = 0.03451454
Itera

In [None]:
# Report the deep model's error on each split.
for split, mse in (('train', m5_train_MSE), ('test', m5_test_MSE)):
    print(f'Model deep {split} MSE:', mse)

Model deep train MSE: 0.010498687664041995
Model deep test MSE: 0.0851063829787234


In [None]:
# Try regression on the diabetes data set

Xd, Yd = load_diabetes(return_X_y=True)

feature_names_d = load_diabetes()['feature_names']

Xd_train, Xd_test, Yd_train, Yd_test = train_test_split( Xd, Yd, test_size=0.33, random_state= 42)

# Standardize the features; the scaler is fitted on the training split only.
Xd_scaler = StandardScaler()
Xd_scaler.fit(Xd_train)

Xd_train_s = Xd_scaler.transform(Xd_train)
Xd_test_s = Xd_scaler.transform(Xd_test)

# Standardize the targets. Both splits must use the TRAINING mean and standard
# deviation: dividing the test targets by their own std would put train and test
# on different scales and leak test statistics into the preprocessing.
Yd_train_mean = Yd_train.mean()
Yd_train_std = Yd_train.std()

Yd_train_s = (Yd_train - Yd_train_mean)/Yd_train_std
Yd_test_s = (Yd_test - Yd_train_mean)/Yd_train_std



alg = 'adam' # adam, lbfgs, sgd

# Same architecture and regularization as the widest three-layer classifier above,
# but as a regressor; `alg` selects the optimizer.
d_MLP = MLPRegressor( hidden_layer_sizes= (40, 40, 40), activation = actf, alpha = lmbd, verbose= True, random_state = rng, solver = alg )

In [None]:
# Fit on the standardized features and targets: the MSE cell evaluates the model
# on Xd_*_s / Yd_*_s, so training on the raw arrays would mismatch the scale of
# both the inputs and the predictions.

d_MLP.fit(Xd_train_s, Yd_train_s)

Iteration 1, loss = 14627.10591006
Iteration 2, loss = 14591.10773286
Iteration 3, loss = 14555.13485951
Iteration 4, loss = 14518.26629730
Iteration 5, loss = 14481.28271244
Iteration 6, loss = 14443.10381485
Iteration 7, loss = 14404.73350342
Iteration 8, loss = 14364.19336593
Iteration 9, loss = 14322.24883648
Iteration 10, loss = 14280.25150325
Iteration 11, loss = 14236.61935802
Iteration 12, loss = 14191.02743132
Iteration 13, loss = 14144.30108619
Iteration 14, loss = 14096.48177224
Iteration 15, loss = 14047.67060406
Iteration 16, loss = 13999.16609548
Iteration 17, loss = 13948.83367123
Iteration 18, loss = 13898.81359704
Iteration 19, loss = 13848.04385379
Iteration 20, loss = 13799.23753175
Iteration 21, loss = 13750.75499578
Iteration 22, loss = 13703.12045587
Iteration 23, loss = 13656.90993499
Iteration 24, loss = 13612.08423099
Iteration 25, loss = 13568.29016273
Iteration 26, loss = 13526.23814089
Iteration 27, loss = 13486.38315508
Iteration 28, loss = 13447.90958118
I



MLPRegressor(activation='tanh', alpha=0.001, hidden_layer_sizes=(40, 40, 40),
             random_state=10, verbose=True)

In [None]:
# Report the regressor's MSE on the standardized training and test splits.

d_train_MSE = mean_squared_error( Yd_train_s, d_MLP.predict(Xd_train_s) )
d_test_MSE = mean_squared_error( Yd_test_s, d_MLP.predict(Xd_test_s) )

print('Regression train MSE:', d_train_MSE)
print('Regression test MSE:', d_test_MSE)

Regression train MSE: 520.2249881563903
Regrassion test MSE: 508.46545623660154
