# CASE_STUDY_4_NEURAL_NETWORKS

In [1]:
from pathlib import Path # to interact with file system.

import numpy as np # for working with arrays.
import pandas as pd # for working with data frames (tables).
import seaborn as sns # for plotting box plots to detect outliers

from sklearn.model_selection import train_test_split # for data partition.
from sklearn.metrics import r2_score # to identify r_square for regression model.

from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.neural_network import MLPClassifier, MLPRegressor 
from sklearn.preprocessing import StandardScaler

from mord import LogisticIT

%matplotlib inline 
import matplotlib.pylab as plt # for building and showing graphs.


from dmba import regressionSummary, exhaustive_search
from dmba import backward_elimination, forward_selection, stepwise_selection
from dmba import adjusted_r2_score, AIC_score, BIC_score

In [2]:
boston_df = pd.read_csv("BostonHousing.csv")

In [3]:
print('Dataframe dimensions, i.e. Number of rows and columns in data set:', 
      boston_df.shape)

Dataframe dimensions, i.e. Number of rows and columns in data set: (506, 14)


In [4]:
boston_df.head()

Unnamed: 0,CRIME,ZONE,INDUST,CHAR RIV,NIT OXIDE,ROOMS,AGE,DISTANCE,RADIAL,TAX,ST RATIO,LOW STAT,MVALUE,C MVALUE
0,0.00632,18.0,2.31,N,0.538,6.575,65.2,4.09,1,296,15.3,4.98,24.0,No
1,0.02731,0.0,7.07,N,0.469,6.421,78.9,4.9671,2,242,17.8,9.14,21.6,No
2,0.02729,0.0,7.07,N,0.469,7.185,61.1,4.9671,2,242,17.8,4.03,34.7,Yes
3,0.03237,0.0,2.18,N,0.458,6.998,45.8,6.0622,3,222,18.7,2.94,33.4,Yes
4,0.06905,0.0,2.18,N,0.458,7.147,54.2,6.0622,3,222,18.7,5.33,36.2,Yes


In [5]:
boston_df.tail()

Unnamed: 0,CRIME,ZONE,INDUST,CHAR RIV,NIT OXIDE,ROOMS,AGE,DISTANCE,RADIAL,TAX,ST RATIO,LOW STAT,MVALUE,C MVALUE
501,0.06263,0.0,11.93,N,0.573,6.593,69.1,2.4786,1,273,21.0,9.67,22.4,No
502,0.04527,0.0,11.93,N,0.573,6.12,76.7,2.2875,1,273,21.0,9.08,20.6,No
503,0.06076,0.0,11.93,N,0.573,6.976,91.0,2.1675,1,273,21.0,5.64,23.9,No
504,0.10959,0.0,11.93,N,0.573,6.794,89.3,2.3889,1,273,21.0,6.48,22.0,No
505,0.04741,0.0,11.93,N,0.573,6.03,80.8,2.505,1,273,21.0,7.88,11.9,No


In [6]:
# Use the sample() function to retrieve a random sample of observations. 
# Here we sample 5 observations without replacement.
boston_df.sample(5)

Unnamed: 0,CRIME,ZONE,INDUST,CHAR RIV,NIT OXIDE,ROOMS,AGE,DISTANCE,RADIAL,TAX,ST RATIO,LOW STAT,MVALUE,C MVALUE
125,0.16902,0.0,25.65,N,0.581,5.986,88.4,1.9929,2,188,19.1,14.81,21.4,No
355,0.10659,80.0,1.91,N,0.413,5.936,19.5,10.5857,4,334,22.0,5.57,20.6,No
456,4.66883,0.0,18.1,N,0.713,5.976,87.9,2.5806,24,666,20.2,19.01,12.7,No
200,0.01778,95.0,1.47,N,0.403,7.135,13.9,7.6534,3,402,17.0,4.45,32.9,Yes
326,0.30347,0.0,7.38,N,0.493,6.312,28.9,5.4159,5,287,19.6,6.15,23.0,No


In [7]:
print('Original column titles:')
boston_df.columns

Original column titles:


Index(['CRIME', 'ZONE', 'INDUST', 'CHAR RIV', 'NIT OXIDE', 'ROOMS', 'AGE',
       'DISTANCE', 'RADIAL', 'TAX', 'ST RATIO', 'LOW STAT', 'MVALUE',
       'C MVALUE'],
      dtype='object')

In [8]:
print('Modified column titles with no space and one word for titles:')
boston_df.columns = [s.strip().replace(" ", "_") for s in boston_df.columns]
boston_df.columns

Modified column titles with no space and one word for titles:


Index(['CRIME', 'ZONE', 'INDUST', 'CHAR_RIV', 'NIT_OXIDE', 'ROOMS', 'AGE',
       'DISTANCE', 'RADIAL', 'TAX', 'ST_RATIO', 'LOW_STAT', 'MVALUE',
       'C_MVALUE'],
      dtype='object')

In [9]:
boston_df.dtypes

CRIME        float64
ZONE         float64
INDUST       float64
CHAR_RIV      object
NIT_OXIDE    float64
ROOMS        float64
AGE          float64
DISTANCE     float64
RADIAL         int64
TAX            int64
ST_RATIO     float64
LOW_STAT     float64
MVALUE       float64
C_MVALUE      object
dtype: object

In [10]:
# The CHAR_RIV and C_MVALUE column are 'object's; does not have 
# the 'category' definition.
print('Original CHAR_RIV and C_MVALUE variables:')
print(boston_df.CHAR_RIV.dtype, boston_df.C_MVALUE.dtype )

# Need to change variable types to 'category'. 
boston_df.CHAR_RIV = boston_df.CHAR_RIV.astype('category')
boston_df.C_MVALUE = boston_df.C_MVALUE.astype("category")

# Display category levels (attributes) and category type.
print(' ')
print('Category levels and changed variable type of CHAR_RIV column:')
print(boston_df.CHAR_RIV.cat.categories)  # It can take one of two levels.
print(boston_df.CHAR_RIV.dtype)  # Type is now 'category'.
print(' ')
print('Category levels and changed variable type of C_MVALUE column:')
print(boston_df.C_MVALUE.cat.categories)  # It can take one of two levels.
print(boston_df.C_MVALUE.dtype) 

Original CHAR_RIV and C_MVALUE variables:
object object
 
Category levels and changed variable type of CHAR_RIV column:
Index(['N', 'Y'], dtype='object')
category
 
Category levels and changed variable type of C_MVALUE column:
Index(['No', 'Yes'], dtype='object')
category


In [11]:
boston_df = pd.get_dummies(boston_df, prefix_sep='_', drop_first = True)
print("Modified list of column variables:")
boston_df.columns

Modified list of column variables:


Index(['CRIME', 'ZONE', 'INDUST', 'NIT_OXIDE', 'ROOMS', 'AGE', 'DISTANCE',
       'RADIAL', 'TAX', 'ST_RATIO', 'LOW_STAT', 'MVALUE', 'CHAR_RIV_Y',
       'C_MVALUE_Yes'],
      dtype='object')

In [12]:
# Display values of the new dummy variables, 
# CHAR_RIV_Y and C_MVALUE_Yes.
#if the dummy variale CHAR_RIV_Y is 0 then it is N. Similarly, if the dummy variale C_MVALUE_Yes is 0 then it is No.
print(boston_df.loc[:, 'CHAR_RIV_Y':'C_MVALUE_Yes'].head(5))

   CHAR_RIV_Y  C_MVALUE_Yes
0           0             0
1           0             0
2           0             1
3           0             1
4           0             1


In [13]:
des_df = pd.DataFrame(np.round(boston_df.describe(), decimals=2))
print("Descriptive statistics for all the columns in the modified data frame:")
print(" ")
print(des_df.head(5))


Descriptive statistics for all the columns in the modified data frame:
 
        CRIME    ZONE  INDUST  NIT_OXIDE   ROOMS     AGE  DISTANCE  RADIAL  \
count  506.00  506.00  506.00     506.00  506.00  506.00    506.00  506.00   
mean     3.61   11.36   11.14       0.55    6.28   68.57      3.80    9.55   
std      8.60   23.32    6.86       0.12    0.70   28.15      2.11    8.71   
min      0.01    0.00    0.46       0.38    3.56    2.90      1.13    1.00   
25%      0.08    0.00    5.19       0.45    5.89   45.02      2.10    4.00   

          TAX  ST_RATIO  LOW_STAT  MVALUE  CHAR_RIV_Y  C_MVALUE_Yes  
count  506.00    506.00    506.00  506.00      506.00        506.00  
mean   408.24     18.46     12.65   22.53        0.07          0.17  
std    168.54      2.16      7.14    9.20        0.25          0.37  
min    187.00     12.60      1.73    5.00        0.00          0.00  
25%    279.00     17.40      6.95   17.02        0.00          0.00  


In [14]:
print("               Count")
print(boston_df[:].count()) #find out missing values #result shows no missing values

               Count
CRIME           506
ZONE            506
INDUST          506
NIT_OXIDE       506
ROOMS           506
AGE             506
DISTANCE        506
RADIAL          506
TAX             506
ST_RATIO        506
LOW_STAT        506
MVALUE          506
CHAR_RIV_Y      506
C_MVALUE_Yes    506
dtype: int64


In [15]:
# Identify predictors and outcome, 
# and data preprocessing.


# Identify predictors (same as those in linear regression and 
# regression tree) and outcome for neural network model. 
predictors = ['CRIME', 'ZONE', 'INDUST', 'NIT_OXIDE', 'ROOMS', 'AGE', 'DISTANCE',
              'RADIAL', 'TAX', 'ST_RATIO', 'LOW_STAT', 'CHAR_RIV_Y', 'C_MVALUE_Yes']
outcome = 'MVALUE'

print(f'The outcome variable is "{outcome}"\n\nThe predictor variables are {predictors} \n')
# Name predictors and outcome data frames as X and y, respectively. 
# Convert, if necessary, categorical variables to dummy variables. 
X = pd.get_dummies(boston_df[predictors], drop_first=True)
y = boston_df[outcome]

# Display the first 10 records of Boston data set. 
boston_df_reg = pd.concat([y, X], axis=1)
print('First 10 Records of Boston Data Set')
print(boston_df_reg.head(5))

The outcome variable is "MVALUE"

The predictor variables are ['CRIME', 'ZONE', 'INDUST', 'NIT_OXIDE', 'ROOMS', 'AGE', 'DISTANCE', 'RADIAL', 'TAX', 'ST_RATIO', 'LOW_STAT', 'CHAR_RIV_Y', 'C_MVALUE_Yes'] 

First 10 Records of Boston Data Set
   MVALUE    CRIME  ZONE  INDUST  NIT_OXIDE  ROOMS   AGE  DISTANCE  RADIAL  \
0    24.0  0.00632  18.0    2.31      0.538  6.575  65.2    4.0900       1   
1    21.6  0.02731   0.0    7.07      0.469  6.421  78.9    4.9671       2   
2    34.7  0.02729   0.0    7.07      0.469  7.185  61.1    4.9671       2   
3    33.4  0.03237   0.0    2.18      0.458  6.998  45.8    6.0622       3   
4    36.2  0.06905   0.0    2.18      0.458  7.147  54.2    6.0622       3   

   TAX  ST_RATIO  LOW_STAT  CHAR_RIV_Y  C_MVALUE_Yes  
0  296      15.3      4.98           0             0  
1  242      17.8      9.14           0             0  
2  242      17.8      4.03           0             1  
3  222      18.7      2.94           0             1  
4  222      18.7

In [16]:
# Create data partitioning and scale the data using 
# StandardScaler()from scikit-learn libray. Display 
# original and scaled predictors for training partition. 

# Create data partition with training set, 60%(0.6), and 
# validation set 40%(0.4) of the Toyota Corolla data set.
train_X, valid_X, train_y, valid_y = train_test_split(X, y, 
                            test_size=0.4, random_state=1)

# Display the first 10 records of training 
# partition's predictors. 
print('Predictors for Training Partition')
print(train_X.head(5))

# Scale input data (predictors) for training  and validation 
# partitions using StandardScaler().
sc_X = StandardScaler()
train_X_sc = sc_X.fit_transform(train_X)
valid_X_sc = sc_X.transform(valid_X)

# Develop a data frame to display scaled predictors for 
# training partition. Round scaled values to 3 decimals.
# Add coloumn titles to data frame.
train_X_sc_df = np.round(pd.DataFrame(train_X_sc), decimals=3)                            
train_X_sc_df.columns=['CRIME', 'ZONE', 'INDUST', 'NIT_OXIDE', 'ROOMS', 'AGE', 'DISTANCE',
                       'RADIAL', 'TAX', 'ST_RATIO', 'LOW_STAT', 'CHAR_RIV_Y', 'C_MVALUE_Yes']

# Display scaled predictors for training partition.
print()
print('Scaled Predictors for Training Partition')
print(train_X_sc_df.head(5))

Predictors for Training Partition
       CRIME  ZONE  INDUST  NIT_OXIDE  ROOMS   AGE  DISTANCE  RADIAL  TAX  \
452  5.09017   0.0   18.10      0.713  6.297  91.8    2.3682      24  666   
346  0.06162   0.0    4.39      0.442  5.898  52.3    8.0136       3  352   
295  0.12932   0.0   13.92      0.437  6.678  31.1    5.9604       4  289   
88   0.05660   0.0    3.41      0.489  7.007  86.3    3.4217       2  270   
322  0.35114   0.0    7.38      0.493  6.041  49.9    4.7211       5  287   

     ST_RATIO  LOW_STAT  CHAR_RIV_Y  C_MVALUE_Yes  
452      20.2     17.27           0             0  
346      18.8     12.67           0             0  
295      16.0      6.27           0             0  
88       17.8      5.50           0             0  
322      19.6      7.70           0             0  

Scaled Predictors for Training Partition
   CRIME   ZONE  INDUST  NIT_OXIDE  ROOMS    AGE  DISTANCE  RADIAL    TAX  \
0  0.146 -0.482   1.006      1.306  0.083  0.803    -0.688   1.662  1.53

In [17]:
# Use MLPRegressor() function to train neural network model.
# Apply: 
# (a) default input layer with the number of nodes equal 
#     to number of predictor variables (13); 
# (b) default single hidden layer with 9 nodes; 
# (c) default output layer with one outcome variable (Price);
# (d) optimization function solver = 'lbfgs', 
#     which is applied for small data sets for better 
#     performance and fast convergence. For large data sets, 
#     apply default solver = 'adam' optimization function;
# (e) model is fit with scaled predictors and regular outcome
#     in training partition.
boston_reg = MLPRegressor(hidden_layer_sizes=(9), 
                solver='lbfgs', max_iter=10000, random_state=1)
boston_reg.fit(train_X_sc, train_y)

# Display network structure with the final values of 
# intercepts (Theta) and weights (W).
print('Final Intercepts for Neural Network Model')
print(boston_reg.intercepts_)

print()
print('Network Weights for Neural Network Model')
print(boston_reg.coefs_)

Final Intercepts for Neural Network Model
[array([ 2.26914474,  4.18675342, -1.50466092, -0.95080817,  0.32205673,
        0.49141424, -0.81237211,  0.89649621,  2.52709405]), array([-11.97658924])]

Network Weights for Neural Network Model
[array([[-0.36367279,  0.52687745, -0.00964011, -1.5129948 ,  1.31769872,
        -0.41850542, -1.53776369, -0.62480937, -1.90841567],
       [ 0.66134206, -1.23924591,  0.28794966,  2.11953871,  1.07434276,
         0.43220725, -4.11086338,  0.40334917, -0.08228966],
       [ 0.59676784,  0.58331396,  2.48943055,  0.90207757, -1.28237762,
         0.42433014, -1.95774694,  0.95234875,  1.23319412],
       [-2.20869382, -1.37160062, -0.98568213,  0.36329227, -0.6196563 ,
        -0.30782417,  1.05946611,  0.83457418,  2.71468783],
       [ 0.16998566, -0.09067205, -1.7444194 ,  0.11677319,  1.80120993,
         0.81352311, -0.01783085, -2.03768459,  0.23723986],
       [-0.27165058, -2.52251129, -1.29506367,  0.3880619 ,  0.44373225,
         0.5111

In [18]:
# Make 'Mvalue' predictions for validation set using  
# neural network model. 

# Use boston_reg model to predict 'Mvalue' outcome
# for validation set.
Mvalue_pred = np.round(boston_reg.predict(valid_X_sc), decimals=2)

# Create data frame to display prediction results for
# validation set. 
Mvalue_pred_result = pd.DataFrame({'Actual': valid_y, 
                'Prediction': Mvalue_pred, 'Residual': valid_y-Mvalue_pred})

print('Predictions for Validation Partition')
print(Mvalue_pred_result.head(5))

Predictions for Validation Partition
     Actual  Prediction  Residual
307    28.2       29.63     -1.43
343    23.9       23.49      0.41
47     16.6       17.80     -1.20
67     22.0       18.73      3.27
362    20.8       25.30     -4.50


In [20]:
# Neural network model accuracy measures for training and
# validation partitions. 

# Identify and display neural network model accuracy measures 
# for training partition.
print('Accuracy Measures for Training Partition for Neural Network')
regressionSummary(train_y, boston_reg.predict(train_X_sc))

# Identify and display neural network accuracy measures 
# for validation partition.
print()
print('Accuracy Measures for Validation Partition for Neural Network')
regressionSummary(valid_y, boston_reg.predict(valid_X_sc))

Accuracy Measures for Training Partition for Neural Network

Regression statistics

                      Mean Error (ME) : -0.0034
       Root Mean Squared Error (RMSE) : 1.5617
            Mean Absolute Error (MAE) : 1.1368
          Mean Percentage Error (MPE) : -0.8274
Mean Absolute Percentage Error (MAPE) : 6.0681

Accuracy Measures for Validation Partition for Neural Network

Regression statistics

                      Mean Error (ME) : -0.0912
       Root Mean Squared Error (RMSE) : 3.1675
            Mean Absolute Error (MAE) : 2.2668
          Mean Percentage Error (MPE) : -3.0502
Mean Absolute Percentage Error (MAPE) : 11.6748


## Grid Search CV

In [21]:
# Identify grid search parameters. 
param_grid = {
    'hidden_layer_sizes': list(range(2, 20)), 
}

# Utilize GridSearchCV() to identify the best number 
# of nodes in the hidden layer. 
gridSearch = GridSearchCV(MLPRegressor(solver='lbfgs', max_iter=10000, random_state=1), 
                          param_grid, cv=5, n_jobs=-1, return_train_score=True)
gridSearch.fit(train_X_sc, train_y)

# Display the best score and best parament value.
print(f'Best score:{gridSearch.best_score_:.4f}')
print('Best parameter: ', gridSearch.best_params_)

Best score:0.8759
Best parameter:  {'hidden_layer_sizes': 2}


In [22]:
# Use MLPRegressor() function to train the improved neural network model
# based on grid search results. 

# Apply: 
# (a) default input layer with the number of nodes equal 
#     to number of predictor variaSbles (13); 
# (b) single hidden layer with 2 nodes based on grid search; 
# (c) default output layer with the number nodes equal
#     to one outcome variable "MVALUE";
# (d) solver = 'lbfgs', which is applied for small data 
#     sets for better performance and fast convergence. 
#     For large data sets, apply default solver = 'adam'. 
boston_imp = MLPRegressor(hidden_layer_sizes=(2), max_iter=10000,
                                 solver='lbfgs', random_state=1)
boston_imp.fit(train_X_sc, train_y)

# Display network structure with the final values of 
# intercepts (Theta) and weights (W).
print('Final Intercepts for Neural Network Model')
print(boston_imp.intercepts_)

print()
print('Network Weights for Neural Network Model')
print(boston_imp.coefs_)

Final Intercepts for Neural Network Model
[array([-5.59839276,  8.69124415]), array([6.60346469])]

Network Weights for Neural Network Model
[array([[-0.20769012, -1.78999663],
       [-0.31789029,  0.2530354 ],
       [ 3.68804841, -0.23745973],
       [-0.39979107, -0.36498108],
       [-1.54682674,  2.3322485 ],
       [ 0.12819544, -0.90563125],
       [ 0.31696069, -1.11355164],
       [ 3.16426975,  0.13759987],
       [ 1.58396291, -1.3793737 ],
       [-2.27344137, -0.55228335],
       [-1.32734466, -0.43485492],
       [-0.01351816,  0.17987294],
       [ 3.13443234,  1.31033442]]), array([[2.40608879],
       [1.58695947]])]


In [23]:
# Neural network model based on Grid Search results and its accuracy measures for training and
# validation partitions. 

# Identify and display neural network model accuracy measures 
# for training partition.
print('Accuracy Measures for Training Partition for Neural Network')
regressionSummary(train_y, boston_imp.predict(train_X_sc))

# Identify and display neural network accuracy measures 
# for validation partition.
print()
print('Accuracy Measures for Validation Partition for Neural Network')
regressionSummary(valid_y, boston_imp.predict(valid_X_sc))

Accuracy Measures for Training Partition for Neural Network

Regression statistics

                      Mean Error (ME) : 0.0013
       Root Mean Squared Error (RMSE) : 2.6108
            Mean Absolute Error (MAE) : 2.0053
          Mean Percentage Error (MPE) : -1.8353
Mean Absolute Percentage Error (MAPE) : 10.4838

Accuracy Measures for Validation Partition for Neural Network

Regression statistics

                      Mean Error (ME) : 0.0295
       Root Mean Squared Error (RMSE) : 3.0570
            Mean Absolute Error (MAE) : 2.2651
          Mean Percentage Error (MPE) : -2.3393
Mean Absolute Percentage Error (MAPE) : 11.4870
