# 3-layer Neural Network
Overview of Implementation
1. <a href="#section1">Import Dataset</a>
2. <a href="#section2">Cleaning the Data for Model Training</a>
3. <a href="#section3">3-layer Neural Network</a>



In [1]:
# Import libraries

# math library
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## <a id='section1'>Import Dataset</a>

In [2]:
train = pd.read_csv('train.csv')
train

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,62.0,7917,Pave,,Reg,Lvl,AllPub,...,0,,,,0,8,2007,WD,Normal,175000
1456,1457,20,RL,85.0,13175,Pave,,Reg,Lvl,AllPub,...,0,,MnPrv,,0,2,2010,WD,Normal,210000
1457,1458,70,RL,66.0,9042,Pave,,Reg,Lvl,AllPub,...,0,,GdPrv,Shed,2500,5,2010,WD,Normal,266500
1458,1459,20,RL,68.0,9717,Pave,,Reg,Lvl,AllPub,...,0,,,,0,4,2010,WD,Normal,142125


## <a id='section2'>Cleaning the Data for Model Training</a>
Remove the NA data and perform One Hot Encoding

In [3]:
nullData = [['LotFrontage', 259], ['MasVnrArea', 8], ['Electrical', 1], ['GarageYrBlt', 81]]
n = len(train)
treshold = 0.1
drop = []

print('Drop feature - too many nulls:')
for i in nullData:
    if i[1]/n > treshold: # Arbitrary treshold: 10%
        print(i[0])
        train.drop(columns=[i[0]], inplace=True)
    else:
        drop.append(i[0])
        
print('Remove data point:')
print(drop)
train.dropna(subset=drop, inplace=True)

train

Drop feature - too many nulls:
LotFrontage
Remove data point:
['MasVnrArea', 'Electrical', 'GarageYrBlt']


Unnamed: 0,Id,MSSubClass,MSZoning,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,,,,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,7917,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,8,2007,WD,Normal,175000
1456,1457,20,RL,13175,Pave,,Reg,Lvl,AllPub,Inside,...,0,,MnPrv,,0,2,2010,WD,Normal,210000
1457,1458,70,RL,9042,Pave,,Reg,Lvl,AllPub,Inside,...,0,,GdPrv,Shed,2500,5,2010,WD,Normal,266500
1458,1459,20,RL,9717,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,4,2010,WD,Normal,142125


In [4]:
#One-Hot encoding
categoricalcolumns = ['MSSubClass', 'MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond','Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition']
train1 = pd.get_dummies(train, columns= categoricalcolumns, prefix= categoricalcolumns)
print(train1)

        Id  LotArea  YearBuilt  YearRemodAdd  MasVnrArea  BsmtFinSF1  \
0        1     8450       2003          2003       196.0         706   
1        2     9600       1976          1976         0.0         978   
2        3    11250       2001          2002       162.0         486   
3        4     9550       1915          1970         0.0         216   
4        5    14260       2000          2000       350.0         655   
...    ...      ...        ...           ...         ...         ...   
1455  1456     7917       1999          2000         0.0           0   
1456  1457    13175       1978          1988       119.0         790   
1457  1458     9042       1941          2006         0.0         275   
1458  1459     9717       1950          1996         0.0          49   
1459  1460     9937       1965          1965         0.0         830   

      BsmtFinSF2  BsmtUnfSF  TotalBsmtSF  1stFlrSF  ...  SaleType_ConLw  \
0              0        150          856       856  ...     

## <a id='section3'>3-layer Neural Network</a>

In [5]:
#Applying Standardization (Z-Score method)
continuous = ['LotArea', 'YearBuilt', 'YearRemodAdd', 
       'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr','TotRmsAbvGrd', 'Fireplaces', 
       'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF',
       'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold']
features_stand=train1.copy()
for i in continuous:
    # fit on training data column
    scale = StandardScaler().fit(train1[[i]])
    # transform training data column
    features_stand[i] = scale.transform(features_stand[[i]])
labels = train1['SalePrice']
features_stand.drop(['Id', 'SalePrice'], axis=1, inplace=True)
train_X_stan, test_X_stan, train_Y_stan, test_Y_stan = train_test_split(features_stand,labels,test_size=0.2, random_state=0)

# initialisation for training data
X_train=train_X_stan.to_numpy().T
y_train=train_Y_stan.to_numpy()[:,None]

# initialisation for test data 
X_test=test_X_stan.to_numpy().T
y_test=test_Y_stan.to_numpy()[:,None]

Yhat=y_train.T
print(Yhat.shape)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(1, 1096)
(314, 1096)
(1096, 1)
(314, 274)
(274, 1)


In [6]:
## testing with sample dataset
from sklearn.datasets import make_regression
X, y = make_regression(n_samples=200, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

X_train=X_train.T
print(X_train.shape)
y_train=y_train[:,None]
print(y_train.shape)
X_test=X_test.T
print(X_test.shape)
y_test=y_test[:,None]
print(y_test.shape)
Yhat=y_train.T
print(Yhat.shape)

(100, 150)
(150, 1)
(100, 50)
(50, 1)
(1, 150)


In [7]:
K = 1 # number of classes
n = X_train.shape[1] # number of training data
n1 = 100
n2 = 50
n3 = K

In [8]:
# sigmoid function
def sigmoid(z):
    sigmoid_f = 1 / (1 + np.exp(-z)) 
    return sigmoid_f 

# derivate of the sigmoid function
def sigmoid_derivate(z):
    sigm = sigmoid(z)
    return sigm* (1-sigm)


# accuracy function
def score(y_pred,y_true):
    u=((y_true - y_pred) ** 2).sum()
    v=((y_true - y_true.mean()) ** 2).sum()
    return (1-u/v)

a = 2/ np.sqrt(n1)
W1 = np.random.uniform(-a,a,[n2,n1+1])
a = 2/ np.sqrt(n2)
W2 = np.random.uniform(-a,a,[n3,n2+1])

print(W1.shape)
print(W2.shape)

(50, 101)
(1, 51)


In [12]:
tau = 0.0000001 # learning rate
lamb = 0  # regularization
#tau_list=np.array([0.0003, 0.0001, 0.003, 0.001, 0.03, 0.01, 0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000, 3000])
tau_list=np.array([0.001])
for tau in tau_list:
    # iterate
    for iter in range(1000):
    
        # forward pass
        Y1 = X_train
        Y1bias = np.insert(Y1,0,1,axis=0)
        Y2 = sigmoid(W1.dot(Y1bias))
        Y2bias = np.insert(Y2,0,1,axis=0)
        Y3 = sigmoid(W2.dot(Y2bias))
        # backward pass
        Delta3 = Y3 - Yhat
        Grad2 = 2/n* Delta3.dot(Y2bias.T)
        Grad2 += 2* lamb* W2
        W2 = W2 - tau* Grad2
        W2bar = W2[:,1:n2+1]
        Delta2 = ( W2bar.T.dot(Delta3) ) * sigmoid_derivate(Y2)
        Grad1 = 2/n* Delta2.dot(Y1bias.T)
        Grad1 += 2* lamb* W1
        W1 = W1 - tau* Grad1
        # print intermediate result
        if not iter%500:
        
            # loss 
            loss = 1/n* (y_train - Y3.T).T.dot(y_train - Y3.T) + \
                lamb* ( np.sum(W1**2) + np.sum(W2**2) )
        
            # train accuracy
            acc = score(Y3.T,y_train)
            # test accuracy (with forward pass on the test set)
            Y1_test = X_test
            Y1bias_test = np.insert(Y1_test,0,1,axis=0)
            Y2_test = sigmoid(W1.dot(Y1bias_test))
            Y2bias_test = np.insert(Y2_test,0,1,axis=0)
            Y3_test = sigmoid(W2.dot(Y2bias_test))
            acc_test = score(Y3_test.T,y_test)
        
            # print
            print('iter:',iter,'\nloss:',loss,'train acc:',acc,'test acc:',acc_test)
        
    
    print('iter:',iter+1,'\nloss:',loss,'train acc:',acc,'test acc:',acc_test)


iter: 0 
loss: [[20916.46994999]] train acc: 0.0035658172738696203 test acc: -0.008629817212501667


  This is separate from the ipykernel package so we can avoid doing imports until


iter: 500 
loss: [[20916.46997523]] train acc: 0.003565816071614658 test acc: -0.008862513069237021
iter: 1000 
loss: [[20916.46997523]] train acc: 0.003565816071614658 test acc: -0.008862513069237021


## sklearn neural network

In [10]:
from sklearn.neural_network import MLPRegressor

In [11]:
for i in range(10):
    print("i =",i)
    regr = MLPRegressor(random_state=i,solver='adam', max_iter=10**(200),early_stopping=True).fit(train_X_stan,train_Y_stan)
    print("train acc = ",regr.score(train_X_stan,train_Y_stan))
    print("test acc = ",regr.score(test_X_stan,test_Y_stan))


i = 0
train acc =  0.7887285049919801
test acc =  0.6984392410143507
i = 1
train acc =  0.7839236675268634
test acc =  0.7126127154429884
i = 2
train acc =  0.7793065681639332
test acc =  0.6971028595631417
i = 3
train acc =  0.7428969926672738
test acc =  0.6805571335417322
i = 4
train acc =  0.7921887494911636
test acc =  0.7002060073998777
i = 5
train acc =  0.7838490645846071
test acc =  0.6963244863170769
i = 6
train acc =  0.7982544051635382
test acc =  0.6989764862953143
i = 7
train acc =  0.7818001478418587
test acc =  0.6973913107237465
i = 8
train acc =  0.7710040629065562
test acc =  0.6938589205510697
i = 9
train acc =  0.8059127524168183
test acc =  0.7024302653401071
