# Neural Networks

In [3]:
# Import required packages for this chapter
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import classification_report, confusion_matrix



# Credit Card Use

Consider the hypothetical bank data in on consumers’ use of credit card credit facilities. Import dataset _creditcard.csv_. Use Python program to illustrate one pass through a simple neural network (Randomly generate initial weight values)

_Years: number of years the customer has been with the bank_

_Salary: customer’s salary (in thousands of dollars)_

_Used Credit: 1 = customer has left an unpaid credit card balance at the end of at least one month in the prior year, 0 = balance was paid off at the end of each month_

__Data Preprocessing.__ The dataset is too small, we will not split the data. We are using the whole set as a training set to estabish our model.

In [2]:
# Load the data into creditcard_df dataframe
# Use critical functions to explore the dataframe using print() to show results
creditcard_df=pd.read_csv("creditcard.csv")
print(creditcard_df.shape)
print(creditcard_df.count())
print(creditcard_df.describe().transpose())

(6, 3)
Years          6
Salary         6
Used_Credit    6
dtype: int64
             count       mean        std   min    25%   50%    75%    max
Years          6.0   7.833333   6.968979   1.0   3.25   5.0  12.75   18.0
Salary         6.0  76.000000  26.608269  43.0  56.00  76.5  93.25  112.0
Used_Credit    6.0   0.500000   0.547723   0.0   0.00   0.5   1.00    1.0


In [3]:
# construct the dataset: predictors(X) and outcome(y)
# print out results to varify

X=creditcard_df.drop('Used_Credit', axis='columns')
y=creditcard_df['Used_Credit']
print(X, y)

   Years  Salary
0      4      43
1     18      65
2      1      53
3      3      95
4     15      88
5      6     112 0    0
1    1
2    0
3    0
4    1
5    1
Name: Used_Credit, dtype: int64


In [4]:
# Normalize data using standard scalar.
# Do we need to standardize both predictors and outcome or just one of thoese two?
scaler=StandardScaler()
scaler.fit(X)
X=scaler.transform(X)
print(X)
# we don't need to scale Y, predictors need to be between 0 and 1

[[-0.60255689 -1.3585885 ]
 [ 1.59808568 -0.45286283]
 [-1.07412316 -0.94689502]
 [-0.75974565  0.78221762]
 [ 1.12651941  0.49403218]
 [-0.28817938  1.48209655]]


In [5]:
# The dataset is too small to partition into training set and validation set. We will use the whole dataset for training
# Multi-layer Perceptron classifier: one layer with 3 hidden nodes, set the activation function to be logistic function, solver to be lbfgs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1, random_state=108)
print(X_train.shape, X_test.shape)

mlp=MLPClassifier(hidden_layer_sizes=(3), activation = 'logistic', solver='lbfgs', max_iter=500)
mlp.fit(X_train, y_train)

(5, 2) (1, 2)


MLPClassifier(activation='logistic', hidden_layer_sizes=3, max_iter=500,
              solver='lbfgs')

In [6]:
# Network structure
# Hint: sample code given in in class exercise

for i, (weights, intercepts) in enumerate(zip(mlp.coefs_, mlp.intercepts_)):
    print('Hidden layer' if i == 0 else 'Output layer', '{0[0]} => {0[1]}'.format(weights.shape))
    print(' Intercepts:\n ', intercepts)
    print(' Weights:')
    for weight in weights:
        print(' ', weight)
    print()

Hidden layer 2 => 3
 Intercepts:
  [-1.05018984  1.01083091 -0.74328605]
 Weights:
  [ 3.84666335 -3.46430064  2.85409368]
  [ 2.62543266 -2.52332552  2.29352759]

Output layer 3 => 1
 Intercepts:
  [-3.03437227]
 Weights:
  [7.84086078]
  [-7.75163921]
  [5.36739226]



In [7]:
# Predictions: merge predicted classification and the probability to each class into the orignal table
predictions=mlp.predict(X_test)
print(predictions)


[1]


In [8]:
# print the confusion_matrix and classification_report
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

[[1]]
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [9]:
# Interpret your results:
#It's over fitting?

# Lesson learned from this excercise:



# Car Sales

Consider the data on used cars (_toyotacorolla.csv_) with 1436 records and details on 38 attributes, including Price, Age, KM, HP, and other specifcations. The goal is to predict the price of a used Toyota Corolla based on its specifcations.

Use predictors Age_08_04, KM, Fuel_Type, HP, Automatic, Doors, Quarterly_Tax, Mfr_Guarantee, Guarantee_Period, Airco, Automatic_airco, CD_Player, Powered_Windows, Sport_Model, and Tow_Bar.

__Data Preprocessing.__ TUse the scikit-learn transformer _MinMaxScaler()_ to scale the data to the range [0, 1]. Use separate transformer for the input and output data. To create the dummy variables, use the pandas function _pd.get_dummies()_. Partition the data into training (60%) and validation (40%) sets.

## Data Preprocessing

In [4]:
# load the data
# explore the data
toyotacorolla_df=pd.read_csv("toyotacorolla.csv")
print(toyotacorolla_df.shape)
print(toyotacorolla_df.count())
print(toyotacorolla_df.describe().transpose())

(1436, 39)
Id                   1436
Model                1436
Price                1436
Age_08_04            1436
Mfg_Month            1436
Mfg_Year             1436
KM                   1436
Fuel_Type            1436
HP                   1436
Met_Color            1436
Color                1436
Automatic            1436
CC                   1436
Doors                1436
Cylinders            1436
Gears                1436
Quarterly_Tax        1436
Weight               1436
Mfr_Guarantee        1436
BOVAG_Guarantee      1436
Guarantee_Period     1436
ABS                  1436
Airbag_1             1436
Airbag_2             1436
Airco                1436
Automatic_airco      1436
Boardcomputer        1436
CD_Player            1436
Central_Lock         1436
Powered_Windows      1436
Power_Steering       1436
Radio                1436
Mistlamps            1436
Sport_Model          1436
Backseat_Divider     1436
Metallic_Rim         1436
Radio_cassette       1436
Parking_Assistant    1436
T

In [5]:
# convert the categorical data Fuel_type into dummy variables
dummy_Fuel_Type=pd.get_dummies(toyotacorolla_df['Fuel_Type'])

print(dummy_Fuel_Type.head(3))

toyotacorolla_df = pd.concat([toyotacorolla_df,dummy_Fuel_Type],axis=1)

   CNG  Diesel  Petrol
0    0       1       0
1    0       1       0
2    0       1       0


In [13]:
# separate out predictors and response variables
# Do we need to standardize both predictors and outcome or just one of thoese two?
# normalize the data 
X=toyotacorolla_df[['Age_08_04', 'KM', 'CNG','Diesel','Petrol', 'HP', 'Automatic', 'Doors', 'Quarterly_Tax', 'Mfr_Guarantee', 'Guarantee_Period', 'Airco', 'Automatic_airco', 'CD_Player', 'Powered_Windows', 'Sport_Model', 'Tow_Bar']]
y=toyotacorolla_df['Price']


scaler=StandardScaler()
scaler.fit(X)
X=scaler.transform(X)
# we only need to standardize X.

In [17]:
# partition data
X_training, X_valid, y_training, y_valid = train_test_split(X, y, test_size=.4, random_state=12)
print(X_training.shape, X_valid.shape)



(861, 17) (575, 17)


In [15]:
# train neural network with 2 layers of 5 hidden nodes on each layer
# don't forget to bring y_train back to a single array use dataframename.ravel() in the model fit.
toyotacorolla_mlp=MLPRegressor(hidden_layer_sizes=(5,5), max_iter=10000)
toyotacorolla_mlp.fit(X_training, y_training.ravel())

MLPRegressor(hidden_layer_sizes=(5, 5), max_iter=10000)

In [16]:
# Calculate predictions on validation dataset.
# compute model accuracy using .score()
y_predict=toyotacorolla_mlp.predict(X_valid)

print(toyotacorolla_mlp.score(X_valid, y_valid))
print(toyotacorolla_mlp.score(X_training, y_training))

0.8694980465286769
0.9107305097948526


In [23]:
# Network structure
# Hint: sample code given in in class exercise
for i, (weights, intercepts) in enumerate(zip(mlp.coefs_, mlp.intercepts_)):
    print('Hidden layer' if i == 0 else 'Output layer', '{0[0]} => {0[1]}'.format(weights.shape))
    print(' Intercepts:\n ', intercepts)
    print(' Weights:')
    for weight in weights:
        print(' ', weight)
    print()

Hidden layer 2 => 3
 Intercepts:
  [-1.05018984  1.01083091 -0.74328605]
 Weights:
  [ 3.84666335 -3.46430064  2.85409368]
  [ 2.62543266 -2.52332552  2.29352759]

Output layer 3 => 1
 Intercepts:
  [-3.03437227]
 Weights:
  [7.84086078]
  [-7.75163921]
  [5.36739226]



In [17]:
# Can we print out the confusion_matrix and classification_report?
# If you can, show us the print out. If you cannot, can you explain why?
# we can't it's something about using the MLPRegressor vs MLPClassifier

In [18]:
# Interpret your results:
#this model although needs more iterations, can fairly accurately pick the price of car.
# Lesson learned from this excercise:


