# Machine Learning CA2

# **Boston Housing Dataset - Neural Network using TensorFlow**

## **1. Import Dependencies**

In [0]:

import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
import random

##  2. Load Dataset

In [0]:
boston = load_boston()

## 3. Seperate Data into Features and Labels and load them as a Pandas Dataframe

### 3.1 Features

In [0]:
features_df = pd.DataFrame(np.array(boston.data), columns=[boston.feature_names])
features_df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


### 3.2 Labels

In [0]:
labels_df = pd.DataFrame(np.array(boston.target), columns=['labels'])
labels_df.head()

Unnamed: 0,labels
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


### 3.3 Combined Data

In [0]:
# 
combined_data = pd.concat([features_df,labels_df], axis=1)
combined_data.head()

Unnamed: 0,"(CRIM,)","(ZN,)","(INDUS,)","(CHAS,)","(NOX,)","(RM,)","(AGE,)","(DIS,)","(RAD,)","(TAX,)","(PTRATIO,)","(B,)","(LSTAT,)",labels
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


## 4. Train Test Split

### 4.1 Import Library

In [0]:
from sklearn.model_selection import train_test_split

### 4.2  Train Test Split
### Training Data = 80% of Dataset
### Test Data = 20% of Dataset

In [0]:
random.seed( 1000 )
X_train, X_test, y_train, y_test = train_test_split(features_df, labels_df, test_size=0.2)

## 5. Data Pre-Processing

### 5.1 Import Library

In [0]:
from sklearn.preprocessing import StandardScaler

### 5.2 Train Data

### 5.2.1  Define the Preprocessing Method and Fit Training Data to it

In [0]:
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

### 5.2.2. Make X_train to be the Scaled Version of Data
#### This process scales all the values in all 6 columns and replaces them with the new values

In [0]:
X_train = pd.DataFrame(data=scaler.transform(X_train), columns=X_train.columns, index=X_train.index)

### 5.2.3. Converting from Pandas Dataframe to Numpy Arrays

In [0]:
X_train = np.array(X_train)
y_train = np.array(y_train)

### 5.2.4. Get the Type of Training Data

In [0]:
type(X_train), type(y_train)

(numpy.ndarray, numpy.ndarray)

### 5.3. Test Data

### 5.3.1 Define the Preprocessing Method and Fit Test Data to it

In [0]:
scal = StandardScaler()
scal.fit(X_test)

StandardScaler(copy=True, with_mean=True, with_std=True)

### 5.3.2. Make X_test to be the Scaled Version of Data
#### This process scales all the values in all columns and replaces them with the new values

In [0]:
X_test = pd.DataFrame(data=scal.transform(X_test), columns=X_test.columns, index=X_test.index)

### 5.3.3. Converting from Pandas Dataframe to Numpy Arrays

In [0]:
X_test = np.array(X_test)
y_test = np.array(y_test)

### 5.3.4. Get the Type of Test Data

In [0]:
type(X_test), type(y_test)

(numpy.ndarray, numpy.ndarray)

### 5.4. Define Feature Columns

In [0]:
features_df.columns

MultiIndex(levels=[['AGE', 'B', 'CHAS', 'CRIM', 'DIS', 'INDUS', 'LSTAT', 'NOX', 'PTRATIO', 'RAD', 'RM', 'TAX', 'ZN']],
           codes=[[3, 12, 5, 2, 7, 10, 0, 4, 9, 11, 8, 1, 6]])

In [0]:
# Make Feature Columns
feat_cols = [tf.feature_column.numeric_column('x', shape=np.array(X_train).shape[1:])]

### 5.5. Define Input Fuction

In [0]:
input_func = tf.estimator.inputs.numpy_input_fn({'x':X_train}, y_train, batch_size=1, num_epochs=2000, shuffle=True)

### 5.6. Set up Estimator Training Inputs

In [0]:
train_input_func = tf.estimator.inputs.numpy_input_fn(X_train, y_train, batch_size=1, num_epochs=1000, shuffle=False)

### 5.7. Set up Estimator Test Inputs

In [0]:
eval_input_func = tf.estimator.inputs.numpy_input_fn({'x': X_test}, y_test, batch_size=1, num_epochs=1, shuffle=False)

## 6. Build Model

### 6.1. Define DNN Regressor Model

In [0]:
dnn_model = tf.estimator.DNNRegressor(hidden_units=[5,3],feature_columns=feat_cols, optimizer='Adam')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp91evkezl', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f025db8fbe0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


### 6.2. Train the DNN Regressor Estimator

In [0]:
dnn_model.train(input_fn=input_func, steps=2000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmp91evkezl/model.ckpt.
INFO:tensorflow:loss = 278.37955, step = 1
INFO:tensorflow:global_step/sec: 561.019
INFO:tensorflow:loss = 27.345854, step = 101 (0.180 sec)
INFO:tensorflow:global_step/sec: 816.737
INFO:tensorflow:loss = 14.871847, step = 201 (0.124 sec)
INFO:tensorflow:global_step/sec: 394.74
INFO:tensorflow:loss = 31.014465, step = 301 (0.257 sec)
INFO:tensorflow:global_step/sec: 787.965
INFO:tensorflow:loss = 14.534007, step = 401 (0.123 sec)
INFO:tensorflow:global_step/sec: 792.667
INFO:tensorflow:loss = 27.826647, step = 501 (0.126 sec)
INFO:tensorflow:global_step/sec: 807.381
INFO:tensorflow:loss = 1.188221, step = 601 (0.123 sec)
INFO:tensorflow:global_step/sec: 853.881
INFO:tensorflow:lo

<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor at 0x7f025db8f630>

## 7. Evaluate the Model

In [0]:
dnn_model.evaluate(input_fn=eval_input_func)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-05-22T19:02:45Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp91evkezl/model.ckpt-2000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-05-22-19:02:45
INFO:tensorflow:Saving dict for global step 2000: average_loss = 18.175835, global_step = 2000, label/mean = 21.701962, loss = 18.175835, prediction/mean = 23.492506
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 2000: /tmp/tmp91evkezl/model.ckpt-2000


{'average_loss': 18.175835,
 'global_step': 2000,
 'label/mean': 21.701962,
 'loss': 18.175835,
 'prediction/mean': 23.492506}

### 7.1. Predictions

In [0]:
predictions = dnn_model.predict(input_fn=eval_input_func)

In [0]:
pred = list(predictions)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp91evkezl/model.ckpt-2000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


### 7.2. Get Predicted Values

In [0]:
predicted_vals = []

for pred in dnn_model.predict(input_fn=eval_input_func):
    predicted_vals.append(pred['predictions'])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp91evkezl/model.ckpt-2000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [0]:
print(predicted_vals)

[array([24.161057], dtype=float32), array([22.168526], dtype=float32), array([22.517918], dtype=float32), array([14.887156], dtype=float32), array([16.731007], dtype=float32), array([25.640305], dtype=float32), array([14.677034], dtype=float32), array([15.385169], dtype=float32), array([15.48018], dtype=float32), array([17.276558], dtype=float32), array([18.961937], dtype=float32), array([24.169746], dtype=float32), array([23.771873], dtype=float32), array([14.163431], dtype=float32), array([29.586006], dtype=float32), array([27.14098], dtype=float32), array([20.245157], dtype=float32), array([18.265371], dtype=float32), array([24.628494], dtype=float32), array([28.037624], dtype=float32), array([25.407795], dtype=float32), array([16.91374], dtype=float32), array([22.301388], dtype=float32), array([28.156616], dtype=float32), array([18.059528], dtype=float32), array([13.769519], dtype=float32), array([29.142403], dtype=float32), array([16.87007], dtype=float32), array([42.217102], dtyp

## 8. Performance Evalution

### 8.1 Import Library

In [0]:
from sklearn.metrics import mean_squared_error


### 8.2. Calculate the Mean Squared Error

In [0]:
mse = mean_squared_error(predicted_vals, y_test)
print('Mean Squared Error [DNNRegrssor]: ',mse)

Mean Squared Error [DNNRegrssor]:  18.17583817196329


##  9. Improve the Performance by changing Parameters

### 9.1.  Redefine DNN Regressor Model

In [0]:
dnn_model_imp = tf.estimator.DNNRegressor(hidden_units=[10,5,3],feature_columns=feat_cols, optimizer=tf.train.ProximalAdagradOptimizer(
      learning_rate=0.1,
      l1_regularization_strength=0.001
    ))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpsjf_64c2', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f025dbbf080>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


 ### 9.2. Re-train the DNN Regressor Estimator

In [0]:
dnn_model_imp.train(input_fn=input_func, steps=2000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpsjf_64c2/model.ckpt.
INFO:tensorflow:loss = 516.15045, step = 1
INFO:tensorflow:global_step/sec: 590.68
INFO:tensorflow:loss = 33.568157, step = 101 (0.173 sec)
INFO:tensorflow:global_step/sec: 768.469
INFO:tensorflow:loss = 84.73392, step = 201 (0.129 sec)
INFO:tensorflow:global_step/sec: 824.451
INFO:tensorflow:loss = 90.35194, step = 301 (0.123 sec)
INFO:tensorflow:global_step/sec: 826.15
INFO:tensorflow:loss = 7.699654, step = 401 (0.123 sec)
INFO:tensorflow:global_step/sec: 784.394
INFO:tensorflow:loss = 36.362152, step = 501 (0.124 sec)
INFO:tensorflow:global_step/sec: 780.61
INFO:tensorflow:loss = 0.37279916, step = 601 (0.132 sec)
INFO:tensorflow:global_step/sec: 817.517
INFO:tensorflow:loss 

<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor at 0x7f025dbdce48>

### 9.3. Re-evaluate the Model

In [0]:
dnn_model_imp.evaluate(input_fn=eval_input_func)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-05-22T19:02:51Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpsjf_64c2/model.ckpt-2000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-05-22-19:02:52
INFO:tensorflow:Saving dict for global step 2000: average_loss = 16.473116, global_step = 2000, label/mean = 21.701962, loss = 16.473116, prediction/mean = 22.228212
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 2000: /tmp/tmpsjf_64c2/model.ckpt-2000


{'average_loss': 16.473116,
 'global_step': 2000,
 'label/mean': 21.701962,
 'loss': 16.473116,
 'prediction/mean': 22.228212}

### 9.4. New Predictions

In [0]:
new_predictions = dnn_model_imp.predict(input_fn=eval_input_func)

In [0]:
new_pred = list(new_predictions)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpsjf_64c2/model.ckpt-2000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


### 9.5. New Predicted values

In [0]:
new_predicted_vals = []

for new_pred in dnn_model_imp.predict(input_fn=eval_input_func):
    new_predicted_vals.append(new_pred['predictions'])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpsjf_64c2/model.ckpt-2000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [0]:
print(new_predicted_vals)

[array([22.968992], dtype=float32), array([18.19383], dtype=float32), array([19.70009], dtype=float32), array([17.335318], dtype=float32), array([19.628239], dtype=float32), array([23.19961], dtype=float32), array([14.620789], dtype=float32), array([13.659842], dtype=float32), array([17.126797], dtype=float32), array([15.946292], dtype=float32), array([18.318806], dtype=float32), array([26.773472], dtype=float32), array([23.169434], dtype=float32), array([13.317824], dtype=float32), array([36.002567], dtype=float32), array([24.681437], dtype=float32), array([17.331697], dtype=float32), array([15.443076], dtype=float32), array([20.445696], dtype=float32), array([33.89844], dtype=float32), array([30.25496], dtype=float32), array([16.963692], dtype=float32), array([19.985613], dtype=float32), array([28.58043], dtype=float32), array([16.304296], dtype=float32), array([10.426877], dtype=float32), array([27.521208], dtype=float32), array([13.336765], dtype=float32), array([33.581894], dtype=

### 9.6. Re-calculate the Mean Squared Error

In [0]:
new_mse = mean_squared_error(new_predicted_vals, y_test)
print('Improved Mean Squared Error [DNNRegrssor]: ',new_mse)

Improved Mean Squared Error [DNNRegrssor]:  16.47311791925539


## 10. Compare Performace

In [0]:
print('Old Mean Squared Error: ',mse)
print('New Mean Squared Error: ',new_mse)

Old Mean Squared Error:  18.17583817196329
New Mean Squared Error:  16.47311791925539
