## MLP4

### Neural Network Model

## Setup

In [None]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass

# TensorFlow ≥2.0 is required
import tensorflow as tf
assert tf.__version__ >= "2.0"

# Common imports
import numpy as np
import pandas as pd
import os

# to make this notebook's output stable across runs
# Seeding NumPy alone is not enough here: the Keras model built later draws
# from TensorFlow's own random generator, so that one is seeded as well.
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Font sizes applied to the axis labels and tick labels of every figure
mpl.rcParams.update({
    "axes.labelsize": 14,
    "xtick.labelsize": 12,
    "ytick.labelsize": 12,
})

# Figures are written beneath <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "NN_deep"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)

# Create the folder up front; an already existing one is left untouched
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure inside ``IMAGES_PATH``.

    The file is named ``<fig_id>.<fig_extension>`` and written at
    ``resolution`` dpi. When ``tight_layout`` is true,
    ``plt.tight_layout()`` is applied before saving.
    """
    filename = fig_id + "." + fig_extension
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(os.path.join(IMAGES_PATH, filename), format=fig_extension, dpi=resolution)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

In [None]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise by NumPy."""
    decay = np.exp(-z)
    return 1 / (1 + decay)

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``."""
    floor = 0
    return np.maximum(floor, z)

def derivative(f, z, eps=0.000001):
    """Approximate ``f'(z)`` with a central difference of half-width ``eps``.

    ``f`` is evaluated at ``z + eps`` and ``z - eps``; the difference of the
    two values is divided by ``2 * eps``.
    """
    ahead = f(z + eps)
    behind = f(z - eps)
    return (ahead - behind) / (2 * eps)


# 200 evenly spaced inputs shared by every activation curve
z = np.linspace(-5, 5, 200)

plt.figure(figsize=(11,4))

# Left-hand panel of a 1x2 grid: the activation functions themselves
plt.subplot(121)
activation_curves = [
    (np.sign(z), "r-", 1, "Step"),
    (sigmoid(z), "g--", 2, "Sigmoid"),
    (np.tanh(z), "b-", 2, "Tanh"),
    (relu(z), "m-.", 2, "ReLU"),
]
for values, line_style, width, name in activation_curves:
    plt.plot(z, values, line_style, linewidth=width, label=name)
plt.grid(True)
plt.legend(loc="center right", fontsize=14)
plt.title("Activation functions", fontsize=14)
# Clip the y-range to [-1.2, 1.2]; ReLU grows well past that on the right
plt.axis([-5, 5, -1.2, 1.2])

### Import TensorFlow and Keras

In [7]:
import tensorflow as tf
from tensorflow import keras

### Data Loading

In [1]:
# read data from CSV file to dataframe
df = pd.read_csv(r"./input/kc_sales_cleaned.csv")
# `head` is a method: without the call parentheses the cell prints the
# bound-method repr rather than the first rows of the frame.
print(df.head())

NameError: name 'pd' is not defined

### Quick view of the data

In [9]:
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21613 entries, 0 to 21612
Data columns (total 25 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     21613 non-null  int64  
 1   id             21613 non-null  int64  
 2   date           21613 non-null  object 
 3   price          21613 non-null  float64
 4   bedrooms       21613 non-null  int64  
 5   bathrooms      21613 non-null  float64
 6   sqft_living    21613 non-null  int64  
 7   sqft_lot       21613 non-null  int64  
 8   floors         21613 non-null  float64
 9   waterfront     21613 non-null  int64  
 10  view           21613 non-null  int64  
 11  condition      21613 non-null  int64  
 12  grade          21613 non-null  int64  
 13  sqft_above     21613 non-null  int64  
 14  sqft_basement  21613 non-null  int64  
 15  yr_built       21613 non-null  int64  
 16  yr_renovated   21613 non-null  int64  
 17  zipcode        21613 non-null  int64  
 18  lat   

### Training and testing datasets

In [10]:
# Import `train_test_split` from `sklearn.model_selection`
from sklearn.model_selection import train_test_split

# Features: bedrooms, bathrooms, sqft_living, sqft_lot and 'most_recent'
feature_columns = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'most_recent']
X = df[feature_columns]

print(X[:5])

# Continue with the plain NumPy array behind the DataFrame
X = X.values

# Target labels: the price_range column, flattened to a 1-D array
y = np.ravel(df['price_range'])

# First 5 rows of X (attributes) and of y (class labels)
print(X[:5])
print(y[:5])

# Hold out 33% of the rows as the test set; the fixed random_state keeps
# the split identical from run to run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


   bedrooms  bathrooms  sqft_living  sqft_lot  most_recent
0         3       1.00         1180      5650         1955
1         3       2.25         2570      7242         1991
2         2       1.00          770     10000         1933
3         4       3.00         1960      5000         1965
4         3       2.00         1680      8080         1987
[[3.000e+00 1.000e+00 1.180e+03 5.650e+03 1.955e+03]
 [3.000e+00 2.250e+00 2.570e+03 7.242e+03 1.991e+03]
 [2.000e+00 1.000e+00 7.700e+02 1.000e+04 1.933e+03]
 [4.000e+00 3.000e+00 1.960e+03 5.000e+03 1.965e+03]
 [3.000e+00 2.000e+00 1.680e+03 8.080e+03 1.987e+03]]
[0 0 0 1 0]


### Standardize the data

In [11]:
# Import `StandardScaler` from `sklearn.preprocessing`
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training set only
scaler = StandardScaler()
scaler.fit(X_train)

# Apply that one fitted scaler to both the train set and the test set
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## Model Construction

- input layer (5 attributes = 5 units)
- 2 hidden layers (6 units each)
- output layer (1 unit)

In [12]:
# Build the network in one step, layers listed from input to output
model = keras.models.Sequential([
    # First hidden layer; it also declares the shape of the 5-attribute input
    keras.layers.Dense(6, activation='relu', input_shape=(5,)),
    # Second hidden layer
    keras.layers.Dense(6, activation='relu'),
    # Output layer: a single sigmoid unit
    keras.layers.Dense(1, activation='sigmoid'),
])

### Information about the model

In [13]:
# A notebook echoes only the last expression of a cell, so the intermediate
# results are printed explicitly; as bare expressions they were discarded.

# Model output shape
print(model.output_shape)

# Model summary (prints its own table)
model.summary()

# Model config
print(model.get_config())

# List all weight tensors (last expression, shown as the cell output)
model.get_weights()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 6)                 36        
                                                                 
 dense_1 (Dense)             (None, 6)                 42        
                                                                 
 dense_2 (Dense)             (None, 1)                 7         
                                                                 
Total params: 85
Trainable params: 85
Non-trainable params: 0
_________________________________________________________________


[array([[-0.4781261 , -0.33778715,  0.22476727, -0.15862119,  0.1565783 ,
         -0.10338265],
        [ 0.11039698, -0.15762597,  0.60052377, -0.54928106,  0.03220803,
          0.46672982],
        [ 0.20325297, -0.5031246 , -0.72721124,  0.5446814 , -0.44708326,
          0.6209782 ],
        [-0.13168478,  0.7341998 ,  0.6920554 , -0.15457183, -0.46402144,
         -0.0337373 ],
        [-0.48461902,  0.55005676, -0.04535109,  0.43749827, -0.7269376 ,
          0.6717933 ]], dtype=float32),
 array([0., 0., 0., 0., 0., 0.], dtype=float32),
 array([[ 0.59263796,  0.20407099, -0.36138645, -0.63948584, -0.43124217,
         -0.59066045],
        [-0.5717014 , -0.33579034, -0.6152093 , -0.54454756, -0.38181537,
          0.30511564],
        [-0.0787273 , -0.64018077, -0.5137613 , -0.409007  ,  0.25725543,
          0.42909485],
        [ 0.63762814,  0.45585698,  0.12942135, -0.5963216 , -0.16481507,
          0.31138903],
        [-0.4039988 , -0.00677437,  0.40279406,  0.6677291 , 

### Compile the model and fit it to the data

In [14]:
# Binary cross-entropy loss with the Adam optimizer; accuracy is the tracked metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train for 3 epochs with one sample per batch (batch_size=1)
model.fit(x=X_train, y=y_train, batch_size=1, epochs=3, verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x1a90555c610>

### Predict values

In [15]:
# Threshold the model's sigmoid outputs at 0.5 to obtain 0/1 class labels
probabilities = model.predict(X_test)
y_pred = (probabilities > 0.5).astype("int32")

# Show the first five true labels next to the first five predictions
print(y_test[:5])
print(y_pred[:5])

[0 1 1 1 1]
[[0]
 [1]
 [1]
 [1]
 [1]]


### Evaluate the model

In [16]:
# Loss and accuracy (as configured in compile) measured on the held-out test set
score = model.evaluate(x=X_test, y=y_test, verbose=1)

print(score)

[0.436940461397171, 0.7954577207565308]


In [17]:
# Import the modules from `sklearn.metrics`
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, cohen_kappa_score

# Confusion matrix of the true test labels against the predictions
cmatrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cmatrix)

# Precision, recall and F1 score, computed in that order
precision, recall, f1 = (
    metric(y_test, y_pred) for metric in (precision_score, recall_score, f1_score)
)

print('precision: {:.4f}\nrecall: {:.4f}\nf1 score: {:.4f}'.format(precision, recall, f1))

[[3888  624]
 [ 835 1786]]
precision: 0.7411
recall: 0.6814
f1 score: 0.7100
