# Building Deep Learning Models with Keras

# Importing Libraries

In [1]:
# Import necessary modules
import pandas as pd
import numpy as np

import keras
!pip install keras-metrics
import keras_metrics as km

from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical

from keras.metrics import Precision, Recall

Using TensorFlow backend.


Collecting keras-metrics
  Downloading https://files.pythonhosted.org/packages/32/c9/a87420da8e73de944e63a8e9cdcfb1f03ca31a7c4cdcdbd45d2cdf13275a/keras_metrics-1.1.0-py2.py3-none-any.whl
Installing collected packages: keras-metrics
Successfully installed keras-metrics-1.1.0


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
path = '/content/drive/My Drive/Data Science/DataCamp/Datasets/'

# Regression Model

## Reading Data

In [0]:
df = pd.read_csv(path+'hourly_wages.csv')

In [5]:
df.head()

Unnamed: 0,wage_per_hour,union,education_yrs,experience_yrs,age,female,marr,south,manufacturing,construction
0,5.1,0,8,21,35,1,1,0,1,0
1,4.95,0,9,42,57,1,1,0,1,0
2,6.67,0,12,1,19,0,0,0,1,0
3,4.0,0,12,4,22,0,0,0,0,0
4,7.5,0,12,17,35,0,1,0,0,0


In [6]:
df.describe()

Unnamed: 0,wage_per_hour,union,education_yrs,experience_yrs,age,female,marr,south,manufacturing,construction
count,534.0,534.0,534.0,534.0,534.0,534.0,534.0,534.0,534.0,534.0
mean,9.024064,0.179775,13.018727,17.822097,36.833333,0.458801,0.655431,0.292135,0.185393,0.044944
std,5.139097,0.38436,2.615373,12.37971,11.726573,0.498767,0.475673,0.45517,0.388981,0.207375
min,1.0,0.0,2.0,0.0,18.0,0.0,0.0,0.0,0.0,0.0
25%,5.25,0.0,12.0,8.0,28.0,0.0,0.0,0.0,0.0,0.0
50%,7.78,0.0,12.0,15.0,35.0,0.0,1.0,0.0,0.0,0.0
75%,11.25,0.0,15.0,26.0,44.0,1.0,1.0,1.0,0.0,0.0
max,44.5,1.0,18.0,55.0,64.0,1.0,1.0,1.0,1.0,1.0


In [7]:
df.shape

(534, 10)

## Preparing Input Data

In [8]:
predictors = df.drop('wage_per_hour', axis=1).values
target = df.wage_per_hour.values

print(predictors.shape, target.shape)

(534, 9) (534,)


In [9]:
predictors

array([[ 0,  8, 21, ...,  0,  1,  0],
       [ 0,  9, 42, ...,  0,  1,  0],
       [ 0, 12,  1, ...,  0,  1,  0],
       ...,
       [ 1, 17, 25, ...,  0,  0,  0],
       [ 1, 12, 13, ...,  1,  0,  0],
       [ 0, 16, 33, ...,  0,  1,  0]])

In [10]:
target[0:5]

array([5.1 , 4.95, 6.67, 4.  , 7.5 ])

## Specifying the Model

In [0]:
# Save the number of columns in predictors: n_cols
n_cols = predictors.shape[1]

In [0]:
# Set up the model:
model = Sequential()
model.add(Dense(50, activation='relu', input_shape=(n_cols,)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

In [13]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 50)                500       
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1632      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 33        
Total params: 2,165
Trainable params: 2,165
Non-trainable params: 0
_________________________________________________________________


## Compiling the Model

In [0]:
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

In [15]:
# Verify that model contains information from compiling
print("Loss function: " + model.loss)

Loss function: mean_squared_error


## Fitting the Model

In [16]:
# Fit the model
model.fit(predictors, target, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f6327b03dd8>

# Classification Model

## Reading Data

In [0]:
df = pd.read_csv(path+'titanic_all_numeric.csv')

In [18]:
df.head()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare,male,age_was_missing,embarked_from_cherbourg,embarked_from_queenstown,embarked_from_southampton
0,0,3,22.0,1,0,7.25,1,False,0,0,1
1,1,1,38.0,1,0,71.2833,0,False,1,0,0
2,1,3,26.0,0,0,7.925,0,False,0,0,1
3,1,1,35.0,1,0,53.1,0,False,0,0,1
4,0,3,35.0,0,0,8.05,1,False,0,0,1


In [19]:
df.describe()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare,male,embarked_from_cherbourg,embarked_from_queenstown,embarked_from_southampton
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208,0.647587,0.188552,0.08642,0.722783
std,0.486592,0.836071,13.002015,1.102743,0.806057,49.693429,0.47799,0.391372,0.281141,0.447876
min,0.0,1.0,0.42,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,2.0,22.0,0.0,0.0,7.9104,0.0,0.0,0.0,0.0
50%,0.0,3.0,29.699118,0.0,0.0,14.4542,1.0,0.0,0.0,1.0
75%,1.0,3.0,35.0,1.0,0.0,31.0,1.0,0.0,0.0,1.0
max,1.0,3.0,80.0,8.0,6.0,512.3292,1.0,1.0,1.0,1.0


In [20]:
df.shape

(891, 11)

## Preparing Input Data

In [21]:
predictors = df.drop('survived', axis=1).values

# Convert the target to categorical: target
target = to_categorical(df.survived)

print(predictors.shape, target.shape)

(891, 10) (891, 2)


In [22]:
predictors

array([[3, 22.0, 1, ..., 0, 0, 1],
       [1, 38.0, 1, ..., 1, 0, 0],
       [3, 26.0, 0, ..., 0, 0, 1],
       ...,
       [3, 29.69911764705882, 1, ..., 0, 0, 1],
       [1, 26.0, 0, ..., 1, 0, 0],
       [3, 32.0, 0, ..., 0, 1, 0]], dtype=object)

In [23]:
target[0:5]

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.]], dtype=float32)

## Specifying the Model

In [0]:
# Save the number of columns in predictors: n_cols
n_cols = predictors.shape[1]

In [0]:
# Set up the model:
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(n_cols,)))
model.add(Dense(2, activation='softmax'))

In [26]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 32)                352       
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 66        
Total params: 418
Trainable params: 418
Non-trainable params: 0
_________________________________________________________________


## Compiling the Model

In [0]:
# https://en.wikipedia.org/wiki/Stochastic_gradient_descent
# Stochastic gradient descent

# Stochastic gradient descent (often abbreviated SGD) is an iterative method for optimizing an 
# objective function with suitable smoothness properties (e.g. differentiable or subdifferentiable). 
# It can be regarded as a stochastic approximation of gradient descent optimization, since it replaces 
# the actual gradient (calculated from the entire data set) by 
# an estimate thereof (calculated from a randomly selected subset of the data).

# [1] Especially in big data applications this reduces the computational burden, 
# achieving faster iterations in trade for a slightly lower convergence rate.

In [28]:
# Compile the model
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy', km.precision(), km.recall()])

tracking <tf.Variable 'Variable:0' shape=() dtype=int32, numpy=0> tp
tracking <tf.Variable 'Variable:0' shape=() dtype=int32, numpy=0> fp
tracking <tf.Variable 'Variable:0' shape=() dtype=int32, numpy=0> tp
tracking <tf.Variable 'Variable:0' shape=() dtype=int32, numpy=0> fn


## Fitting the Model

In [29]:
# Fit the model
model.fit(predictors, target, epochs=10, verbose=2)

Epoch 1/10
 - 0s - loss: 2.6944 - accuracy: 0.5971 - precision: 0.6684 - recall: 0.6509
Epoch 2/10
 - 0s - loss: 1.0994 - accuracy: 0.6296 - precision: 0.6622 - recall: 0.7318
Epoch 3/10
 - 0s - loss: 0.6544 - accuracy: 0.6644 - precision: 0.6872 - recall: 0.8803
Epoch 4/10
 - 0s - loss: 0.6362 - accuracy: 0.6734 - precision: 0.6422 - recall: 0.8635
Epoch 5/10
 - 0s - loss: 0.6284 - accuracy: 0.6779 - precision: 0.6490 - recall: 0.8802
Epoch 6/10
 - 0s - loss: 0.6250 - accuracy: 0.6801 - precision: 0.6848 - recall: 0.8635
Epoch 7/10
 - 0s - loss: 0.6135 - accuracy: 0.6914 - precision: 0.7000 - recall: 0.9095
Epoch 8/10
 - 0s - loss: 0.6013 - accuracy: 0.6925 - precision: 0.6738 - recall: 0.8597
Epoch 9/10
 - 0s - loss: 0.6001 - accuracy: 0.6824 - precision: 0.6922 - recall: 0.8778
Epoch 10/10
 - 0s - loss: 0.6021 - accuracy: 0.6869 - precision: 0.7172 - recall: 0.8794


<keras.callbacks.callbacks.History at 0x7f632178bb70>

## Making Predictions

In [0]:
pred_data = pd.read_csv(path+'titanic_pred_data.csv', header=None, dtype=None).values

In [31]:
pred_data[0:5]

array([[2, 34.0, 0, 0, 13.0, 1, False, 0, 0, 1],
       [2, 31.0, 1, 1, 26.25, 0, False, 0, 0, 1],
       [1, 11.0, 1, 2, 120.0, 1, False, 0, 0, 1],
       [3, 0.42, 0, 1, 8.5167, 1, False, 1, 0, 0],
       [3, 27.0, 0, 0, 6.975, 1, False, 0, 0, 1]], dtype=object)

In [32]:
# Calculate predictions: predictions
predictions = model.predict(pred_data)

# Calculate predicted probability of survival: predicted_prob_true
predicted_prob_true = predictions[:,1]

# print predicted_prob_true
print(predicted_prob_true)

[0.24886519 0.38357055 0.4059645  0.4439392  0.22969285 0.2082437
 0.06228171 0.3455494  0.21138734 0.51220745 0.24648583 0.28313595
 0.21676028 0.40583596 0.21429652 0.13560444 0.30941832 0.43221644
 0.12609424 0.37360138 0.5255678  0.24653621 0.06602918 0.30865693
 0.4089812  0.21917659 0.45425442 0.44341177 0.22748882 0.55751723
 0.4276729  0.4198093  0.23973085 0.27049148 0.30954364 0.5578874
 0.29300177 0.22677127 0.45000836 0.4580414  0.29436722 0.35873428
 0.48857406 0.19153236 0.3184986  0.13880649 0.40886387 0.20179334
 0.40406737 0.5978724  0.3913495  0.02752903 0.47201696 0.4683394
 0.3090339  0.34628037 0.54909956 0.26299247 0.39095157 0.23973085
 0.18050353 0.31981575 0.3142377  0.44051033 0.32263768 0.21965693
 0.3697121  0.48271683 0.24106853 0.4078408  0.24657063 0.46596697
 0.18551725 0.12320738 0.43306524 0.40144756 0.30721605 0.3004505
 0.22528654 0.55238354 0.42248228 0.19565742 0.33800486 0.27131256
 0.24579722 0.32205662 0.3032639  0.4936041  0.3585183  0.46583527