# Introduction to deep learning

## Using the Keras package:

Open a shell and install Keras.

For anaconda: conda install keras

For standard python: pip install keras

In [2]:
import keras

Using TensorFlow backend.


Loading IMDB data:

In [426]:
import numpy as np
import pandas as pd

# Read the tab-separated IMDB reviews (first row is the header) and shuffle
# all rows in the same step; the fixed random_state keeps the order repeatable.
df = (
    pd.read_csv("../Lecture-4/labeledTrainData.tsv", header=0, delimiter="\t", quoting=0)
    .sample(frac=1, random_state=100)
)

Creating the training and the testing data:

In [427]:
# The frame is already shuffled, so a positional cut is a random split:
# the first 20000 rows are used for training, everything after for validation.
reviews = df['review']
labels = df['sentiment']

X_train_text, X_valid_text = reviews[:20000], reviews[20000:]
y_train, y_valid = labels[:20000], labels[20000:]

print(y_train.shape, y_valid.shape)

(20000,) (5000,)


Vectorizing the data:

In [428]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# TF-IDF features; min_df=10 discards terms seen in fewer than 10 reviews.
# (CountVectorizer(binary=False, min_df=10) is the raw-count alternative.)
vect = TfidfVectorizer(min_df=10)

# The vocabulary is learned from the training reviews only and then reused
# unchanged for the validation reviews.
X_train = vect.fit_transform(X_train_text)
X_valid = vect.transform(X_valid_text)
print(X_train.shape)

(20000, 16414)


Training logistic regression as the baseline:

In [429]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics  import accuracy_score

# Baseline classifier on the vectorized reviews (C is scikit-learn's inverse
# regularisation strength); its validation accuracy is the number to beat.
LR = LogisticRegression(C=4.1).fit(X_train, y_train)

p_valid = LR.predict(X_valid)
print(accuracy_score(y_valid, p_valid))

0.8944


## Creating a 2-layer MLP:

In [76]:
from keras.models import Sequential
from keras.layers import Dense, Activation

# Two-layer perceptron: a 32-unit ReLU hidden layer fed by one input per
# vocabulary term, then a single sigmoid unit for the binary sentiment label.
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(1, activation='sigmoid'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_28 (Dense)             (None, 32)                525280    
_________________________________________________________________
dense_29 (Dense)             (None, 1)                 33        
Total params: 525,313
Trainable params: 525,313
Non-trainable params: 0
_________________________________________________________________


Compile the model using SGD with a learning rate of 0.1:

In [78]:
# Plain SGD with a step size of 0.1. Binary cross-entropy is the loss that
# pairs with the single sigmoid output of a two-class problem.
opt = keras.optimizers.SGD(lr=0.1)
model.compile(
    loss='binary_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

Training the model with batch_size = 1024 for 5 epochs:

In [79]:
# Five passes over the training set in mini-batches of 1024 reviews; the
# validation split is scored at the end of every epoch.
model.fit(
    X_train,
    y_train,
    batch_size=1024,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2360ae07a90>

Training using Adam optimizer:

In [84]:
# Rebuild the 32-unit ReLU network so training starts from new weights;
# the only change from the previous run is the optimizer (SGD -> Adam).
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(1, activation='sigmoid'))

# Binary problem: sigmoid output scored with binary cross-entropy
opt = keras.optimizers.Adam(lr=0.1)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(
    X_train,
    y_train,
    batch_size=1024,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x23612bf6dd8>

Using tanh as activation function:

In [86]:
# Same architecture and optimizer as the Adam run, but the hidden layer
# uses tanh instead of ReLU.
model = Sequential()
model.add(Dense(32, activation='tanh', input_shape=(X_train.shape[1],)))
model.add(Dense(1, activation='sigmoid'))

# Binary problem: sigmoid output scored with binary cross-entropy
opt = keras.optimizers.Adam(lr=0.1)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(
    X_train,
    y_train,
    batch_size=1024,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2360bc7bac8>

Adding dropout layer:

In [312]:
from keras.layers import Dropout

# 32-unit ReLU network with a dropout layer (rate 0.8) between the hidden
# layer and the sigmoid output.
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.8))
model.add(Dense(1, activation='sigmoid'))

# Binary problem: sigmoid output scored with binary cross-entropy
opt = keras.optimizers.Adam(lr=0.1)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(
    X_train,
    y_train,
    batch_size=1024,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x236786bc240>

Increasing the number of hidden units (32 → 64) and the dropout rate (0.8 → 0.9):

In [313]:
# Wider hidden layer (64 units instead of 32), with the dropout rate raised
# to 0.9 at the same time.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.9))
model.add(Dense(1, activation='sigmoid'))

# Binary problem: sigmoid output scored with binary cross-entropy
opt = keras.optimizers.Adam(lr=0.1)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(
    X_train,
    y_train,
    batch_size=1024,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x236d841ee48>

In [381]:
from keras.layers import BatchNormalization

# 64-unit ReLU layer -> batch normalisation -> dropout (rate 0.5) -> sigmoid.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Binary problem: sigmoid output scored with binary cross-entropy.
# Note the much smaller Adam step here (0.001 versus 0.1 in the other runs).
opt = keras.optimizers.Adam(lr=0.001)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(
    X_train,
    y_train,
    batch_size=1024,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x236dcf25e80>

Adding more layers:

In [430]:
# Three dense layers: 64 ReLU units (with dropout 0.8), then 8 tanh units,
# then the sigmoid output.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.8))
model.add(Dense(8, activation='tanh'))
model.add(Dense(1, activation='sigmoid'))

# Binary problem: sigmoid output scored with binary cross-entropy
opt = keras.optimizers.Adam(lr=0.1)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(
    X_train,
    y_train,
    batch_size=1024,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x23677f7ffd0>

## Getting word vectors:

In [432]:
# The kernel of the first Dense layer holds one row per vocabulary term, so
# each row can be read as that term's learned vector.
first_dense = model.layers[0]
word2vec = first_dense.get_weights()[0]
word2vec.shape

(16414, 64)

Compute the distance between words using cosine distance:

In [367]:
from scipy.spatial.distance import cdist
# Pairwise cosine distance between every pair of word vectors.
# NOTE: the result is vocabulary x vocabulary (16414 x 16414 above), which is
# on the order of 2 GB if stored as 64-bit floats.
dist = cdist(word2vec, word2vec, metric='cosine')

Print closest and farthest words:

In [375]:
# Vocabulary terms in column order, as an array so it can be fancy-indexed.
# Newer scikit-learn releases replaced get_feature_names() with
# get_feature_names_out(); use whichever this installation provides.
if hasattr(vect, 'get_feature_names_out'):
    vocab = np.array(vect.get_feature_names_out())
else:
    vocab = np.array(vect.get_feature_names())

words = ['love', 'amazing', 'bad', 'horrible']
for word in words:
    idx = vect.vocabulary_[word]  # column / embedding row of the query word
    order = dist[idx].argsort()   # nearest first, farthest last
    # Remove the query word by index rather than assuming it sorts first:
    # ties or rounding in the distances could place another term ahead of it.
    A = vocab[order[order != idx]]
    print(word, A)


love ['great' 'well' 'best' ..., 'worst' 'awful' 'horrible']
amazing ['excellent' 'wonderful' 'best' ..., 'waste' 'mess' 'awful']
bad ['worst' 'awful' 'unfunny' ..., 'wonderful' 'best' 'great']
horrible ['waste' 'worst' 'awful' ..., 'excellent' 'wonderful' 'great']


## Multi-class

Loading 20 news groups:

In [433]:
from sklearn.datasets import fetch_20newsgroups

# Three of the twenty newsgroups; headers, footers and quoted replies are
# stripped from each post.
cats = ['sci.space', 'talk.politics.mideast', 'comp.graphics']
fetch_options = dict(categories=cats, remove=('headers', 'footers', 'quotes'), shuffle=True)

# The dataset's own 'train' / 'test' subsets serve as training / validation
newsgroups_train = fetch_20newsgroups(subset='train', **fetch_options)
newsgroups_valid = fetch_20newsgroups(subset='test', **fetch_options)

train_data = newsgroups_train.data
valid_data = newsgroups_valid.data

Using count vectorizer to create features:

In [434]:
from sklearn.feature_extraction.text import CountVectorizer

# Raw term counts (not binarised); terms in fewer than 5 posts are dropped.
count = CountVectorizer(min_df=5, binary=False)

# NOTE: this rebinds X_train / X_valid, replacing the IMDB matrices used by
# the earlier cells.
X_train = count.fit_transform(train_data)
X_valid = count.transform(valid_data)

print(X_train.shape, X_valid.shape)


(1741, 5899) (1159, 5899)


For categorical classification, the target (y) must be converted to a one-hot encoding:

In [436]:
# One-hot encode the integer class ids for categorical cross-entropy.
# The width is set explicitly: left to itself, to_categorical sizes each
# matrix from the largest label present in that array, so a split missing the
# highest class id would come out narrower than the other one.
n_classes = len(newsgroups_train.target_names)
y_train = keras.utils.to_categorical(newsgroups_train.target, num_classes=n_classes)
y_valid = keras.utils.to_categorical(newsgroups_valid.target, num_classes=n_classes)
print(y_train.shape, y_valid.shape)

(1741, 3) (1159, 3)


Creating the MLP with 3 softmax outputs and starting training:

In [437]:
# 128-unit ReLU hidden layer with light dropout (rate 0.2), followed by a
# softmax over the three newsgroup classes.
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.2))
model.add(Dense(3, activation='softmax'))

# Multi-class problem: softmax output scored with categorical cross-entropy
# against the one-hot targets.
opt = keras.optimizers.Adam(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

Train on 1741 samples, validate on 1159 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x236b991eda0>

## Saving and loading the model:

In [438]:
# Write the trained network to an HDF5 file in the working directory
model.save(filepath='model.h5')

Loading the saved model (architecture and weights):

In [444]:
from keras.models import load_model

# Restore the network from disk; the summary confirms the layers came back
# without the architecture being redefined in code.
model = load_model(filepath='model.h5')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_164 (Dense)            (None, 128)               755200    
_________________________________________________________________
dropout_52 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_165 (Dense)            (None, 3)                 387       
Total params: 755,587
Trainable params: 755,587
Non-trainable params: 0
_________________________________________________________________


Auto saving the model based on best validation accuracy:

In [446]:
# Same multi-class network as before, rebuilt so training starts from new weights.
model = Sequential([
    Dense(128, activation = 'relu', input_shape=(X_train.shape[1],)),
    Dropout(.2),
    Dense(3, activation = 'softmax'),
])

# Multi-class problem: softmax output scored with categorical cross-entropy.
# The metric is requested by its short name 'acc' so that the logged key is
# 'val_acc' -- the key the checkpoint monitors below -- regardless of how the
# installed Keras release would spell out 'accuracy' in its logs.
opt = keras.optimizers.Adam(lr=0.01)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['acc'])

# Overwrite model.h5 only when validation accuracy beats the best seen so far
checkpoint = keras.callbacks.ModelCheckpoint('model.h5', monitor='val_acc', save_best_only=True, verbose = True)

model.fit(X_train, y_train, validation_data = (X_valid, y_valid), epochs=10, batch_size=256, callbacks = [checkpoint])

Train on 1741 samples, validate on 1159 samples
Epoch 1/10
Epoch 00001: val_acc improved from -inf to 0.76963, saving model to model.h5
Epoch 2/10
Epoch 00002: val_acc improved from 0.76963 to 0.86454, saving model to model.h5
Epoch 3/10
Epoch 00003: val_acc improved from 0.86454 to 0.88266, saving model to model.h5
Epoch 4/10
Epoch 00004: val_acc improved from 0.88266 to 0.89129, saving model to model.h5
Epoch 5/10
Epoch 00005: val_acc did not improve
Epoch 6/10
Epoch 00006: val_acc did not improve
Epoch 7/10
Epoch 00007: val_acc did not improve
Epoch 8/10
Epoch 00008: val_acc did not improve
Epoch 9/10
Epoch 00009: val_acc did not improve
Epoch 10/10
Epoch 00010: val_acc did not improve


<keras.callbacks.History at 0x236c1747be0>