In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [2]:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Embedding, LSTM
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam, Adadelta
from tensorflow.keras.preprocessing.text import Tokenizer

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.utils import resample

tf.__version__

'2.1.0'

In [None]:
# Load the two CSV datasets used below; both paths are relative to the notebook.
banknotes = pd.read_csv('./banknotes.csv')
# Only the first 3000 rows are read, and header=None tells pandas the file has
# no header row (columns are labelled 0, 1, 2, ...).
mnist = pd.read_csv('./mnist_train.csv', nrows=3000, header=None)

# Tensors, layers, and autoencoders

## It's a flow of tensors

In [None]:
# use banknotes dataset here: print its (rows, columns) shape, then display the first rows
print(banknotes.shape)
banknotes.head()

In [None]:
# Separate the feature columns from the 'class' label column.
X_bank = banknotes.drop(columns='class').to_numpy(float)
y_bank = banknotes['class'].to_numpy(int)

# Hold out 30% of the rows for testing, preserving the class balance (stratify)
# and fixing the seed so the split is the same on every run.
X_train_bank, X_test_bank, y_train_bank, y_test_bank = train_test_split(
    X_bank, y_bank,
    test_size=0.3,
    stratify=y_bank,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows, so no test statistics leak in.
s = StandardScaler().fit(X_train_bank)
X_train_bank = s.transform(X_train_bank)
X_test_bank = s.transform(X_test_bank)

In [None]:
# Binary classifier for the banknotes data: 4 input features -> 2 hidden ReLU
# units -> 1 sigmoid output.
model = Sequential([
    Dense(2, input_shape=(4,), activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# show the model's current (still untrained) weight arrays
model.get_weights()

In [None]:
# get input tensor from 1st layer of model
inp = model.layers[0].input

# get output tensor from 1st layer of model
out = model.layers[0].output

# define a backend function that maps the input tensor to the 1st layer's output tensor
inp_to_out = K.function([inp], [out])

# pass X_test_bank through the 1st layer and print the shape of the resulting activations
print(inp_to_out([X_test_bank])[0].shape)

## Neural separation

In [None]:
# Train the banknote model one epoch at a time and, on every 4th iteration,
# scatter-plot what the two hidden neurons output for the test set. This gives
# six snapshots (i = 0, 4, ..., 20) that fill the 2x3 grid of panels.
fig, axs = plt.subplots(2, 3, figsize=(15, 8))
panels = axs.flatten()  # flatten once instead of on every use inside the loop

for i in range(0, 21):
    # train model for 1 more epoch; weights carry over between iterations
    h = model.fit(X_train_bank, y_train_bank,
                  batch_size=16,
                  epochs=1,
                  verbose=0)

    if i % 4 == 0:
        # get output at first layer
        layer_output = inp_to_out([X_test_bank])[0]

        # eval model acc for this epoch; verbose=0 keeps evaluate() from
        # printing a progress bar for every snapshot
        test_acc = model.evaluate(X_test_bank, y_test_bank, verbose=0)[1]

        # plot 1st vs 2nd neuron output, coloured by the true class
        panel = panels[i // 4]
        panel.scatter(layer_output[:, 0], layer_output[:, 1],
                      c=y_test_bank, cmap='rainbow')
        panel.set(title=f'Epoch:{i}, Test Acc: {test_acc: .1%}',
                  xlabel='Neuron 1 output',
                  ylabel='Neuron 2 output')

plt.show()

If you take a look at the graphs, you can see how the neurons learn to spread out the inputs depending on whether they are fake or legit dollar bills (each fake dollar bill is represented as a purple dot). At the start, the outputs for the two classes are close to each other; as the epochs go by, the weights are adjusted so that fake and legit dollar bills produce outputs that lie further and further apart.

## Building an autoencoder

In [None]:
# use mnist dataset for this one

# define autoencoder arch: 784 inputs -> 32-unit ReLU bottleneck (the encoding)
# -> 784 sigmoid outputs (the reconstruction)
autoencoder = Sequential()

autoencoder.add(Dense(32, input_shape=(784,), activation='relu'))
autoencoder.add(Dense(784, activation='sigmoid'))

# `learning_rate` is the supported keyword for Keras optimizers; `lr` is a
# deprecated alias, so it is not used here.
autoencoder.compile(optimizer=Adadelta(learning_rate=1.0),
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

autoencoder.summary()

## De-noising like an autoencoder

In [None]:
# Column 0 of the CSV is used as the label; the remaining columns are the
# pixel values, divided by 255 here.
X_mnist = mnist.iloc[:, 1:].to_numpy(np.float32) / 255
y_mnist = mnist.iloc[:, 0].to_numpy(np.int32)

# Hold out 10% as a test set, stratified by digit, with a fixed seed.
X_train_mnist, X_test_mnist, y_train_mnist, y_test_mnist = train_test_split(X_mnist, y_mnist,
                                                                            test_size=0.1,
                                                                            stratify=y_mnist,
                                                                            random_state=42)

# add noise to test data
# A dedicated seeded generator makes the noisy images identical on every run,
# matching the seeded split above (the global NumPy RNG is left untouched).
noise_rng = np.random.RandomState(42)
noise = 0.2 * noise_rng.normal(size=X_test_mnist.shape)
# clip so the noisy pixels stay inside [0, 1]
X_test_mnist = np.clip(X_test_mnist + noise, 0, 1)

In [None]:
# train model
# Stop early once the training loss ('loss') has failed to improve for 3
# epochs in a row; no validation data is passed to fit() below.
early_stop = EarlyStopping(monitor='loss',
                           patience=3)

# Input and target are the same array: the network learns to reconstruct its input.
# epochs=100 is an upper bound; early_stop may end training sooner.
autoencoder.fit(X_train_mnist, X_train_mnist,
                epochs=100,
                batch_size=256,
                callbacks=[early_stop],
                verbose=2)

In [None]:
def show_encodings(encodings, number):
    '''
    Plot the first (up to) five noisy MNIST test images labelled `number`
    in the top row, with their encodings in the bottom row.

    Reads the module-level `y_test_mnist` and `X_test_mnist`; `encodings`
    is indexed with the same row positions as those arrays.
    '''
    matches = np.where(y_test_mnist == number)[0]
    first_five = matches[:5]

    fig, axs = plt.subplots(2, 5, figsize=(15, 6))

    pairs = zip(X_test_mnist[first_five], encodings[first_five])
    for col, (noisy_im, code) in enumerate(pairs):
        top, bottom = axs[0, col], axs[1, col]

        top.imshow(noisy_im.reshape(28, 28), cmap='gray')
        # repeat the encoding vector over 32 rows so it renders as an image
        bottom.imshow(np.tile(code, (32, 1)), cmap='gray')

        # hide grid lines and ticks on both panels of this column
        for panel in (top, bottom):
            panel.grid(False)
            panel.axis('off')

    # row titles are attached to the middle column
    axs[0, 2].set(title='Noisy Images')
    axs[1, 2].set(title='Encodings')
    plt.show()
    
def compare_ims(noisy, decoded):
    '''
    Display noisy and decoded images side by side.

    Picks up to 5 distinct random row positions and plots the selected rows
    of `noisy` (top row) above the same rows of `decoded` (bottom row).
    Each selected row is reshaped to a 28x28 image.

    Parameters
    ----------
    noisy : array indexable by an integer array, rows reshapeable to (28, 28)
    decoded : array with rows aligned to `noisy`
    '''
    n_cols = 5
    # Sampling without replacement raises ValueError when asked for more rows
    # than exist, so never request more than are available.
    n_show = min(n_cols, noisy.shape[0])
    idx = np.random.choice(noisy.shape[0],
                           size=n_show,
                           replace=False)
    noisy_ims = noisy[idx]
    decoded_ims = decoded[idx]

    fig, axs = plt.subplots(2, n_cols, figsize=(15, 6))

    for col, (n_im, d_im) in enumerate(zip(noisy_ims, decoded_ims)):
        axs[0, col].imshow(n_im.reshape(28, 28),
                           cmap='gray')
        axs[1, col].imshow(d_im.reshape(28, 28),
                           cmap='gray')

    # Hide grid lines and ticks on every panel, including any left unused
    # when fewer than n_cols images were available.
    for panel in axs.flat:
        panel.grid(False)
        panel.axis('off')

    # row titles are attached to the middle column
    axs[0, 2].set(title='Noisy Images')
    axs[1, 2].set(title='Decoded Images')
    plt.show()

In [None]:
# Build a stand-alone encoder that reuses the autoencoder's first layer
# (the same layer object, so it shares the autoencoder's weights).
encoder = Sequential([autoencoder.layers[0]])

# encode the noisy test images, then display the encodings for the digit 2
encodings = encoder.predict(X_test_mnist)
show_encodings(encodings=encodings, number=2)

In [None]:
# predict on noisy images with autoencoder
decoded_imgs = autoencoder.predict(X_test_mnist)

# show a random sample of noisy inputs above their reconstructions
compare_ims(X_test_mnist, decoded_imgs)

# Intro to CNNs

## Building a CNN model

In [None]:
# CNN for 28x28 single-channel images: two 3x3 convolution layers (32 then 16
# filters), flattened into a 10-way softmax classifier.
model = Sequential([
    Conv2D(32, kernel_size=3, input_shape=(28, 28, 1), activation='relu'),
    Conv2D(16, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(10, activation='softmax'),
])

# categorical_crossentropy expects one-hot encoded targets
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# print the layer-by-layer output shapes and parameter counts of the CNN
model.summary()

In [None]:
# Only the second (test) split returned by load_data() is kept; the training
# split is discarded into `_` because 60k images is a lot for this simple case.
(_, _), (X_mnist2d, y_mnist2d) = tf.keras.datasets.mnist.load_data()

In [None]:
# preprocess data
# NOTE(review): this cell overwrites its own inputs, so re-running it without
# re-running the load cell divides by 255 and one-hot encodes a second time.
X_mnist2d = X_mnist2d.astype('float32') / 255
X_mnist2d = np.expand_dims(X_mnist2d, -1) # append a trailing axis so each sample matches the CNN's (28, 28, 1) input_shape

# one-hot encode the labels, as the categorical_crossentropy loss expects
y_mnist2d = to_categorical(y_mnist2d)

In [None]:
# Hold out 10% of the samples as a test set, with a fixed seed.
# NOTE(review): stratify receives the one-hot label matrix rather than a 1-D
# class vector -- confirm this stratifies per digit as intended.
X_tr_m2d, X_tt_m2d, y_tr_m2d, y_tt_m2d = train_test_split(X_mnist2d, y_mnist2d,
                                                          test_size=0.1,
                                                          stratify=y_mnist2d,
                                                          random_state=42)

In [None]:
# report the sample count and per-sample dimensions of each split
print(f'{X_tr_m2d.shape[0]} train samples with dims {X_tr_m2d.shape[1:]}')
print(f'{X_tt_m2d.shape[0]} test samples with dims {X_tt_m2d.shape[1:]}')

In [None]:
# train model
# Stop once val_loss has failed to improve for 2 epochs in a row.
early_stop = EarlyStopping(monitor='val_loss',
                           patience=2)

# epochs=1000 is only an upper bound; early_stop is expected to end training
# sooner. validation_split=0.2 holds back 20% of the training data to compute
# the val_loss that early_stop monitors.
model.fit(X_tr_m2d, y_tr_m2d,
          epochs=1000,
          batch_size=256,
          validation_split=0.2,
          callbacks=[early_stop],
          verbose=2)

## Looking at convolutions

In [None]:
# obtain a reference to the output tensor of the first layer (the first Conv2D)
layer1_out = model.layers[0].output

# build a model that maps the first layer's input to the first layer's output,
# reusing the trained layer rather than creating a new one
layer1_model = Model(inputs=model.layers[0].input,
                     outputs=layer1_out)

# use this model to compute the first-layer activations for the test images
activations = layer1_model.predict(X_tt_m2d)

In [None]:
# Show two of the first conv layer's feature maps for the first test image.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(6, 3))

# (axes, zero-based channel index into `activations`, panel title)
filter_panels = [
    (ax1, 14, 'Conv at 15th filter'),
    (ax2, 17, 'Conv at 18th filter'),
]

for ax, channel, panel_title in filter_panels:
    ax.matshow(activations[0, :, :, channel], cmap='viridis')
    ax.grid(False)
    ax.axis('off')
    ax.set(title=panel_title)

plt.show()

## Preparing your input image

In [None]:
# we will use a pretrained resnet50 here
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, ResNet50, decode_predictions

In [None]:
# load image with the right target size for model
img = image.load_img('./naz.jpg', target_size=(224, 224))

# turn im into array
img_array = image.img_to_array(img)

# display the image; the pixel values are divided by 255 for imshow
plt.imshow(img_array / 255)
plt.grid(False)
plt.axis('off')
plt.title('Our dog Naz')
plt.show()

# expand dims of image: add a leading axis so the single image forms a batch of one
img_expanded = np.expand_dims(img_array, axis=0)

# pre-process img in the same way original images were
# NOTE(review): preprocess_input may modify its float input in place -- the
# image is displayed above, before this call, so the plot is unaffected.
img_ready = preprocess_input(img_expanded)

## Using a real world model

In [None]:
# instantiate a ResNet50 model with 'imagenet' weights
# (this rebinds the name `model`, which referred to the CNN in earlier cells)
model = ResNet50(weights='imagenet')

# predict with ResNet50 on prepped img
preds = model.predict(img_ready)

# decode first 3 preds; [0] selects the results for the first (only) image in the batch
print(f'Predicted: {decode_predictions(preds, top=3)[0]}')

Naz is a Labrador Retriever, but I looked it up and he does look like a Great Pyrenees from some angles. Cute dogssssss.

# Intro to LSTMs

## Text predictions with LSTMs

In [3]:
# corpus for the next-word model below: a single lowercase string without punctuation
text = 'it is not the strength of the body but the strength of the spirit it is useless to meet revenge with revenge it will heal nothing even the smallest person can change the course of history all we have to decide is what to do with the time that is given us the burned hand teaches best after that advice about fire goes to the heart'

In [4]:
# Tokenise the corpus on whitespace.
words = text.split()

# Build overlapping 4-word windows, sliding forward one word at a time.
# NOTE(review): the range stops at len(words) - 1, so the window that ends on
# the very last word of the text is never produced -- confirm this is intended.
sentences = [' '.join(words[end - 4:end]) for end in range(4, len(words))]

# Fit a Tokenizer on the windows to build the word -> index vocabulary.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)

# Encode every window as a list of word indices and preview the first five.
sequences = tokenizer.texts_to_sequences(sentences)
print(f'Sentences:\n{sentences[:5]}\nSequences:\n{sequences[:5]}')

Sentences:
['it is not the', 'is not the strength', 'not the strength of', 'the strength of the', 'strength of the body']
Sequences:
[[5, 2, 42, 1], [2, 42, 1, 6], [42, 1, 6, 4], [1, 6, 4, 1], [6, 4, 1, 10]]


## Build your LSTM model

In [5]:
# Number of distinct indices the model must handle: one per known word, plus
# one for index 0, which the Tokenizer never assigns to a word.
vocab_size = len(tokenizer.index_word) + 1 # account for the reserved 0th index

In [33]:
# Next-word model: embed 3 word indices, summarise them with an LSTM, then
# classify over the whole vocabulary.
model = Sequential([
    Embedding(input_dim=vocab_size, input_length=3, output_dim=8),  # turn words into vectors
    LSTM(32),
    Dense(32, activation='relu'),
    Dense(vocab_size, activation='softmax'),
])

model.summary()

model.compile(
    optimizer=Adam(0.1),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 3, 8)              352       
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                5248      
_________________________________________________________________
dense_4 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_5 (Dense)              (None, 44)                1452      
Total params: 8,108
Trainable params: 8,108
Non-trainable params: 0
_________________________________________________________________


In [34]:
# Split every 4-index sequence into the model input (first 3 word indices)
# and the target (the 4th word index).
X = np.array([seq[:3] for seq in sequences])
y = np.array([seq[3] for seq in sequences])

# One-hot encode the targets. The width is tied to vocab_size (instead of a
# hard-coded 44) so it always matches the softmax output layer, even if the
# corpus or the tokenizer changes.
y = to_categorical(y, num_classes=vocab_size)

In [35]:
# train the next-word model for 100 epochs, logging one line per epoch
model.fit(X, y,
          epochs=100,
          verbose=2)

Train on 62 samples
Epoch 1/100
62/62 - 2s - loss: 3.8315 - accuracy: 0.0161
Epoch 2/100
62/62 - 0s - loss: 3.7939 - accuracy: 0.1129
Epoch 3/100
62/62 - 0s - loss: 3.4509 - accuracy: 0.1452
Epoch 4/100
62/62 - 0s - loss: 3.1817 - accuracy: 0.1452
Epoch 5/100
62/62 - 0s - loss: 2.7797 - accuracy: 0.2097
Epoch 6/100
62/62 - 0s - loss: 2.5463 - accuracy: 0.2258
Epoch 7/100
62/62 - 0s - loss: 2.3511 - accuracy: 0.2581
Epoch 8/100
62/62 - 0s - loss: 1.9126 - accuracy: 0.3065
Epoch 9/100
62/62 - 0s - loss: 1.6205 - accuracy: 0.4839
Epoch 10/100
62/62 - 0s - loss: 1.2067 - accuracy: 0.5645
Epoch 11/100
62/62 - 0s - loss: 1.0957 - accuracy: 0.5968
Epoch 12/100
62/62 - 0s - loss: 0.6913 - accuracy: 0.7581
Epoch 13/100
62/62 - 0s - loss: 0.5514 - accuracy: 0.8226
Epoch 14/100
62/62 - 0s - loss: 0.4032 - accuracy: 0.8387
Epoch 15/100
62/62 - 0s - loss: 0.3237 - accuracy: 0.8548
Epoch 16/100
62/62 - 0s - loss: 0.1854 - accuracy: 0.9516
Epoch 17/100
62/62 - 0s - loss: 0.0960 - accuracy: 0.9839
Epo

<tensorflow.python.keras.callbacks.History at 0x1a479901148>

In [36]:
def predict_text(test_text, model=model):
    '''
    Predict the word that follows a 3-word text.

    Parameters
    ----------
    test_text : str
        Exactly three whitespace-separated words, all known to the
        module-level `tokenizer`.
    model : Keras model, optional
        Model used for the prediction. The default is bound when this
        function is defined, so re-run this cell after rebuilding `model`
        (or pass the model explicitly).

    Returns
    -------
    str or bool
        The predicted next word, or False (after printing a message) when
        the input cannot be turned into exactly three word indices.
    '''
    if len(test_text.split()) != 3:
        print('Text input should be 3 words!')
        return False

    # Turn test_text into sequence of numbers
    test_seq = tokenizer.texts_to_sequences([test_text])

    # Words the tokenizer has never seen are dropped from the sequence, which
    # would hand the model an input of the wrong length.
    if len(test_seq[0]) != 3:
        print('Text input should be 3 words known to the tokenizer!')
        return False

    test_seq = np.array(test_seq)

    # use model passed as a parameter to predict the next word
    probs = model.predict(test_seq)[0]

    # Index 0 is never assigned to a word, so it is excluded from the argmax;
    # the +1 maps the position back to a real word index.
    pred = int(probs[1:].argmax()) + 1

    # return word that maps to the prediction
    return tokenizer.index_word[pred]

In [40]:
# ask the trained model for the word that follows a 3-word prompt
predict_text('strength of the')

'spirit'