<a href="https://colab.research.google.com/github/sangeetsaurabh/PyTorch_Keras_Experiment/blob/master/Text_Number_Prediction/Hybrid_between_PyTorch_and_Keras_Tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Hybrid Keras and Tensorflow Model

In this notebook, embeddings are generated using Keras model. Pytorch LSTM model uses Keras generated embeddings.


#### Data Preparation

In [2]:
#### Make sure that the right version of Torch is there
!pip install torchtext==0.6.0
import torchtext
print(torchtext.__version__)

0.6.0


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
#drive.flush_and_unmount

In [0]:
#### Setting up the right seed to make Keras result more consistent
import numpy as np
import tensorflow as tf
import random as python_random

# The below is necessary for starting Numpy generated random numbers
# in a well-defined initial state.
np.random.seed(123)

# The below is necessary for starting core Python generated random numbers
# in a well-defined state.
python_random.seed(123)

# The below set_seed() will make random number generation
# in the TensorFlow backend have a well-defined initial state.
# For further details, see:
# https://www.tensorflow.org/api_docs/python/tf/random/set_seed
tf.random.set_seed(1234)


In [0]:
#### Setting up path to import important data preparation Python module
import sys
import os
sys.path.append('/content/drive/My Drive/Colab Notebooks/torch_pipe/')

In [6]:
os.getcwd()

'/content'

In [0]:
#### Using torch utilities to prepare the features. Importing all the important files
import torch
import torch.nn as nn
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from Util.human_language_modeling import *
from torch.utils.data import DataLoader
import torch.nn.functional as F
import time
import logging

In [0]:
#### Enabling logging
import logging
logger = logging.getLogger()
fhandler = logging.FileHandler(filename='mylog.log', mode='a')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fhandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.setLevel(logging.DEBUG)

In [0]:
#### Setting up the batch size and length of the sequence
BATCH_SIZE = 64 ## defining the batch size
bptt = 20 ## back propogration through LSTM
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [0]:
### A simplie python function to show text given an array of vectors
def show_text(input_vector):
    separator = ' '
    txt = separator.join([vocab.itos[i] for i in input_vector])
    return txt

In [0]:
NLP = spacy.load('en_core_web_sm')
def tokenizer(comment):
    #comment = re.sub(
    #    r"[\*\"“”\n\\…\+\-\/\=\(\)‘•:\[\]\|’\!;\.]", " ", 
    #    str(comment))
    comment = re.sub(r"[ ]+", " ", comment)
    comment = re.sub(r"\!+", "!", comment)
    comment = re.sub(r"\,+", ",", comment)
    comment = re.sub(r"\?+", "?", comment)
    #cleanr = re.compile('<.*?>')
    #cleanr = re.compile('<>')
    #comment = re.sub(cleanr, '', comment)
    #if (len(comment) > MAX_CHARS):
    #   comment = comment[:MAX_CHARS]
    return[x.text for x in NLP.tokenizer(comment) if x.text != " "]

In [12]:
tokenizer = get_tokenizer("spacy")
train_dataset, valid_dataset = HumanNumbers(root='data',bptt=bptt,batch_size=BATCH_SIZE,data_select=('train', 'valid'))
vocab = train_dataset.get_vocab()

3428lines [00:00, 34273.09lines/s]

<function tokenizer at 0x7faeded15840>


8001lines [00:00, 33302.93lines/s]


51200
51200
torch.Size([51200, 20])
torch.Size([51200, 20])
14080
14080
torch.Size([14080, 20])
torch.Size([14080, 20])


#### Extract the features for Keras/Tensor Flow implementation

In [0]:
import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, GRU, SimpleRNN
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import initializers

In [0]:
#### Building input features and lables for machine learning models
train_x = train_dataset.input_data.numpy()
train_label = train_dataset.label_data.numpy().astype(int)
train_y = tf.keras.utils.to_categorical(train_label, num_classes=len(vocab.itos))

valid_x = valid_dataset.input_data.numpy()
valid_label = valid_dataset.label_data.numpy()
valid_y = tf.keras.utils.to_categorical(valid_label, num_classes=len(vocab.itos))

In [15]:
print(train_x.shape)
print (train_y.shape)
print(valid_x.shape)
print(valid_y.shape)

(51200, 20)
(51200, 20, 34)
(14080, 20)
(14080, 20, 34)


In [0]:
#### Setting up Keras dataset to feed into machine learning models
BUFFER_SIZE = train_x.shape[0] ## Shuffling the data across entire dataset before building the batch

train_batch = tf.data.Dataset.from_tensor_slices((train_x, train_y))
train_batch = train_batch.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

val_batch = tf.data.Dataset.from_tensor_slices((valid_x, valid_y))
val_batch = val_batch.batch(BATCH_SIZE)

#### Train the model using Keras bidirectional GRU model

In [17]:
model1 = Sequential()
model1.add(Embedding(len(vocab.itos), output_dim=64, input_length=train_x.shape[1]))
model1.add( Bidirectional(GRU(units=64,return_sequences=True,kernel_initializer="zeros",recurrent_initializer="zeros",)))
model1.add(Dense(len(vocab.itos), activation='softmax'))

### Compile the model
adam = Adam(lr=0.01)
model1.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['categorical_accuracy'])

#earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')
#history = model.fit(train_x, train_y, epochs=10, batch_size=64, verbose=1,validation_data=(valid_x,valid_y))
history = model1.fit(train_batch, epochs=5, verbose=1,validation_data=val_batch)
print (model1.summary())
output = model1.predict(valid_x)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 20, 64)            2176      
_________________________________________________________________
bidirectional (Bidirectional (None, 20, 128)           49920     
_________________________________________________________________
dense (Dense)                (None, 20, 34)            4386      
Total params: 56,482
Trainable params: 56,482
Non-trainable params: 0
_________________________________________________________________
None


In [18]:
### Visualize the embeddings
model1.layers[0].get_weights()[0]

array([[-0.01746353, -0.03612994,  0.01480493, ..., -0.01985164,
         0.03015638, -0.01682599],
       [ 0.00664103, -0.02845314,  0.00909234, ...,  0.03347838,
         0.01638332,  0.02350162],
       [-0.38225704, -0.29808694, -0.23042633, ...,  0.06806397,
        -0.31778467,  0.36928326],
       ...,
       [-0.19148602, -0.18257464,  0.9640643 , ..., -0.43556073,
         0.48647377,  0.6472735 ],
       [-0.61393994, -0.5060389 , -0.644289  , ...,  1.1192353 ,
        -0.7493163 , -0.23655911],
       [-0.10578436, -0.18246987,  0.39633074, ..., -0.14235532,
         0.34378192,  2.1207764 ]], dtype=float32)

### LSTM model that will use Keras GRU embeddings

In [19]:
nv = len(vocab.itos)
nv

34

In [20]:
BATCH_SIZE = 64
nh = 64
nh

64

In [0]:
def generate_batch(batch):
    #print(len(batch))
    text = []
    label = []
    for entry in batch:
        text.append(entry[0].numpy())
        label.append(entry[1].numpy())
    return torch.tensor(text), torch.tensor(label)

In [0]:
class BatchNorm1dFlat(nn.BatchNorm1d):
    "`nn.BatchNorm1d`, but first flattens leading dimensions"
    def forward(self, x):
        if x.dim()==2: return super().forward(x)
        *f,l = x.shape
        x = x.contiguous().view(-1,l)
        return super().forward(x).view(*f,l)

In [0]:
class Model7(nn.Module):
    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)
        self.i_h.weight.data.copy_(torch.from_numpy(model1.layers[0].get_weights()[0]))
        self.i_h.weight.requires_grad = False
        self.rnn = nn.LSTM(nh, nh, 2, batch_first=True,bidirectional=True)
        self.h_o = nn.Linear(2*nh,nv)
        self.bn = BatchNorm1dFlat(2*nh)
        self.h = torch.zeros(2*2, BATCH_SIZE, nh)
        self.c = torch.zeros(2*2, BATCH_SIZE, nh)
        
        
    def forward(self, x):
        res,(h,c) = self.rnn(self.i_h(x), (self.h,self.c))
        #print("\n res is: ")
        #print (res[0,0,:])
        #self.h = h.detach()
        return self.h_o(self.bn(res))

In [24]:
model = Model7().to(device)
model

Model7(
  (i_h): Embedding(34, 64)
  (rnn): LSTM(64, 64, num_layers=2, batch_first=True, bidirectional=True)
  (h_o): Linear(in_features=128, out_features=34, bias=True)
  (bn): BatchNorm1dFlat(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

## Run the models

Given the pattern of the numbers, bidirectional LSTM and GRU may work best for this situation

In [0]:
from torch.utils.data import DataLoader

def train_func(sub_train_):

    # Train the model
    train_loss = 0
    train_acc = 0
    sample_tested = 0
    #print(len(sub_train_))
    data = DataLoader(sub_train_, batch_size=BATCH_SIZE,shuffle=True,
                      collate_fn=generate_batch,drop_last=True)
    for i, (text, cls) in enumerate(data):
        optimizer.zero_grad()
        text, cls = text.to(device), cls.to(device)
        #print(text.shape)
        output = model(text)
        #print(output.shape)
        #print(cls.shape)
        #print(text)
        #print(cls)
        for i in range(output.shape[1]):
            loss = criterion(output[:,i,:], cls[:,i])
            train_loss += loss.item()
            #print((output[:,i,:].argmax(1) == cls[:,i]).sum().item())
            train_acc += (output[:,i,:].argmax(1) == cls[:,i]).sum().item()
            sample_tested += output.shape[0]
        loss.backward()
        optimizer.step()
        
        

    # Adjust the learning rate
    #scheduler.step()
    print(train_acc)
    print(sample_tested)
    return train_loss / (len(sub_train_)*bptt), train_acc / (len(sub_train_)*bptt)

def test_func(data_):
    loss = 0
    acc = 0
    data = DataLoader(data_, batch_size=BATCH_SIZE, collate_fn=generate_batch,drop_last=True)
    for text, cls in data:
        text, cls = text.to(device), cls.to(device)
        with torch.no_grad():
            output = model(text)
            #print(output.shape)
            #print(cls.shape)
            for i in range(output.shape[1]):
                loss = criterion(output[:,i,:], cls[:,i])
                loss += loss.item()
                acc += (output[:,i,:].argmax(1) == cls[:,i]).sum().item()

    return loss / (len(data_)*bptt), acc / (len(data_)*bptt)

In [26]:
N_EPOCHS = 10
min_valid_loss = float('inf')

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)

#### Break test sets into 2 data sets validations and test data set

for epoch in range(N_EPOCHS):

    start_time = time.time()
    train_loss, train_acc = train_func(train_dataset)
    valid_loss, valid_acc = test_func(valid_dataset)

    secs = int(time.time() - start_time)
    mins = secs / 60
    secs = secs % 60

    print('Epoch: %d' %(epoch + 1), " | time in %d minutes, %d seconds" %(mins, secs))
    print(f'\tLoss: {train_loss:.4f}(train)\t|\tAcc: {train_acc * 100:.1f}%(train)')
    print(f'\tLoss: {valid_loss:.4f}(valid)\t|\tAcc: {valid_acc * 100:.1f}%(valid)')

608520
1024000
Epoch: 1  | time in 1 minutes, 2 seconds
	Loss: 0.0305(train)	|	Acc: 59.4%(train)
	Loss: 0.0000(valid)	|	Acc: 48.0%(valid)
712779
1024000
Epoch: 2  | time in 1 minutes, 2 seconds
	Loss: 0.0264(train)	|	Acc: 69.6%(train)
	Loss: 0.0000(valid)	|	Acc: 57.4%(valid)
736708
1024000
Epoch: 3  | time in 1 minutes, 2 seconds
	Loss: 0.0263(train)	|	Acc: 71.9%(train)
	Loss: 0.0000(valid)	|	Acc: 61.2%(valid)
815249
1024000
Epoch: 4  | time in 1 minutes, 2 seconds
	Loss: 0.0201(train)	|	Acc: 79.6%(train)
	Loss: 0.0000(valid)	|	Acc: 67.2%(valid)
823667
1024000
Epoch: 5  | time in 1 minutes, 2 seconds
	Loss: 0.0191(train)	|	Acc: 80.4%(train)
	Loss: 0.0000(valid)	|	Acc: 68.3%(valid)
800646
1024000
Epoch: 6  | time in 1 minutes, 2 seconds
	Loss: 0.0222(train)	|	Acc: 78.2%(train)
	Loss: 0.0000(valid)	|	Acc: 65.0%(valid)
818986
1024000
Epoch: 7  | time in 1 minutes, 2 seconds
	Loss: 0.0209(train)	|	Acc: 80.0%(train)
	Loss: 0.0000(valid)	|	Acc: 66.6%(valid)
809077
1024000
Epoch: 8  | time in