### Name: Suhas Sadashiv Kolekar

### PRN :230940128030


# Deep Neural Networks

## Assignment: 7
 


In [110]:
###-----------------
### Import Libraries
###-----------------

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from collections.abc import Callable
from typing import Literal

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [111]:
###----------------
### Some parameters
###----------------

# Directory locations, relative to the notebook's working directory.
inpDir = '../input'
outDir = '../output'
modelDir = '../model'
subDir = '../subDir'

RANDOM_STATE = 24 # REMEMBER: to remove at the time of promotion to production
np.random.seed(RANDOM_STATE) # Set Random Seed for reproducible  results
# Seeding NumPy alone does not reach TensorFlow: weight initialisation,
# Dataset.shuffle and tf.random.categorical draw from TensorFlow's own
# generator, so it has to be seeded as well.
tf.random.set_seed(RANDOM_STATE)

EPOCHS = 10 # number of epochs
# NOTE(review): the optimizer cell passes a literal learning_rate=0.001 and
# does not read ALPHA - confirm which value is intended.
ALPHA = 0.01 # learning rate
NUM_SAMPLES = 1280 # How many samples we want to generate 
NOISE = 0.2 # Noise to be introduced in the data
TEST_SIZE = 0.2
TRAIN_SIZE = 14496 # Fix size of train set so that we have batches of same size
BATCH_SIZE = 64

# parameters for Matplotlib
params = {'legend.fontsize': 'x-large',
          'figure.figsize': (15, 8),
          'axes.labelsize': 'x-large',
          'axes.titlesize':'x-large',
          'xtick.labelsize':'x-large',
          'ytick.labelsize':'x-large'
         }

CMAP = 'coolwarm' # plt.cm.Spectral

plt.rcParams.update(params)

### Dataset
##### - Shakespeare text corpus (`shakespeare.txt`)

In [112]:
# Read the whole corpus as bytes and decode it as UTF-8. The context manager
# closes the file handle as soon as the read has finished.
with open('shakespeare.txt', 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

len(text)

1115395

In [113]:
# Vocabulary: the distinct characters of the corpus, in sorted order.
vocab = sorted(set(text))
len(vocab)

65

In [114]:
# Display every character in the vocabulary.
vocab

['\n',
 ' ',
 '!',
 '$',
 '&',
 "'",
 ',',
 '-',
 '.',
 '3',
 ':',
 ';',
 '?',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [115]:
# Character -> integer id, where the id is the character's position in vocab.
char2idx = {ch: pos for pos, ch in enumerate(vocab)}

In [116]:
# Inverse lookup: element k of the array is the character at position k of vocab.
idx2char = np.array(vocab)

idx2char

array(['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?',
       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
       'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
       'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
       'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'],
      dtype='<U1')

In [117]:
# Encode the Shakespeare corpus: replace every character with its integer id
# from char2idx, giving one id per character of text.

text_as_int = np.array([char2idx[c] for c in text])

text_as_int.shape

(1115395,)

In [118]:
# Peek at the encoded corpus.
text_as_int

array([18, 47, 56, ...,  8,  0,  0])

In [119]:
# First ten characters of the raw text.
text[:10]

'First Citi'

In [120]:
# Spot-check the decoding table for ids 47 and 18.
idx2char[47], idx2char[18]

('i', 'F')

In [121]:
# Wrap the id array in a tf.data.Dataset; slicing along the first axis makes
# each dataset element a single character id.

dataset = tf.data.Dataset.from_tensor_slices(text_as_int)    

In [122]:
# Length of the input sequences, and the number of whole chunks of
# seq_length + 1 characters that fit in the corpus (each chunk later
# yields one input/target pair).

seq_length = 100

example_per_epoch = len(text) // ( seq_length + 1 )

example_per_epoch

11043

In [123]:
# Dataset elements are tensors, so .numpy() is used to obtain the plain value
# both for printing and for indexing idx2char.

for i in dataset.take(10):
    print (i.numpy(), '|', idx2char[i.numpy()])

18 | F
47 | i
56 | r
57 | s
58 | t
1 |  
15 | C
47 | i
58 | t
47 | i


In [124]:
# Group consecutive character ids into chunks of seq_length + 1;
# drop_remainder=True discards a final chunk that would be shorter.
sequences = dataset.batch(seq_length + 1, drop_remainder=True)

# Show the first two chunks, as ids and decoded back to text.
for item in sequences.take(2):
    
    print(item)
    
    print(repr(''.join(idx2char[item.numpy()]))) # map the ids back to characters and join them
    
    print('\n')

tf.Tensor(
[18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43
  1 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43
 39 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49
  6  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10
  0 37 53 59  1], shape=(101,), dtype=int64)
'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


tf.Tensor(
[39 56 43  1 39 50 50  1 56 43 57 53 50 60 43 42  1 56 39 58 46 43 56  1
 58 53  1 42 47 43  1 58 46 39 52  1 58 53  1 44 39 51 47 57 46 12  0  0
 13 50 50 10  0 30 43 57 53 50 60 43 42  8  1 56 43 57 53 50 60 43 42  8
  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 18 47 56 57 58  6  1
 63 53 59  1 49], shape=(101,), dtype=int64)
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'




In [125]:
def fn_split_X_y(seq):
    """Split one sequence into an (input, target) pair shifted by one position.

    The input is every element except the last and the target is every
    element except the first, so target[i] is the element that follows
    input[i] in the original sequence.
    """
    return seq[:-1], seq[1:]

# Turn every chunk into an (input, target) pair.
# NOTE(review): this rebinds `dataset`, which until now held the
# per-character dataset.
dataset = sequences.map(fn_split_X_y)

In [126]:
# Decode the first two (input, target) pairs back to text to confirm that the
# target is the input shifted by one character.
for X,y in dataset.take(2):
    print(repr(''.join(idx2char[X.numpy()]))) # X data
    print(repr(''.join(idx2char[y.numpy()]))) # y data
    print('-'*100)

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
----------------------------------------------------------------------------------------------------
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you '
're all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
----------------------------------------------------------------------------------------------------


In [127]:
# Size of the shuffle buffer passed to Dataset.shuffle.
BUFFER_SIZE = 10000 

# Shuffle the (input, target) pairs, then group them into batches of
# BATCH_SIZE, dropping a final partial batch.
# NOTE(review): `dataset` is rebuilt from itself, so running this cell a second
# time would batch the already-batched data.
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [128]:
# Number of distinct characters; used as both the embedding input size and
# the width of the output layer.
vocab_size = len(vocab)

# Length of the vector each character id is embedded into.
embedding_dim = 256

# Number of units in the GRU layer.
rnn_units = 1024


def build_model(vocab_size, embedding_dim, rnn_units, batch_size=BATCH_SIZE):
    """Assemble the character-level model with tf.keras.

    Layers, in order:
        1. Embedding - maps each of the `vocab_size` character ids to a dense
           vector of length `embedding_dim`.
        2. GRU - `rnn_units` units, returns the output for every time step
           and is stateful.
        3. Dense - `vocab_size` outputs per time step, with no activation.

    Parameters
    ----------
    vocab_size : int
        Number of distinct character ids.
    embedding_dim : int
        Length of each embedding vector.
    rnn_units : int
        Number of GRU units.
    batch_size : int, default BATCH_SIZE
        Batch dimension fixed into the input shape `[batch_size, None]`;
        the sequence length is left open.

    Returns
    -------
    tf.keras.Sequential
        The model, not yet compiled.
    """
    # Input shape carries a fixed batch size and an unspecified sequence length.
    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
    )

    # Recurrent layer over the embedded sequence, one output per time step.
    recurrent = tf.keras.layers.GRU(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='orthogonal',
    )

    # One score per vocabulary entry at every time step.
    projection = tf.keras.layers.Dense(vocab_size)

    return tf.keras.Sequential([embedding, recurrent, projection])



# Training model; batch_size is left at build_model's default.
model = build_model(vocab_size, 
                    embedding_dim,
                   rnn_units)

In [129]:
# Layer-by-layer overview of output shapes and parameter counts.
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_5 (Embedding)     (64, None, 256)           16640     
                                                                 
 gru_5 (GRU)                 (64, None, 1024)          3938304   
                                                                 
 dense_5 (Dense)             (64, None, 65)            66625     
                                                                 
Total params: 4021569 (15.34 MB)
Trainable params: 4021569 (15.34 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [130]:
# Forward pass of the untrained model on two batches as a sanity check;
# y_pred holds the output for the last of them only.
for X,y in dataset.take(2):
    
    y_pred = model(X)

In [131]:
# Shape of the model output for one batch.
y_pred.shape

TensorShape([64, 100, 65])

In [132]:
# Targets are integer character ids and the model's last Dense layer has no
# activation, hence the sparse loss with from_logits=True.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# NOTE(review): the learning rate is the literal 0.001 here, while the
# parameters cell defines ALPHA = 0.01 - confirm which one is intended.
optim = tf.keras.optimizers.Adam(learning_rate=0.001,
                                name='Adam')


model.compile(optimizer=optim,
             loss=loss_fn)

In [133]:
# Directory the checkpoints are written to.
# NOTE(review): subDir is '../subDir', so the joined path is
# '../model/../subDir', which steps back out of modelDir - confirm this is
# the intended location.
chktPtPath = os.path.join(modelDir,subDir)

# Checkpoint file prefix; '{epoch}' is left as a placeholder for the callback.
chktPtPrefix = os.path.join(chktPtPath, 'chkpt_{epoch}')

# Callback that saves the model weights only (not the full model).
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=chktPtPrefix,
                                                        save_weights_only=True)

In [134]:
# Train for EPOCHS passes over the batched dataset, with the checkpoint
# callback attached; the value returned by fit is kept in hist.
hist = model.fit(dataset, 
                 epochs=EPOCHS, 
                 callbacks=[checkpoint_callback]
                )

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [145]:
# Path prefix of the most recent checkpoint found under chktPtPath.
tf.train.latest_checkpoint(chktPtPath)

'../model/../subDir/chkpt_10'

In [146]:
# Rebuild the same architecture with batch_size=1 so that a single sequence
# can be fed at generation time.
# NOTE: this rebinds `model`; the model built with the default batch size is
# no longer reachable under that name.
model = build_model(vocab_size,
                   embedding_dim,
                   rnn_units,
                   batch_size=1)

# Restore the weights from the most recent checkpoint.
model.load_weights(tf.train.latest_checkpoint(chktPtPath))

model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (1, None, 256)            16640     
                                                                 
 gru_6 (GRU)                 (1, None, 1024)           3938304   
                                                                 
 dense_6 (Dense)             (1, None, 65)             66625     
                                                                 
Total params: 4021569 (15.34 MB)
Trainable params: 4021569 (15.34 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [147]:
# Explicitly build the model for inputs of shape (1, None): batch of one,
# sequence length unspecified.
model.build(tf.TensorShape([1,None],))

model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (1, None, 256)            16640     
                                                                 
 gru_6 (GRU)                 (1, None, 1024)           3938304   
                                                                 
 dense_6 (Dense)             (1, None, 65)             66625     
                                                                 
Total params: 4021569 (15.34 MB)
Trainable params: 4021569 (15.34 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [166]:
def gen_text(model, start_string, num_generate=1000, temperature=1.0):
    """Generate text one character at a time from a trained model.

    The characters of `start_string` are converted to ids with `char2idx` and
    fed to the model in a single call. After that, each sampled character id
    is fed back as the next one-step input. The model's state is reset once
    before generation starts, and the seed ids are printed for reference.

    Parameters
    ----------
    model
        Object that is called as `model(ids)` with an id tensor that has a
        leading batch dimension of 1, and that provides `reset_states()`.
        Its output is passed to `tf.random.categorical`, i.e. it is treated
        as unnormalised log-probabilities.
    start_string : str
        Non-empty seed text. Every character must be a key of `char2idx`.
    num_generate : int, default 1000
        Number of characters to sample after the seed.
    temperature : float, default 1.0
        The model output is divided by this value before sampling. 1.0 leaves
        it unchanged; smaller values make sampling more conservative, larger
        values more random.

    Returns
    -------
    str
        `start_string` followed by the generated characters.

    Raises
    ------
    ValueError
        If `start_string` is empty or `temperature` is not positive.
    KeyError
        If `start_string` contains a character that is not in `char2idx`.
    """
    # Fail early with a clear message; an empty seed would otherwise only
    # surface as an indexing error deep inside the sampling step.
    if not start_string:
        raise ValueError('start_string must contain at least one character')

    if temperature <= 0:
        raise ValueError('temperature must be greater than zero')

    input_eval = [char2idx[c] for c in start_string]

    print(f'Input: {start_string} | {input_eval}')

    # Add the batch dimension expected by the model.
    input_eval = tf.expand_dims(input_eval, 0)

    text_generated = []

    model.reset_states()

    for _ in range(num_generate):

        prediction = model(input_eval)

        # Drop the batch dimension: one row of scores per time step.
        prediction = tf.squeeze(prediction, 0)

        prediction = prediction / temperature

        # Sample one id per time step and keep the sample for the last step.
        predicted_id = tf.random.categorical(prediction,
                                             num_samples=1)[-1, 0].numpy()

        # The sampled character becomes the next (single-step) input.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

In [167]:
# Generate text seeded with 'ROMEO:' and print the result.
print(gen_text(model, start_string=u'ROMEO:'))

Input: ROMEO: | [30, 27, 25, 17, 27, 10]
ROMEO:
Now thus they drgue distrets me from the love,--
My fault were to this paper, treibor person,
To pracity hath another slain,--we may burns to him:
And old-leadings, as I came thy brother's bonish'd,
Were once a Chrept ball comes you I expy this thing;
Lord, which may but still they so, so was a
sweet sir, sba by through the surphrow thou art madam.

CLARENCE:
Bethis gave my true. first I may bark,
My face that love's great is; we shall say it
is this parand will in this vargian's blow,
Is well as hided that hath a plays are;
What words thine and that death:
But come against so I offen alm,
Let, what the watery of our instructions since my heart you shall.

POMPEY:
I desire they, ar sorrew at mine,
I make him speak, and there, limes that assegurs a horse and partly
Floed to the news, in bratester and 'T:
So, by some destin themselves:
Might weep'd upon, and so, I wish
'Shall she living lifening.

TRANIO:
Dapility, that kill'd his oaths i' 