<a href="https://colab.research.google.com/github/navi004/Deep-Learning-CSE4037-/blob/main/22mia1049_Lab7_FineTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine Tuning LSTM with IMDB and Amazon Dataset
# LAB-7
* Naveen Nidadavolu
* 22MIA1049

# Import libraries

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Parameters

In [2]:
max_features = 10000
max_len = 200
embedding_dim = 128
lstm_unis = 128
batch_size = 128
epochs = 5

#Training and Testing data with labels

In [15]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

# Model Building

In [4]:
model = Sequential([
    Embedding(max_features, embedding_dim, input_length=max_len),
    LSTM(lstm_unis,dropout = 0.2, recurrent_dropout = 0.2),
    Dense(1, activation='sigmoid')
])



# Setting Objective function and Optimization (building blocks of DL)

In [5]:
model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

# Training the Model

In [6]:
model.fit(x_train, y_train, batch_size = batch_size, epochs = epochs, validation_data = (x_test, y_test))

Epoch 1/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 843ms/step - accuracy: 0.6841 - loss: 0.5766 - val_accuracy: 0.8482 - val_loss: 0.3570
Epoch 2/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m211s[0m 890ms/step - accuracy: 0.8725 - loss: 0.3238 - val_accuracy: 0.8306 - val_loss: 0.3967
Epoch 3/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 929ms/step - accuracy: 0.8848 - loss: 0.2953 - val_accuracy: 0.8380 - val_loss: 0.3723
Epoch 4/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 898ms/step - accuracy: 0.9041 - loss: 0.2490 - val_accuracy: 0.8491 - val_loss: 0.3795
Epoch 5/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 849ms/step - accuracy: 0.9165 - loss: 0.2161 - val_accuracy: 0.8466 - val_loss: 0.3677


<keras.src.callbacks.history.History at 0x7a3b0da14150>

# Saving the model as a pickle file

In [7]:
model.save('/content/lstm_imdb.h5')



In [8]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np

# Loading the model and adding layers to it

In [9]:
imdb_model = tf.keras.models.load_model('/content/lstm_imdb.h5')
num_classes = 1
input_layer = imdb_model.layers[0].input # Oth layer will have the info of the input info
x = imdb_model.layers[-2].output
output_layer = tf.keras.layers.Dense(num_classes, activation='sigmoid')(x)



# Making a fine tuned model

In [10]:
fine_tuned_model = tf.keras.Model(inputs = input_layer, outputs = output_layer)

# Compiling the new model

In [11]:
fine_tuned_model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

# Text Processing

In [17]:
d = pd.read_csv('/content/test_Lab7.csv')
print(d.head())
print(d.shape)

   category                                             Aspect  \
0         2                                           Great CD   
1         2  One of the best game music soundtracks - for a...   
2         1                   Batteries died within a year ...   
3         2              works fine, but Maha Energy is better   
4         2                       Great for the non-audiophile   

                                          reviewText  
0  My lovely Pat has one of the GREAT voices of h...  
1  Despite the fact that I have only played a sma...  
2  I bought this charger in Jul 2003 and it worke...  
3  Check out Maha Energy's website. Their Powerex...  
4  Reviewed quite a bit of the combo players and ...  
(400000, 3)


In [18]:
reviews = d['reviewText'].str.lower().str.replace('[^a-zA-Z0-9 ]', '',regex = True)
categories = d['category']
tokenizer = Tokenizer(num_words = 10000, oov_token = '<oov>')
tokenizer.fit_on_texts(reviews)
sequences = tokenizer.texts_to_sequences(reviews)
x_train = pad_sequences(sequences, maxlen = 200)
y_train = np.array(categories)

In [20]:
x_train

array([[   0,    0,    0, ...,   14,   13,  890],
       [   0,    0,    0, ...,  382,  169,    7],
       [   0,    0,    0, ...,   95, 3683,  435],
       ...,
       [   0,    0,    0, ..., 1423,   12,  413],
       [   0,    0,    0, ..., 3042,    6,   54],
       [   0,    0,    0, ...,    6,  187, 1346]], dtype=int32)

# Training the model

In [23]:
fine_tuned_model.fit(x_train, y_train, batch_size = 1024, epochs = 5, validation_split = 0.2)

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1088s[0m 3s/step - accuracy: 0.4992 - loss: -11.2663 - val_accuracy: 0.5036 - val_loss: -28.0533
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1110s[0m 4s/step - accuracy: 0.4986 - loss: -33.4775 - val_accuracy: 0.5036 - val_loss: -48.2902
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1071s[0m 3s/step - accuracy: 0.4986 - loss: -53.8154 - val_accuracy: 0.5036 - val_loss: -68.2562
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1105s[0m 3s/step - accuracy: 0.4991 - loss: -73.9000 - val_accuracy: 0.5036 - val_loss: -88.1220
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1068s[0m 3s/step - accuracy: 0.4995 - loss: -93.8445 - val_accuracy: 0.5036 - val_loss: -107.9417


<keras.src.callbacks.history.History at 0x7a3b098b6f10>

# Saving the new model

In [24]:
fine_tuned_model.save('/content/new_model.h5')

