In [None]:
# Expose Google Drive inside the Colab VM so later cells can read the dataset
# and write model checkpoints there.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import nltk
nltk.download('stopwords')
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer       
from nltk.tokenize import TweetTokenizer
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense,Dropout,Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model 

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Work from the notebook folder on Drive. The path is absolute (rooted at the
# '/content/drive' mount point used above) so that re-running this cell is
# harmless; a relative path only resolves on the first run, before the working
# directory has changed.
os.chdir('/content/drive/MyDrive/Colab Notebooks/')

In [None]:
# Load the raw reviews and their sentiment labels.
dataset = pd.read_csv('IMDB Dataset.csv')
X = dataset['review']

# Encode labels as integers: positive -> 1, negative -> 0.
# An explicit map + cast guarantees an integer dtype; any label other than
# 'positive'/'negative' becomes NaN and makes the cast fail loudly instead of
# silently reaching the model.
y = dataset['sentiment'].map({'positive': 1, 'negative': 0}).astype('int64')

#cleaning
stop = stopwords.words('english')
# Set copy of the stop-word list for O(1) membership tests; `stop` itself is
# kept as the original list.
stop_lookup = frozenset(stop)

# Replace HTML tags with a space (not an empty string) so that the words on
# either side of e.g. "<br />" are not fused together.
X = X.replace({'<.*?>': ' '}, regex=True)
# Replace every non-letter character with a space.
X = X.replace({'[^A-Za-z]': ' '}, regex=True)
# Lower-case BEFORE filtering: the NLTK stop words are lower-case, so
# filtering first would let capitalised stop words ("I", "The", "A") through.
X = X.apply(
    lambda review: [w for w in review.lower().split() if w not in stop_lookup]
)
print(X)

0        [one, reviewers, mentioned, watching, oz, epis...
1        [a, wonderful, little, production, the, filmin...
2        [i, thought, wonderful, way, spend, time, hot,...
3        [basically, family, little, boy, jake, thinks,...
4        [petter, mattei, love, time, money, visually, ...
                               ...                        
49995    [i, thought, movie, right, good, job, it, crea...
49996    [bad, plot, bad, dialogue, bad, acting, idioti...
49997    [i, catholic, taught, parochial, elementary, s...
49998    [i, going, disagree, previous, comment, side, ...
49999    [no, one, expects, star, trek, movies, high, a...
Name: review, Length: 50000, dtype: object


In [None]:
# Hold out 20% of the reviews for testing.
# random_state pins the shuffle so the split (and everything derived from it,
# such as the vocabulary and padded length) is the same on every run;
# stratify=y keeps the positive/negative ratio equal in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
print(X_train.shape)

(40000,)


In [None]:
def get_max_length(reviews=None):
    """Return the mean review length, rounded up to the next integer.

    Despite the name, this is the *average* number of items per review, not
    the maximum; the caller uses it as the common padded/truncated length.

    Parameters
    ----------
    reviews : iterable of sized objects, optional
        Reviews to measure (each element must support ``len``). When omitted,
        the module-level ``X_train`` is used, which is the original behaviour.

    Returns
    -------
    int
        ``ceil(mean(len(review) for review in reviews))``.

    Raises
    ------
    ValueError
        If ``reviews`` is empty (the mean is undefined).
    """
    if reviews is None:
        # Backward-compatible default: measure the global training split.
        reviews = X_train

    review_lengths = [len(review) for review in reviews]
    if not review_lengths:
        # Without this guard np.mean([]) yields NaN and int(NaN) raises an
        # opaque ValueError; fail with a clear message instead.
        raise ValueError('cannot compute a length from an empty collection of reviews')

    return int(np.ceil(np.mean(review_lengths)))

In [None]:
# Build the vocabulary from the training reviews only. lower=False because the
# reviews were already lower-cased during cleaning.
token = Tokenizer(lower=False)
token.fit_on_texts(X_train)

# Convert both splits from word lists to lists of integer word indices.
X_train, X_test = (
    token.texts_to_sequences(split) for split in (X_train, X_test)
)

# Common sequence length, derived from the training split.
max_length = get_max_length()

# Pad short reviews / cut long ones at the end so every row has max_length items.
X_train, X_test = (
    pad_sequences(split, maxlen=max_length, padding='post', truncating='post')
    for split in (X_train, X_test)
)

# Vocabulary size for the embedding layer: one slot per known word plus one
# extra for index 0, which the padded positions use.
total_words = 1 + len(token.word_index)
print(X_train.shape)

(40000, 131)


In [None]:
# Hyper-parameters: embedding vector size and units per LSTM layer.
EMBED_DIM = 32
LSTM_OUT = 64

# Embedding -> dropout -> 4 stacked LSTMs -> dropout -> sigmoid output.
model = Sequential()
model.add(Embedding(total_words, EMBED_DIM, input_length=max_length))
model.add(Dropout(0.25))
# No input_shape here: only the first layer of a Sequential model defines the
# input, so the previous input_shape=(40000,130) was ignored (and its values
# did not match the real (max_length, EMBED_DIM) input anyway).
# return_sequences=True passes the full sequence on to the next LSTM layer.
model.add(LSTM(LSTM_OUT, return_sequences=True))
model.add(LSTM(LSTM_OUT, return_sequences=True))
model.add(LSTM(LSTM_OUT, return_sequences=True))
# The last LSTM returns only its final output, which feeds the classifier head.
model.add(LSTM(LSTM_OUT))
model.add(Dropout(0.1))
# Single sigmoid unit: probability that the review is positive (label 1).
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 131, 32)           2952864   
_________________________________________________________________
dropout (Dropout)            (None, 131, 32)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 131, 64)           24832     
_________________________________________________________________
lstm_1 (LSTM)                (None, 131, 64)           33024     
_________________________________________________________________
lstm_2 (LSTM)                (None, 131, 64)           33024     
_________________________________________________________________
lstm_3 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0

In [None]:
# Save the model to 'models/LSTM.h5' whenever the monitored metric improves.
# The metric is *validation* accuracy: monitoring training accuracy would keep
# the most over-fitted weights, since training accuracy rises every epoch
# regardless of how well the model generalises. 'val_accuracy' is available
# because model.fit is called with validation_data.
checkpoint = ModelCheckpoint(
    'models/LSTM.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1)

In [None]:
# Train the network; the held-out split doubles as validation data and the
# checkpoint callback writes the model file during training.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=128,
    epochs=20,
    callbacks=[checkpoint],
)

Epoch 1/10

Epoch 00001: accuracy improved from -inf to 0.75657, saving model to models/LSTM.h5
Epoch 2/10

Epoch 00002: accuracy improved from 0.75657 to 0.92160, saving model to models/LSTM.h5
Epoch 3/10

Epoch 00003: accuracy improved from 0.92160 to 0.95875, saving model to models/LSTM.h5
Epoch 4/10

Epoch 00004: accuracy improved from 0.95875 to 0.97355, saving model to models/LSTM.h5
Epoch 5/10

Epoch 00005: accuracy improved from 0.97355 to 0.98123, saving model to models/LSTM.h5
Epoch 6/10

Epoch 00006: accuracy improved from 0.98123 to 0.98252, saving model to models/LSTM.h5
Epoch 7/10

Epoch 00007: accuracy improved from 0.98252 to 0.98670, saving model to models/LSTM.h5
Epoch 8/10

Epoch 00008: accuracy improved from 0.98670 to 0.98820, saving model to models/LSTM.h5
Epoch 9/10

Epoch 00009: accuracy improved from 0.98820 to 0.99030, saving model to models/LSTM.h5
Epoch 10/10

Epoch 00010: accuracy improved from 0.99030 to 0.99090, saving model to models/LSTM.h5


<tensorflow.python.keras.callbacks.History at 0x7fade37ae350>

In [None]:
# Report accuracy of the in-memory model on both splits.
# model.evaluate returns [loss, accuracy]; index 1 is the accuracy metric
# requested at compile time.
for split_name, (features, labels) in (
    ('Train', (X_train, y_train)),
    ('Test', (X_test, y_test)),
):
    out = model.evaluate(features, labels, verbose=0)
    print(f'{split_name} accuracy {out[1]*100}%')

Train accuracy 99.69249963760376%
Test accuracy 86.28000020980835%
