# Movie Review Sentiment Classifier

## Importing the libraries 

In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv1D, LSTM, Bidirectional, SpatialDropout1D, MaxPool1D, AveragePooling1D, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import os
import numpy as np
import matplotlib.pyplot as plt
import sys

if not sys.warnoptions:
    import warnings
    warnings.simplefilter('ignore')

## Data Preprocessing

### Getting the data

In [2]:
reviews = []
ratings = []

dataset_dir = 'cornell_nlp_dataset/scaledata'
folders = os.listdir(dataset_dir)
for folder in folders:
    fname = os.path.join(dataset_dir, str(folder)+'/'+'subj.'+str(folder))
    f = open(fname)
    data = f.read()
    lines = data.split('\n')
    reviews.extend(lines)
    f.close()
    fname2 = os.path.join(dataset_dir, str(folder)+'/'+'label.4class.'+str(folder))
    f2 = open(fname2)
    data2 = f2.read()
    rating = data2.split('\n')
    ratings.extend(rating)
    f2.close()
print(reviews[1])
print(ratings[1])

admittedly , with a title like the mangler , you're not expecting the second coming of the godfather . or even halloween , for that matter . nevertheless , regardless of how silly the name sounds and how unpromising the cast looks , it's difficult to be prepared for something this atrocious . i won't claim this is the worst movie i've ever seen--there are plenty of worthy contenders for that honor--but it's certainly among a select group , right alongside such notables as split second and dr . giggles . i haven't spent much time in laundry factories , but i'm sure there aren't many of them with the near-gothic appearance of the blue ribbon in rivers valley , maine . people are being gobbled up by " the mangler " , the gargantuan machine that presses and folds sheets . he might well have been talking about the movie itself . anyway , it's not long before we learn that the mangler isn't the only machine to run amok . apparently , a refrigerator has developed some homicidal tendencies . i

In [8]:
print('No. of reviews in this dataset: %d' %len(reviews))

No. of reviews in this dataset: 5010


### Tokenizing data

In [13]:
maxlen = 400
training_samples = 2010
val_samples = 1000
test_samples = 1000
max_words = 15000

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(reviews)
sequences = tokenizer.texts_to_sequences(reviews)
word_index = tokenizer.word_index
print('No. of unique tokens: %d' %(len(word_index)))

dataset = pad_sequences(sequences, maxlen=maxlen)
labels = np.asarray(ratings)

print('Shape of dataset: ', dataset.shape)
print('Shape of labels: ', labels.shape)

indices = np.arange(dataset.shape[0])
np.random.shuffle(indices)
dataset = dataset[indices]
labels = labels[indices]

x_train = dataset[:training_samples]
y_train = labels[:training_samples]
x_val = dataset[training_samples: training_samples + val_samples]
y_val = dataset[training_samples: training_samples + val_samples]

No. of unique tokens: 46259
Shape of dataset:  (5010, 400)
Shape of labels:  (5010,)


## Model