In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.models import load_model

In [3]:
# Load the unseen IMDb reviews from the CSV file and preview the first rows.
df = pd.read_csv("/content/IMDb_Unseen_Reviews.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,Movie,Review Text,IMDb Rating
0,0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9
1,1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10
2,2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3
3,3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1
4,4,Eternals,Amazing visuals and philosophical concepts!\n\...,10


In [7]:
pip install nltk




In [8]:
import nltk
# Download the NLTK 'stopwords' corpus; nltk.corpus.stopwords is read in the cells below.
nltk.download('stopwords')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [9]:
from nltk.corpus import stopwords
# Show the English stopword list that will be stripped from the reviews.
print(stopwords.words('english'))


['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [10]:
import re
import nltk
from nltk.corpus import stopwords
# English stopwords as a set; CustomPreprocess.preprocess_text joins these into its removal regex.
stopwords_list = set(stopwords.words('english'))


# Matches a tag-like span: '<', one or more characters other than '>', then '>'.
TAG_RE = re.compile(r'<[^>]+>')


def remove_tags(text):
    """Return ``text`` with every span matched by ``TAG_RE`` deleted."""
    without_tags = re.sub(TAG_RE, '', text)
    return without_tags


class CustomPreprocess():
    """Normalise a raw review string into lowercase, letters-only, stopword-free text.

    Relies on the module-level ``remove_tags`` helper and ``stopwords_list`` set.
    """

    def __init__(self):
        # Compiled stopword regex; created on first use by _stopword_regex().
        self._stopword_pattern = None

    def _stopword_regex(self):
        """Return the compiled stopword-removal regex, building it only once.

        The pattern depends only on ``stopwords_list``, so it is cached on the
        instance instead of being re-joined and re-compiled for every review.
        ``stopwords_list`` is read when the pattern is first built; later changes
        to it are not picked up by an existing instance.
        """
        pattern = getattr(self, '_stopword_pattern', None)
        if pattern is None:
            pattern = re.compile(r'\b(' + r'|'.join(stopwords_list) + r')\b\s*')
            self._stopword_pattern = pattern
        return pattern

    def preprocess_text(self,sen):
        """Clean one review and return the cleaned string.

        Steps, in order: lowercase, strip tag-like spans, replace every
        non-letter character with a space, drop isolated single letters,
        collapse whitespace runs, remove stopwords.

        Args:
            sen: The raw review text (a ``str``).

        Returns:
            The cleaned text. It may keep a leading or trailing space, because
            nothing is stripped after the final substitution.
        """
        sen = sen.lower()

        # Remove html tags
        sentence = remove_tags(sen)

        # Remove punctuations and numbers (every non-letter becomes a space)
        sentence = re.sub('[^a-zA-Z]', ' ', sentence)

        # Single character removal.
        # The pattern needs whitespace on both sides, so a lone letter at the very
        # start or end of the text is kept, and of two adjacent lone letters only
        # the first is removed (the match consumes the whitespace they share).
        # NOTE(review): left unchanged so the output stays identical to whatever
        # the saved tokenizer was presumably fitted on — confirm before tightening.
        sentence = re.sub(r"\s+[a-zA-Z]\s+", ' ', sentence)

        # Remove multiple spaces
        sentence = re.sub(r'\s+', ' ', sentence)

        # Remove Stopwords (each whole-word match and the whitespace after it)
        sentence = self._stopword_regex().sub('', sentence)

        return sentence

In [11]:
# Run every entry of the 'Review Text' column through the cleaning pipeline.
custom = CustomPreprocess()
unseen_reviews = df['Review Text']

unseen_processed = [
    custom.preprocess_text(raw_review) for raw_review in unseen_reviews
]

In [12]:
# Spot-check the first three cleaned reviews.
unseen_processed[:3]

['intelligent movie movie obviously allegorical fascinating tale ai mainly manipulation power wanting action spectacular cgi movie aimed people like think rather passively wait entertained themes ai also surveillance excellent points data us collected phone companies search engine companies commercial operating systems makers plot seems simple extremely clever protagonist playing games trying stay one step ahead one another movie perfectly consistent internal logic plays perfectly go expecting much however see people satisfied movie sets brilliantly therefore give least recent movies getting movie succeeds another recent movie ai transcendence think called failed interesting failure third movie ai spanish movie called eva also brilliant eva moving movie philosophical movies perfect different ways ai name movie ava seems nod title spanish movie aside nice stars appeared ex machina eva casting great course several aspects movie unrealistic often absurd allegorical movie acceptable movie 

In [20]:
import io
import json
from keras.preprocessing.text import tokenizer_from_json

# Read the serialized tokenizer from disk; the file only needs to stay open for the read.
with open('/content/tokenizer.json') as tokenizer_file:
    data = json.load(tokenizer_file)

# Rebuild the Tokenizer object from the value parsed out of the file.
loaded_tokenizer = tokenizer_from_json(data)

In [21]:
from tensorflow.keras.preprocessing.text import Tokenizer
# Encode the cleaned reviews with the tokenizer loaded from tokenizer.json instead of
# fitting a new Tokenizer on the unseen reviews: a freshly fitted tokenizer would
# build its own word index from this data.
# NOTE(review): presumably the saved tokenizer is the one the model was trained with — confirm.
unseen_tokenized = loaded_tokenizer.texts_to_sequences(unseen_processed)

In [22]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Bring every sequence to length 100, filling the tail with zeros (padding='post').
# NOTE(review): 100 presumably matches the input length the model was trained with — confirm.
unseen_padded = pad_sequences(unseen_tokenized, padding='post', maxlen=100)

In [23]:
# Inspect the first two padded sequences.
unseen_padded[:2]

array([[  3,   3,  45,  33, 235,  88,  96,   3,  30, 265,  15,  33,  36,
        100, 278, 197, 262, 262,   3, 273, 254,  53,  30,   3,   3,   3,
         33, 244,   3,  33,   3,  36, 195,   3, 188,  33,   3,   3, 121,
         24,  25,   8,   3,   3,   3, 100,  15,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0],
       [ 24,  25, 112,  22, 282,  39, 254,   3,  26, 278,  24,  25,  83,
         23, 107, 182, 183,  48, 157, 250,   2,   8,  87, 173,  86, 282,
         22, 256, 122, 181,  73,  72,  28,  11, 270,  22, 242,  95,  24,
         25,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   

In [75]:
# Load the saved Keras model from its .h5 file.
model = load_model("/content/my_model.h5")


In [80]:
# Drop every leftover "Unnamed: N" column, not just "Unnamed: 0": the preview of the
# loaded CSV shows two of them ("Unnamed: 0.1" and "Unnamed: 0"), so removing a single
# name still leaves one behind. NOTE(review): these look like index columns written
# out by earlier saves — confirm none of them is needed.
unnamed_cols = [col for col in df.columns if str(col).startswith("Unnamed:")]
pred_df = df.drop(columns=unnamed_cols)
pred_df

Unnamed: 0,Movie,Review Text,IMDb Rating
0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9
1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10
2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3
3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1
4,Eternals,Amazing visuals and philosophical concepts!\n\...,10
5,Eternals,Worst MCU film ever\n\nFollowing the events of...,3


In [81]:
# Score the padded reviews here: `pred` was not defined by any earlier cell, so this
# cell raised NameError on a fresh kernel (Restart & Run All).
# ravel() flattens the model output to one value per review; the scores shown below
# (pred * 10 in the 0-10 range) indicate one output per review.
pred = model.predict(unseen_padded).ravel()

# Rescale to the 0-10 IMDb rating range, rounded to one decimal place.
pred_df["Predicted Sentiment"] = np.round(pred * 10, 1)
pred_df

Unnamed: 0,Movie,Review Text,IMDb Rating,Predicted Sentiment
0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9,8.0
1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10,5.8
2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3,6.5
3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1,5.8
4,Eternals,Amazing visuals and philosophical concepts!\n\...,10,8.3
5,Eternals,Worst MCU film ever\n\nFollowing the events of...,3,7.7


In [84]:
# Label each predicted score: 5 or below is "Negative", anything else is "Positive".
pred_label = [
    "Negative" if score <= 5 else "Positive"
    for score in pred_df["Predicted Sentiment"]
]

In [85]:
# Attach the Positive/Negative labels as a new column and display the final table.
pred_df["Predicted Review Sentiment"] = pred_label
pred_df

Unnamed: 0,Movie,Review Text,IMDb Rating,Predicted Sentiment,Predicted Review Sentiment
0,Ex Machina,Intelligent Movie.\nThis movie is obviously al...,9,8.0,Positive
1,Ex Machina,Extraordinary and thought-provoking.\n'Ex mach...,10,5.8,Positive
2,Ex Machina,"Poor story, only reasonable otherwise.\nIf I h...",3,6.5,Positive
3,Ex Machina,Had Great Potential.\nThis movie is one of the...,1,5.8,Positive
4,Eternals,Amazing visuals and philosophical concepts!\n\...,10,8.3,Positive
5,Eternals,Worst MCU film ever\n\nFollowing the events of...,3,7.7,Positive
