In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.keras.layers import Dense,Bidirectional,LSTM,Embedding,Input
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud,STOPWORDS
import nltk

%matplotlib inline
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the IMDB review CSV into a DataFrame and preview the first rows
DATASET_PATH = "../input/imdb-dataset-of-50k-movie-reviews/IMDB Dataset.csv"

df = pd.read_csv(DATASET_PATH)
df.head()

In [None]:
# Basic info about the data: print the summary produced by DataFrame.info()

df.info()

In [None]:
# Count the missing values in each column

df.isna().sum()

In [None]:
# Encode the labels: positive -> 1 and negative -> 0.
# The replacement is limited to the "sentiment" column so review text can never
# be altered, and the explicit cast keeps the labels integer-typed instead of
# relying on replace() to downcast the object column implicitly.
# The cell stays safe to re-run: already-encoded labels pass through unchanged.

df["sentiment"] = df["sentiment"].replace({"positive": 1, "negative": 0}).astype("int64")

In [None]:
# Preview the first five rows after the label encoding
df.head(n=5)

In [None]:
# Separate the features (first column) from the labels (second column) as NumPy arrays

X, y = (df.iloc[:, column].to_numpy() for column in (0, 1))

# Exploratory Data Analysis

In [None]:
# Bar chart of how many reviews fall into each sentiment class

sns.countplot(data=df, x="sentiment")

The data is equally distributed between the two sentiment classes.

In [None]:
# Making datasets to visualise
# Review length in characters, taken from the "review" column itself rather than
# from the global X: X is later overwritten with padded token sequences, so
# re-running this cell after that point would otherwise record wrong lengths.
df["len"] = df["review"].str.len()

# Boolean masks keep the original integer dtypes (where().dropna() casts numeric
# columns to float) and never drop rows because of unrelated missing values.
neg = df[df["sentiment"] == 0].reset_index(drop=True)
pos = df[df["sentiment"] == 1].reset_index(drop=True)

In [None]:
# Treat the "br" left over from HTML <br> line breaks in the reviews as a stop word
STOPWORDS = [*STOPWORDS, "br"]

In [None]:
# Function to plot wordcloud
def plot_wordcloud(data):
    """Display a word cloud built from every entry of ``data``.

    Args:
        data: Object exposing ``.values`` (the notebook passes a column of
            reviews). Each entry is converted with ``str()``, split on
            whitespace and lower-cased before being added to the corpus.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    stopwords = set(STOPWORDS)

    # Build the corpus with a single join instead of growing a string inside
    # the loop; every review still contributes its lower-cased tokens
    # separated by single spaces, followed by one trailing space.
    words = "".join(
        " ".join(token.lower() for token in str(val).split()) + " "
        for val in data.values
    )

    wordcloud = WordCloud(width = 800, height = 800,
                    background_color ='white',
                    stopwords = stopwords,
                    min_font_size = 10).generate(words)
    plt.figure(figsize = (20,16), facecolor = None)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.tight_layout(pad = 0)
    plt.show()

In [None]:
# Word cloud of the positive reviews
plot_wordcloud(data=pos["review"])

In [None]:
# Word cloud of the negative reviews
plot_wordcloud(data=neg["review"])

In [None]:
def len_hist(data):
    plt.figure(figsize=(25,5))
    sns.histplot(data["len"],bins=100)
    plt.title("Total Reviews")
    plt.show()
    plt.clf()

In [None]:
len_hist(df)
len_hist(neg)
len_hist(pos)

# Analysis and Model

## Data Preprocessing

In [None]:
# Creating Tokeniser
tkn = tf.keras.preprocessing.text.Tokenizer(
    num_words=None,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=' ',
    oov_token="{OOV}"
)
# Tokenising Train Data
# Fit the vocabulary on the training portion only (the first 40,000 reviews,
# the same boundary the train split uses further down). Words that occur only
# in the validation/test reviews then map to the OOV token instead of leaking
# into the vocabulary.
tkn.fit_on_texts(X[:40000])

In [None]:
# Number of entries in the fitted tokenizer's word index
len(tkn.word_index)

In [None]:
# Turn every review into a sequence of word indices, then pad/truncate each to 200 entries
X = tf.keras.preprocessing.sequence.pad_sequences(
    tkn.texts_to_sequences(X),
    maxlen=200,
)

In [None]:
# The first 40,000 rows form the training set; the remainder is held out (validation + test)
x_train, x_val = X[:40000], X[40000:]
y_train, y_val = y[:40000], y[40000:]

In [None]:
# Split the held-out rows: the first 5,000 become the test set, the rest stay as validation
x_test, x_val = x_val[:5000], x_val[5000:]
y_test, y_val = y_val[:5000], y_val[5000:]

In [None]:
# Input for variable-length sequences of integers
inputs = Input(shape=(None,), dtype="int32")

# Embed each integer in a 200-dimensional vector.
# The embedding table size comes from the fitted tokenizer (largest word
# index + 1) instead of a hard-coded vocabulary size, so it stays correct
# whenever the vocabulary changes.
vocab_size = len(tkn.word_index) + 1
x = Embedding(vocab_size, 200)(inputs)

# Stack six bidirectional LSTMs: the first five return full sequences so the
# next recurrent layer can consume them, the last one returns only its final state
for units in (30, 30, 20, 20, 10):
    x = Bidirectional(LSTM(units, return_sequences=True))(x)
x = Bidirectional(LSTM(10))(x)

# Add a classifier: a single sigmoid unit for the binary sentiment label
outputs = Dense(1, activation="sigmoid")(x)
model = Model(inputs, outputs)
model.summary()

In [None]:
# Configure the optimiser/loss, then train for two epochs while tracking validation metrics
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=2,
    validation_data=(x_val, y_val),
)

## History Plot

In [None]:
# Summarize the training history: one figure for accuracy, one for loss.
# The second curve in each figure comes from the validation data passed to
# fit(), so it is labelled "validation" rather than "test".
for metric in ("accuracy", "loss"):
    plt.plot(history.history[metric])
    plt.plot(history.history[f"val_{metric}"])
    plt.title(f"model {metric}")
    plt.ylabel(metric)
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

# Evaluate

In [None]:
# Evaluate the trained model on the held-out test split
model.evaluate(x=x_test, y=y_test)

# Use Cases

In [None]:
def predict(phrase):
    """Score one or more review texts with the trained sentiment model.

    Args:
        phrase: A single review as ``str`` or an iterable of review strings.
            A bare string is wrapped in a list first; otherwise the tokenizer
            would iterate over it character by character.

    Returns:
        The output of ``model.predict`` for the padded sequences, one row per
        review. The model ends in a sigmoid unit and positive reviews were
        encoded as 1, so higher scores lean positive.
    """
    if isinstance(phrase, str):
        phrase = [phrase]
    sequences = tkn.texts_to_sequences(phrase)
    # Same fixed length (200) that was applied to the training data
    padded = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=200)
    return model.predict(padded)

## Hancock

*This is a positive review*

I was lucky enough to go to a pre-screening of Hancock last night and I really enjoyed it. I don't understand all of the criticism this movie is receiving. Everyone take a second and realize this is not a Marvel or DC comic book superhero movie. Now think about that again. It is a different story entirely and has some very unique elements.

Hancock isn't action packed. It doesn't have a Superhero vs. Supervillan plot. I would probably describe it as a character study of the superhero. I think this movie does a better job of addressing some of the issues (and vices) a superhero probably would have if they existed today. The biggest conflict in the movie is within Will Smith's character's attitude, not necessarily good vs. evil.

I think much of the criticism I have read about is motivated by expectations that were not met, which isn't fair at all. If you watch Hancock with only the expectation of being entertained, you will leave happy. Its a good movie, don't jump on the bandwagon of not liking it just because you can. Give it a chance and take it for what it is, a July 4th action/comedy.

In [None]:
# Hancock review (labelled positive above): score it with the trained model
hancock_review = "I was lucky enough to go to a pre-screening of Hancock last night and I really enjoyed it. I don't understand all of the criticism this movie is receiving. Everyone take a second and realize this is not a Marvel or DC comic book superhero movie. Now think about that again. It is a different story entirely and has some very unique elements.Hancock isn't action packed. It doesn't have a Superhero vs. Supervillan plot. I would probably describe it as a character study of the superhero. I think this movie does a better job of addressing some of the issues (and vices) a superhero probably would have if they existed today. The biggest conflict in the movie is within Will Smith's character's attitude, not necessarily good vs. evil.I think much of the criticism I have read about is motivated by expectations that were not met, which isn't fair at all. If you watch Hancock with only the expectation of being entertained, you will leave happy. Its a good movie, don't jump on the bandwagon of not liking it just because you can. Give it a chance and take it for what it is, a July 4th action/comedy."
predict([hancock_review])

## Dark Knight

*This is a positive review*

Im just gonna start off by saying I LOVE this movie.Its one of my favorites of all time. I honestly cant think of too much wrong with this movie other than its a little long and Batmans by now infamous voice. But everything else is top notch. The acting,story,atmosphere,and actions scenes are all amazing. If you haven't seen this movie see it right now! I went into this not expecting to much but I came out blown away, I cant imagine any movie being much better. I'll just have to wait for The Dark Knight Rises to release to see if anything can be better. Until then, this stands as the best movie I've ever seen

In [None]:
# Dark Knight review (labelled positive above): score it with the trained model
dark_knight_review = "Im just gonna start off by saying I LOVE this movie.Its one of my favorites of all time. I honestly cant think of too much wrong with this movie other than its a little long and Batmans by now infamous voice. But everything else is top notch. The acting,story,atmosphere,and actions scenes are all amazing. If you haven't seen this movie see it right now! I went into this not expecting to much but I came out blown away, I cant imagine any movie being much better. I'll just have to wait for The Dark Knight Rises to release to see if anything can be better. Until then, this stands as the best movie I've ever seen"
predict([dark_knight_review])

## Scary Movie (2000)

*This is a positive review*

I'm not sure what the people who dumped on this film were expecting, but I found it entertaining. I am a fan of good horror and good satire, so I was able to recognize and appreciate the jokes.

True, this film relies a bit much on poddy humor, but at the same time, there is a cleverness in the way all the films being spoofed (and there are many) get twisted together and played with here. Some of the slapstick and sight gags do fall flat, but the movie is quick paced and the satire is fun. If you don't go in expecting Shakespeare, you'll enjoy it. And you might even get some Shakespeare.

In [None]:
# Scary Movie review (labelled positive above): score it with the trained model
scary_movie_review = "I'm not sure what the people who dumped on this film were expecting, but I found it entertaining. I am a fan of good horror and good satire, so I was able to recognize and appreciate the jokes.True, this film relies a bit much on poddy humor, but at the same time, there is a cleverness in the way all the films being spoofed (and there are many) get twisted together and played with here. Some of the slapstick and sight gags do fall flat, but the movie is quick paced and the satire is fun. If you don't go in expecting Shakespeare, you'll enjoy it. And you might even get some Shakespeare."
predict([scary_movie_review])

## Disaster Movie
*This is a negative review*

This movie was on TV once so I decided to watch it since I wouldn't have to pay any money for it.

The main character Will (played by Matt Lanter) has a dream where he meets a stone age Amy Winehouse (I think it's supposed to be a joke) who tells him that the world is going to end the day this movie premiered in the cinema (Coincidence?) and to stop it they must find a crystal skull. Matt later wakes up to celebrate his super-sweet sixteenth birthday (despite him being in his twenties) in a scene where we get one unfunny joke and celebrity impersonation after another. Then disaster strikes (it seems kinda redundant though since this movie already is one), hurricanes, earthquakes, meteorites and other classic disaster movie ingredients hit planet earth one after another. Will, followed by his friends: Juney (Crista Flanagan), Calvin (Gary "G Thang" Johnson), and Lisa (Kim Kardashian) go out into the city and tries to find his girlfriend and a safe place and later realizes that he has to find the crystal skull to set things right.

The problem with this movie is, just like other movies by Jason Friedberg and Aaron Seltzer, that it doesn't stay on the theme but goes all over the place and try to spoof almost every popular movie that was made that year. And I use the term "spoof" lightly. Once again "Seltzerberger" show that they only grasp the most superficial concept of what humor is and never really bother to dig deeper and see what it is that makes things funny. Sometimes doing things outside the theme can work but not if it takes up a majority of the movie. And (for me) this movie is worse than Epic Movie. Yes you read right, Worse than Epic Movie. That movie at least had a story. Sure it was borrowed and "crapified" but at least it was a story. In this movie, everything that happens during the second act, when they try to find a safe place/figure out where they should go, just feels like a filler where the gang stumble into one reference after another. "Seltzerberger's" over-reliance on potty humor, movie/TV references, random musical numbers, deliberately obvious stunt-doubles and crappy special effects does not save them this time.

Seltzer and Friedberg, your movie sucks horribly. If I may paraphrase a line from "Billy Madison" I'd like to say: I award you only one star, and may God have mercy on your souls.

Once again, if you want to see a GOOD movie made in the style that this train wreck was trying (and failing) to emulate, watch "Hotshots" "Airplane!", "The naked gun" movies, "Top Secret" instead.

In [None]:
# Disaster Movie review (labelled negative above).
# The text is written as two adjacent string literals, which Python joins into
# one string: a double-quoted literal may not contain a raw line break.
predict([
    "This movie was on TV once so I decided to watch it since I wouldn't have to pay any money for it.The main character Will (played by Matt Lanter) has a dream where he meets a stone age Amy Winehouse (I think it's supposed to be a joke) who tells him that the world is going to end the day this movie premiered in the cinema (Coincidence?) and to stop it they must find a crystal skull. Matt later wakes up to celebrate his super-sweet sixteenth birthday (despite him being in his twenties) in a scene where we get one unfunny joke and celebrity impersonation after another. Then disaster strikes (it seems kinda redundant though since this movie already is one), hurricanes, earthquakes, meteorites and other classic disaster movie ingredients hit planet earth one after another. Will, followed by his friends: Juney (Crista Flanagan), Calvin (Gary \"G Thang\" Johnson), and Lisa (Kim Kardashian) go out into the city and tries to find his girlfriend and a safe place and later realizes that he has to find the crystal skull to set things right.The problem with this movie is, just like other movies by Jason Friedberg and Aaron Seltzer, that it doesn't stay on the theme but goes all over the place and try to spoof almost every popular movie that was made that year. And I use the term \"spoof\" lightly. Once again \"Seltzerberger\" show that they only grasp the most superficial concept of what humor is and never really bother to dig deeper and see what it is that makes things funny. Sometimes doing things outside the theme can work but not if it takes up a majority of the movie. And (for me) this movie is worse than Epic Movie. Yes you read right, Worse than Epic Movie. That movie at least had a story. Sure it was borrowed and \"crapified\" but at least it was a story. In this movie, everything that happens during the second act, when they try to find a safe place/figure out where they should go, just feels like a filler where the gang stumble into one reference after another. "
    "\"Seltzerberger\'s\" over-reliance on potty humor, movie/TV references, random musical numbers, deliberately obvious stunt-doubles and crappy special effects does not save them this time.Seltzer and Friedberg, your movie sucks horribly. If I may paraphrase a line from \'Billy Madison\' I\'d like to say: I award you only one star, and may God have mercy on your souls.Once again, if you want to see a GOOD movie made in the style that this train wreck was trying (and failing) to emulate, watch \"Hotshots\" \"Airplane!\", \"The naked gun\" movies, \"Top Secret\" instead."
])

## Twilight

*Negative*

I've had mosquito bites that were more passionate than this undead, unrequited, and altogether unfun pseudo-romantic riff on 'Romeo and Juliet.'

In [None]:
# Twilight review (labelled negative above): score it with the trained model
twilight_review = "I've had mosquito bites that were more passionate than this undead, unrequited, and altogether unfun pseudo-romantic riff on 'Romeo and Juliet."
predict([twilight_review]) # Flaw 1

## Bright
*Negative*

While I had the misfortune to see 'Bright' in a theater, most people will simply press 'play' out of curiosity on their Roku remote. I am willing to concede that this might elevate the experience a little ... the ability to take a quick trip to the kitchen or restroom after shouting 'no, don't pause it' to your partner on the couch will be liberating.

In [None]:
# Bright review (labelled negative above): score it with the trained model
bright_review = "While I had the misfortune to see 'Bright' in a theater, most people will simply press 'play' out of curiosity on their Roku remote. I am willing to concede that this might elevate the experience a little ... the ability to take a quick trip to the kitchen or restroom after shouting 'no, don't pause it' to your partner on the couch will be liberating."
predict([bright_review])

## Battlefield Earth

*Negative*

'Battlefield Earth' saves its scariest moment for the end: a virtual guarantee that there will be a sequel.

In [None]:
# Battlefield Earth review (labelled negative above): score it with the trained model
battlefield_earth_review = "Battlefield Earth' saves its scariest moment for the end: a virtual guarantee that there will be a sequel."
predict([battlefield_earth_review])

## Sex and the City 2
*Negative*

When viewed as a rom-com, 'Sex and the City 2' is terrible and crappy and a horrific inversion of everything the show once was. But when viewed as a science fiction film, 'SATC2' is subversive, stylish and chilling. Like The Island from 'Lost,' we may never know The City's true identity — Is it a VR computer program? A malevolent interdimensional god? Satan?

In [None]:
# Sex and the City 2 review (labelled negative above): score it with the trained model
satc2_review = "When viewed as a rom-com, 'Sex and the City 2' is terrible and crappy and a horrific inversion of everything the show once was. But when viewed as a science fiction film, 'SATC2' is subversive, stylish and chilling. Like The Island from 'Lost,' we may never know The City's true identity — Is it a VR computer program? A malevolent interdimensional god? Satan?"
predict([satc2_review])

# CONCLUSION

The sentiment analysis system performs well at its task, though it has some difficulty understanding sarcastic reviews and trolling.

#### Disclaimer - Pardon me if some of your favourite movies were on the Negative list!