## Importing Libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import re
import nltk

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer

## Importing Dataset

In [93]:
# Load the labelled movie-review data set from the working directory.
dataset = pd.read_csv('movie.csv')
# Raw review text for every row of the file (not truncated).
X = dataset['text']
# Last column of the first 1000 rows; the preview below shows `label` as the
# last column.
# NOTE(review): X keeps every row while y keeps only 1000. The corpus-building
# cell reads just the first 1000 reviews, so the two line up in practice.
y = dataset.iloc[:1000, -1]
# Preview the first five rows.
dataset.head()

Unnamed: 0,text,label
0,I grew up (b. 1965) watching and loving the Th...,0
1,"When I put this movie in my DVD player, and sa...",0
2,Why do people who do not know what a particula...,0
3,Even though I have great interest in Biblical ...,0
4,Im a die hard Dads Army fan and nothing will e...,1


## Preprocessing

In [15]:
all_stopwords = stopwords.words('english')


def preprocessing(review):
    """Normalise one raw review into a space-separated string of tokens.

    Steps, in order: replace every non-letter character with a space,
    lower-case, split on whitespace, drop English stopwords, Porter-stem each
    token, pass each stem through the WordNet lemmatizer, and re-join the
    tokens with single spaces.

    Parameters
    ----------
    review : str
        Raw review text.

    Returns
    -------
    str
        The cleaned tokens joined by single spaces ('' when nothing survives).
    """
    # Anything in the review that is not an ASCII letter becomes a space.
    review = re.sub('[^a-zA-Z]', ' ', review)

    # Lower-case, then split on whitespace.
    tokens = review.lower().split()

    # Build the lookup set once per call instead of once per token. It is
    # built here rather than at module level so that later edits to
    # `all_stopwords` are still honoured.
    # NOTE(review): NLTK's English list presumably contains negations such as
    # "not"; dropping them may hurt sentiment classification -- confirm.
    stopword_set = set(all_stopwords)
    tokens = [word for word in tokens if word not in stopword_set]

    # Stemming
    ps = PorterStemmer()
    tokens = [ps.stem(word) for word in tokens]

    # Lemmatizing, applied to the already-stemmed tokens.
    lm = WordNetLemmatizer()
    tokens = [lm.lemmatize(word) for word in tokens]

    # Merge the tokens back into a single string.
    return ' '.join(tokens)

### Building the Cleaned Corpus

In [88]:
# Clean the first 1000 reviews.
# The text is read straight from `dataset` instead of through `X`: the
# vectorization cell rebinds `X` to the feature matrix, so indexing `X` here
# breaks as soon as this cell is re-run after that one. Positional slicing
# also avoids relying on the index labels being 0..999.
corpus = [preprocessing(text) for text in dataset['text'].iloc[:1000]]

In [142]:
# Peek at the first two cleaned reviews.
corpus[:2]

'Not'

## Vectorization

In [90]:
from sklearn.feature_extraction.text import CountVectorizer
def vectorize(corpus, vectorizer=None):
    """Turn cleaned documents into a dense bag-of-words count matrix.

    Parameters
    ----------
    corpus : iterable of str
        Pre-processed documents.
    vectorizer : CountVectorizer-like, optional
        Omit it to get the original behaviour: a brand-new ``CountVectorizer``
        is fitted on ``corpus``. Pass an unfitted vectorizer to have it fitted
        in place, so the caller keeps the learned vocabulary. Pass an already
        fitted one (it has a ``vocabulary_`` attribute) to project ``corpus``
        onto its existing vocabulary without refitting.

    Returns
    -------
    array
        Dense count matrix with one row per document.
    """
    if vectorizer is None:
        vectorizer = CountVectorizer()

    # A fitted vectorizer must only transform; refitting would change the
    # columns and make the result incompatible with a trained model.
    if hasattr(vectorizer, 'vocabulary_'):
        counts = vectorizer.transform(corpus)
    else:
        counts = vectorizer.fit_transform(corpus)
    return counts.toarray()

In [146]:
# Bag-of-words count matrix for the cleaned corpus.
# NOTE: this rebinds `X`, which the data-loading cell set to the raw review
# text; cells that expect text in `X` must not be re-run after this one.
X = vectorize(corpus)

1000

## Splitting into training and testing data

In [97]:
from sklearn.model_selection import train_test_split
# Hold out a quarter of the samples for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

## Model Creation: Logistic Regression

In [104]:
from sklearn.linear_model import LogisticRegression
# Logistic-regression classifier with a fixed seed, fitted on the training split.
classifier = LogisticRegression(random_state=0)
classifier.fit(X=X_train, y=y_train)

LogisticRegression(random_state=0)

## Predicting on the Test Data

In [105]:
# Predict a label for every held-out review.
y_pred = classifier.predict(X_test)
# Show the first ten predictions.
y_pred[:10]

array([0, 0, 1, 0, 1, 1, 1, 1, 0, 0], dtype=int64)

## Analyzing Accuracy and Confusion Matrix

In [106]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Confusion matrix of the true labels against the predictions on the test split.
cm = confusion_matrix(y_test, y_pred)

print('Logistic Regression \n', cm)
# Accuracy expressed as a percentage.
print(100 * accuracy_score(y_test, y_pred))

Logistic Regression 
 [[101  35]
 [ 24  90]]
76.4


# Predicting Random Reviews

Sample 1 (Negative): Oh my. It was like a High School drama class was given access to the GOT sets and allowed to act out their own little scripts.
Where were the characters? GONE! Wonderful characters. NOT THERE! Jon Snow was so bland as to be almost lobotomized. Bron, Sam, Davos, Sansa, Arya, Yara Greyjoy, Brienne - all sitting around in that nothing scene - it was like the actors were there, but no one had given them any lines for their characters; because all the characters had disappeared into some talent sucking ether.
To say the dialogue was like in a bad soap opera is too generous.
Clear D&D just wanted to get this over with. This series should have ended with Season 6.
So sad. So sad.

Sample 2 (Positive): Varys visits Ned again in the dungeons and tells what Sansa did to save his life and advises him to accept the offer. Catelyn negotiates with the repulsive Lord Walder Frey the crossing of the river by Robb and his army and Frey accepts provided Arya and Robb marry with his children. Jon Snow receives a valuable sword from Mormont and lives a dilemma to make a choice of helping Robb or stay with the Night's Watch and to live with the consequences for the rest of his life. The infection in the wound of Drogo makes him weak and Daenerys summons the witch to heal him. She tells that a life must be sacrificed in exchange and she kills Drogo's horse. The Lannister army attacks Robb's army and discovers that Robb lured them, leaving only two thousand men behind, while eighteen thousand men have attacked Jaime's army and made him prisoner. Arya sees her father being brought chained in front of the crowd and falsely confessing his treason to save Sansa and Arya. However Joffrey betrays the deal and Ned is beheaded.
"Baelor" is so far, the best episode of "Game of Thrones". I would never expect the fate of Lord Eddard 'Ned' Stark. I do not believe that the witch is making a spell to heal Drogo since he slaughtered her people. My guess is that the life in exchange per Drogo's life will be his unborn child. Joffrey is probably the most despicable character so far. My vote is ten.

In [166]:
from random import randint  # no longer used by this cell; kept in case other cells rely on the name

# Classify a review typed in by the user with the classifier that was fitted
# on the training split.
#
# The earlier version appended the review to `corpus` with a *random* label,
# refitted `classifier` on that data and then predicted the very row it had
# just trained on. The verdict was therefore pulled towards the random label,
# and the evaluated model was overwritten. Nothing is retrained or mutated here.
review_text = preprocessing(input("Enter any review: "))

# CountVectorizer sorts its vocabulary, so fitting a fresh one on the same
# corpus reproduces the column order of the matrix `classifier` was trained
# on. Words the corpus never contained are simply ignored by `transform`.
review_vectorizer = CountVectorizer().fit(corpus)
sample = review_vectorizer.transform([review_text]).toarray()

# Guard against stale kernel state (e.g. `corpus` or `classifier` changed by
# an out-of-order run) instead of failing deep inside scikit-learn.
if sample.shape[1] != classifier.coef_.shape[1]:
    raise RuntimeError(
        "Vocabulary size does not match the trained classifier; "
        "re-run the notebook from the top."
    )

prediction = classifier.predict(sample)
print('')
print('')
print("USER LIKED IT" if prediction[0] else "USER DID NOT LIKE IT")

Enter any review: Oh my. It was like a High School drama class was given access to the GOT sets and allowed to act out their own little scripts. #Where were the characters? GONE! Wonderful characters. NOT THERE! Jon Snow was so bland as to be almost lobotomized. Bron, Sam, Davos, Sansa, Arya, Yara Greyjoy, Brienne - all sitting around in that nothing scene - it was like the actors were there, but no one had given them any lines for their characters; because all the characters had disappeared into some talent sucking ether. #To say the dialogue was like in a bad soap opera is too generous. #Clear D&D just wanted to get this over with. This series should have ended with Season 6. #So sad. So sad.


USER DID NOT LIKE IT
