In [1]:
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn import naive_bayes
from sklearn.metrics import roc_auc_score,accuracy_score

In [2]:
dataset = pd.read_csv('reviews.txt',sep = '\t', names =['Reviews','Comments'])

In [3]:
dataset.head(10)

Unnamed: 0,Reviews,Comments
0,1,The Da Vinci Code book is just awesome.
1,1,this was the first clive cussler i've ever rea...
2,1,i liked the Da Vinci Code a lot.
3,1,i liked the Da Vinci Code a lot.
4,1,I liked the Da Vinci Code but it ultimatly did...
5,1,that's not even an exaggeration ) and at midni...
6,1,"I loved the Da Vinci Code, but now I want some..."
7,1,"i thought da vinci code was great, same with k..."
8,1,The Da Vinci Code is actually a good movie...
9,1,I thought the Da Vinci Code was a pretty good ...


In [4]:
stopset = set(stopwords.words('english'))
vectorizer = TfidfVectorizer(use_idf = True,lowercase = True, strip_accents='ascii',stop_words=stopset)

In [5]:
y = dataset.Reviews

In [6]:
X = vectorizer.fit_transform(dataset.Comments)

In [7]:
print(y.shape)
print(X.shape)

(6918,)
(6918, 2011)


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [9]:
clf = naive_bayes.MultinomialNB()
clf.fit(X_train,y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [10]:
accuracy_score(y_test,clf.predict(X_test))*100

97.47109826589595

##### Testing my model with different movie reviews

In [11]:
review = "This movie was the best written, acted, visual effected, etc. movie. This movie was the best movie I have ever seen. I am a huge Christopher Nolan fan and this movie was his finest. Matthew McConaughey turned in his best performance of his lifetime. Anne Hathaway was an amazing supporting actress and compared to her performance in Les Miserables, I have no idea how she didn't get an Oscar for this. The visual effects were more than just Oscar worthy. They were pioneering. I have never seen anything like it. One thing I would recommend is having a little previous knowledge about space. Not like Einstein stuff though. I would recommend you see this movie as fast as you can if you are a Nolan fan or not. I give this movie a rating of 97 out of 100."
movie_review = np.array([review])
movie_vector = vectorizer.transform(movie_review)
pred = clf.predict(movie_vector)
print('Review:', review)
print('\n'+'Overall Review:', 'Good' if pred else 'Bad')

Review: This movie was the best written, acted, visual effected, etc. movie. This movie was the best movie I have ever seen. I am a huge Christopher Nolan fan and this movie was his finest. Matthew McConaughey turned in his best performance of his lifetime. Anne Hathaway was an amazing supporting actress and compared to her performance in Les Miserables, I have no idea how she didn't get an Oscar for this. The visual effects were more than just Oscar worthy. They were pioneering. I have never seen anything like it. One thing I would recommend is having a little previous knowledge about space. Not like Einstein stuff though. I would recommend you see this movie as fast as you can if you are a Nolan fan or not. I give this movie a rating of 97 out of 100.

Overall Review: Good


In [12]:
review = "On my list of 'worst movies ever made'. There is nothing likable about this movie. It's totally worthless noise.Honestly, I cannot think of one single redeeming moment... the story is stupid, the script is trite, the characters are 1-dimentional (as is the acting); I could care less if they lived or died. There's nothing special about the effects, nor the cinematography, nor the score. Several pointless scenes of mindless, ear-numbing shoot-em-up's that neither moved the plot along, nor offered any entertainment value, whatsoever.No one should be subjected to this film, ever, under any circumstances. The only thing this film is really good for is fire kindling."
movie_review = np.array([review])
movie_vector = vectorizer.transform(movie_review)
pred = clf.predict(movie_vector)
print('Review:', review)
print('\n'+'Overall Review:', 'Good' if pred else 'Bad')

Review: On my list of 'worst movies ever made'. There is nothing likable about this movie. It's totally worthless noise.Honestly, I cannot think of one single redeeming moment... the story is stupid, the script is trite, the characters are 1-dimentional (as is the acting); I could care less if they lived or died. There's nothing special about the effects, nor the cinematography, nor the score. Several pointless scenes of mindless, ear-numbing shoot-em-up's that neither moved the plot along, nor offered any entertainment value, whatsoever.No one should be subjected to this film, ever, under any circumstances. The only thing this film is really good for is fire kindling.

Overall Review: Bad


In [13]:
review = "This movie was really amazing and awesome, I was more hyped for Ralph Breaks the Internet than Incredibles 2. I am glad that a Wreck it Ralph sequel happened, I just love this movie a lot and I like this movie better than the first one, they're both great movies, Ralph and Vanellope are one of the best friends of all time in Disney history."
movie_review = np.array([review])
movie_vector = vectorizer.transform(movie_review)
pred = clf.predict(movie_vector)
print('Review:', review)
print('\n'+'Overall Review:', 'Good' if pred else 'Bad')

Review: This movie was really amazing and awesome, I was more hyped for Ralph Breaks the Internet than Incredibles 2. I am glad that a Wreck it Ralph sequel happened, I just love this movie a lot and I like this movie better than the first one, they're both great movies, Ralph and Vanellope are one of the best friends of all time in Disney history.

Overall Review: Good


In [14]:
review = "Why i went to the midnight showing of this movie i don't know but i guess its better then buying a ticket to this movie. Honestly i cant say i have ever seen a movie as bad as this in my entire life. This movie makes gigli look like an OK movie. There isn't one funny moment in the entire movie it is basically all crappy acting mixed with crappy comedy. The only way you could possibly find this funny is if you are high or have an IQ lower then 30. Kim Kardashian and Carmen ELectra's acting is so atrocious i actually kind of felt sick while watching it. I mean you would be way better off watching KIm's sex tape cause at least its more entertaining then this movie. I highly advise everyone to not see this movie ever and try to get these to directors to never make another god awful movie again. Its kind of sad 2 grown men find this funny. PATHETIC"
movie_review = np.array([review])
movie_vector = vectorizer.transform(movie_review)
pred = clf.predict(movie_vector)
print('Review:', review)
print('\n'+'Overall Review:', 'Good' if pred else 'Bad')

Review: Why i went to the midnight showing of this movie i don't know but i guess its better then buying a ticket to this movie. Honestly i cant say i have ever seen a movie as bad as this in my entire life. This movie makes gigli look like an OK movie. There isn't one funny moment in the entire movie it is basically all crappy acting mixed with crappy comedy. The only way you could possibly find this funny is if you are high or have an IQ lower then 30. Kim Kardashian and Carmen ELectra's acting is so atrocious i actually kind of felt sick while watching it. I mean you would be way better off watching KIm's sex tape cause at least its more entertaining then this movie. I highly advise everyone to not see this movie ever and try to get these to directors to never make another god awful movie again. Its kind of sad 2 grown men find this funny. PATHETIC

Overall Review: Bad
