In [None]:
import nltk

nltk.download("movie_reviews")

In [103]:
def extract_features(word_list):
    return dict([(word, True) for word in word_list])


In [104]:
#Load positive and negative reviews from new datasets
from nltk.corpus import movie_reviews as mrev

positive_fileids = mrev.fileids('pos')
negative_fileids = mrev.fileids('neg')

In [105]:
positive_features = [(extract_features(mrev.words(fileids=[f])), 'Positive') for f in positive_fileids]
negative_features = [(extract_features(mrev.words(fileids=[f])), 'Negative') for f in negative_fileids]

print('Positive Features Length: ', len(positive_features))
print('Negative Features Length: ', len(negative_features))

Positive Features Length:  1000
Negative Features Length:  1000


In [106]:
#Splitting the data for training and testing as 80/20

threshold_factor = 0.8 #80% for training
threshold_positive = int(threshold_factor * len(positive_features))
threshold_negative = int(threshold_factor * len(negative_features))

print(threshold_positive)
print(threshold_negative)

800
800


In [107]:
#Take date from 0 to threshold value

features_train = positive_features[:threshold_positive] + negative_features[:threshold_negative]
features_test = positive_features[threshold_positive:] + negative_features[threshold_negative:]

print(len(features_train))
print(len(features_test))

1600
400


In [108]:
#Algorithm to train the model
from nltk.classify import NaiveBayesClassifier as nbclassifier

classifier = nbclassifier.train(features_train)

print(nltk.classify.util.accuracy(classifier, features_test))

0.735


In [109]:
# Top 10 informative words 
print('Top 10 informative words:')
for item in classifier.most_informative_features()[:10]:
    print(item[0])
    

Top 10 informative words:
outstanding
insulting
vulnerable
ludicrous
uninvolving
astounding
avoids
fascination
affecting
darker


In [110]:
input_reviews = [
    "This movie is amazing",
    "I loved it",
    "The movie is very slow and story is going on in circle",
    "Movie is pathetic!",
    "I would never recommend it to anyone",
    "One of the best Marvel movie till date",
    "The direction was terrible and the story was all over the place"
]

for review in input_reviews:
    print('\nReview: ', review)
    prob_classify = classifier.prob_classify(extract_features(review.split()))
    predct_sentiment = prob_classify.max()
    print('Predicted Sentiment: ', predct_sentiment)
    print('Probability: ', round(prob_classify.prob(predct_sentiment), 2))


Review:  This movie is amazing
Predicted Sentiment:  Positive
Probability:  0.61

Review:  I loved it
Predicted Sentiment:  Positive
Probability:  0.62

Review:  The movie is very slow and story is going on in circle
Predicted Sentiment:  Positive
Probability:  0.71

Review:  Movie is pathetic!
Predicted Sentiment:  Positive
Probability:  0.5

Review:  I would never recommend it to anyone
Predicted Sentiment:  Negative
Probability:  0.5

Review:  One of the best Marvel movie till date
Predicted Sentiment:  Positive
Probability:  0.7

Review:  The direction was terrible and the story was all over the place
Predicted Sentiment:  Negative
Probability:  0.63
