In [2]:
import nltk
from nltk.corpus import movie_reviews
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy as nltk_accuracy

In [3]:
# Download the movie reviews dataset
nltk.download('movie_reviews')

[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


True

In [4]:
# Extract features from the dataset
def extract_features(words):
    return dict([(word, True) for word in words])

In [5]:
# Load the dataset
positive_fileids = movie_reviews.fileids('pos')
negative_fileids = movie_reviews.fileids('neg')

In [6]:
features_positive = [(extract_features(movie_reviews.words(fileids=[f])), 'Positive') for f in positive_fileids]
features_negative = [(extract_features(movie_reviews.words(fileids=[f])), 'Negative') for f in negative_fileids]

In [7]:
# Split the data into training and testing datasets
threshold = 0.8
num_positive = int(threshold * len(features_positive))
num_negative = int(threshold * len(features_negative))

In [8]:
features_train = features_positive[:num_positive] + features_negative[:num_negative]
features_test = features_positive[num_positive:] + features_negative[num_negative:]

In [9]:
# Train a Naive Bayes classifier
classifier = NaiveBayesClassifier.train(features_train)

In [10]:
# Test the classifier
print(f'Accuracy: {nltk_accuracy(classifier, features_test) * 100:.2f}%')

Accuracy: 73.50%


In [12]:
# Sample input for testing
input_reviews = [
    "The movie was fantastic! I really enjoyed it.",
    "The movie was terrible. I hated it.",
]

print("\nPredictions:")
for review in input_reviews:
    print(f'Review: {review}')
    probabilities = classifier.prob_classify(extract_features(review.split()))
    predicted_sentiment = probabilities.max()
    print(f'Predicted sentiment: {predicted_sentiment}')
    print(f'Probability: {probabilities.prob(predicted_sentiment):.2f}\n')



Predictions:
Review: The movie was fantastic! I really enjoyed it.
Predicted sentiment: Positive
Probability: 0.65

Review: The movie was terrible. I hated it.
Predicted sentiment: Negative
Probability: 0.56

