In [1]:
# !pip install nltk scikit-learn

In [None]:
import nltk
from nltk.corpus import movie_reviews
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy as nltk_accuracy
import random
from nltk.tokenize import word_tokenize

# Download the movie_reviews corpus and the tokenizer models used by
# word_tokenize further down.
nltk.download('movie_reviews')
nltk.download('punkt')
# Newer NLTK releases make word_tokenize load the 'punkt_tab' resource rather
# than the pickled 'punkt' models; fetching both keeps the script working on
# old and new versions alike.
nltk.download('punkt_tab')

# Load every review as a (list of word tokens, category label) pair
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Shuffle in place so the train/test split below mixes both categories
random.shuffle(documents)

# Define a feature extractor
# Distinct corpus words in first-occurrence order; filled in lazily on first use.
_corpus_vocabulary = None


def _get_corpus_vocabulary():
    """Return the distinct words of the movie_reviews corpus, computed once.

    ``dict.fromkeys`` removes duplicates while keeping first-occurrence order,
    so feature dicts built from this vocabulary have the same keys, in the
    same order, as iterating over the raw token stream would produce.
    """
    global _corpus_vocabulary
    if _corpus_vocabulary is None:
        _corpus_vocabulary = tuple(dict.fromkeys(movie_reviews.words()))
    return _corpus_vocabulary


def document_features(document, word_features=None):
    """Build a bag-of-words presence feature dict for one document.

    Args:
        document: Iterable of word tokens making up the document.
        word_features: Optional iterable of vocabulary words to test for.
            When omitted, every distinct word of the movie_reviews corpus
            is used.

    Returns:
        A dict mapping ``'contains(<word>)'`` to ``True`` when the word occurs
        in ``document`` and ``False`` otherwise, one entry per distinct
        vocabulary word.
    """
    if word_features is None:
        # Re-reading and re-scanning the whole corpus token stream on every
        # call is by far the dominant cost; the cached vocabulary yields the
        # same dict without the repeated work.
        # NOTE(review): the full vocabulary still makes each feature dict
        # large; capping it to the most frequent words would shrink it but
        # would change the trained model, so it is not done here.
        word_features = _get_corpus_vocabulary()
    document_words = set(document)
    return {f'contains({word})': (word in document_words)
            for word in word_features}

# Turn each (words, label) document into a (feature dict, label) pair
featuresets = [(document_features(words), label) for words, label in documents]

# Hold out the first 100 shuffled documents for testing; train on the rest
test_set = featuresets[:100]
train_set = featuresets[100:]

# Fit a Naive Bayes model on the training portion
classifier = NaiveBayesClassifier.train(train_set)

# Report accuracy on the held-out documents
print(f'Accuracy: {nltk_accuracy(classifier, test_set):.2f}')

# List the 10 most informative features of the trained classifier
classifier.show_most_informative_features(n=10)

# Function to classify a custom review
def classify_review(review):
    """Return the trained classifier's label for a raw review string.

    The text is lower-cased and tokenized with ``word_tokenize``, converted to
    a feature dict by ``document_features``, and passed to
    ``classifier.classify``.
    """
    lowered = review.lower()
    return classifier.classify(document_features(word_tokenize(lowered)))

# Hand-written sample reviews to run through the trained classifier
custom_reviews = [
    "This movie was amazing! The acting was great, plot was wonderful.",
    "I hated this movie. It was boring and too long.",
    "An average movie with decent performances but a weak storyline.",
]

# Print each review followed by the label the classifier assigns to it
for review in custom_reviews:
    sentiment = classify_review(review)
    print(f'Review: {review}\nSentiment: {sentiment}\n')


[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
