<a href="https://colab.research.google.com/github/sid7shetty/NLP/blob/main/NLP__Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Import necessary libraries
import nltk
from nltk.corpus import movie_reviews
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Step 2: Fetch the movie_reviews corpus through NLTK's downloader.
# The call is left as the cell's last expression so its return value is displayed.
nltk.download(info_or_id='movie_reviews')

[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.


True

In [None]:
# Step 3: Build the labelled corpus as a list of (token list, category) pairs,
# walking the corpus one category at a time and one file at a time within it.
documents = [
    (list(movie_reviews.words(review_id)), label)
    for label in movie_reviews.categories()
    for review_id in movie_reviews.fileids(label)
]

In [None]:
# Step 4: Shuffle the documents (the train/test split itself happens in Step 5)
import random

# The corpus was loaded category by category, so the labels are mixed here
# before anything downstream consumes the list. A dedicated, seeded generator
# makes the resulting order repeatable on a fresh kernel run and leaves the
# global `random` state untouched.
# Note: the shuffle is in place, so re-running only this cell reshuffles the
# already-shuffled list; re-run from Step 3 to get the canonical order back.
random.Random(42).shuffle(documents)

In [None]:
# Step 5: Extract features using Bag of Words (BoW) representation
# Count every (lowercased) token in the whole corpus.
all_words = nltk.FreqDist(w.lower() for w in movie_reviews.words())
# Keep the 3000 most frequent words as the vocabulary. Iterating a FreqDist
# directly yields words in first-seen order rather than by frequency, so
# most_common() is required to actually rank them by count.
word_features = [word for word, _count in all_words.most_common(3000)]

def document_features(document, vocabulary=None):
    """Build the bag-of-words presence features for one tokenised document.

    Args:
        document: Iterable of word tokens (strings) making up the document.
        vocabulary: Iterable of lowercase words to test for. When omitted,
            the module-level ``word_features`` list is used.

    Returns:
        A dict with one ``'contains(<word>)'`` key per vocabulary word, whose
        value is True when that word occurs in the document and False
        otherwise.
    """
    if vocabulary is None:
        vocabulary = word_features
    # The vocabulary is built from lowercased tokens, so fold the document's
    # tokens the same way; otherwise a capitalised token could never match.
    document_words = {token.lower() for token in document}
    return {f'contains({word})': (word in document_words) for word in vocabulary}

# Convert each (tokens, label) document into a (feature dict, label) pair.
featuresets = [
    (document_features(tokens), label)
    for tokens, label in documents
]
# Hold out a quarter of the examples for testing; the fixed random_state
# pins the partition that train_test_split draws from this list.
train_set, test_set = train_test_split(
    featuresets,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Step 6: Fit NLTK's Naive Bayes classifier on the (feature dict, label)
# pairs held in the training split.
classifier = nltk.NaiveBayesClassifier.train(labeled_featuresets=train_set)



In [None]:
# Step 7: Predict a label for every test example.
# Each test item is a (feature dict, label) pair; only the feature dicts are
# handed to the classifier, the true labels are kept back for evaluation.
predictions = classifier.classify_many(
    [features for features, _label in test_set]
)



In [None]:
# Step 8: Score the predictions against the true labels of the test split
# (the second element of every test pair) and report the share that match.
accuracy = accuracy_score(
    [label for _features, label in test_set],
    predictions,
)
print(f"Accuracy: {accuracy:.2%}")



Accuracy: 82.00%


In [None]:
# Step 9: Display classification report
# Per-class precision/recall/F1 for the test split, computed from the true
# labels (second element of each test pair) and the predicted labels.
# sep="" stops print() from inserting a space ahead of the report text, which
# would push the report's header row one column out of line with its rows.
print(
    "\nClassification Report:\n",
    classification_report([x[1] for x in test_set], predictions),
    sep="",
)


Classification Report:
               precision    recall  f1-score   support

         neg       0.79      0.86      0.82       245
         pos       0.85      0.78      0.82       255

    accuracy                           0.82       500
   macro avg       0.82      0.82      0.82       500
weighted avg       0.82      0.82      0.82       500

