# Text Sentiment Analysis (using scikit-learn and NLTK)

In [1]:
#importing the libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
import nltk
from nltk.corpus import movie_reviews

In [2]:
# Download NLTK resources (the corpus reader used below needs the
# movie_reviews corpus available locally)
nltk.download("movie_reviews")

# Prepare data: one (token_list, category) pair per review file. The list is
# built category by category, so it starts out grouped by label.
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Shuffle in place with a dedicated, seeded generator. The previous unseeded
# global shuffle produced a different document order on every fresh kernel,
# so the downstream split and the reported accuracy were not reproducible
# under Restart & Run All.
np.random.default_rng(42).shuffle(documents)

[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.


In [3]:
# Hold out 20% of the (tokens, label) pairs for testing; the fixed
# random_state pins the partition for a given input order.
train_documents, test_documents = train_test_split(
    documents,
    test_size=0.2,
    random_state=42,
)

# The label is the second element of every pair.
y_train = [pair[1] for pair in train_documents]
y_test = [pair[1] for pair in test_documents]

# Re-join each token list into a single space-separated string per review.
train_words = [" ".join(pair[0]) for pair in train_documents]
test_words = [" ".join(pair[0]) for pair in test_documents]

In [4]:
# Convert words to features using Bag-of-Words.
# The vocabulary is learned from the training texts only (fit_transform);
# the test texts are then encoded with that same fitted vectorizer
# (transform), so no information from the test split enters the vocabulary.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_words)
X_test = vectorizer.transform(test_words)

In [5]:
# Build and train the Naive Bayes classifier. fit() returns the fitted
# estimator itself, so construction and training chain into one statement.
model = MultinomialNB().fit(X_train, y_train)

# Predict a category label for every held-out review
y_pred = model.predict(X_test)

In [6]:
# Evaluate the model: share of test reviews whose predicted label
# matches the true label, reported to two decimal places
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.81
