In [26]:
import nltk
import random
from nltk.corpus import movie_reviews
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Sentiment classification of the NLTK movie_reviews corpus: build TF-IDF
# features, train a linear-kernel SVM, report accuracy on a held-out split,
# then classify one sample sentence.

# One seed shared by the shuffle and the train/test split. Without a seeded
# shuffle the list handed to train_test_split is ordered differently on every
# run, so the fixed random_state alone does not make the split (or the
# reported accuracy) repeatable.
RANDOM_SEED = 42

# Download the movie_reviews corpus
nltk.download('movie_reviews')

# Pair each review's token list with its category label
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Shuffle the documents with a dedicated seeded generator so the order is
# reproducible and the global `random` state is left untouched.
random.Random(RANDOM_SEED).shuffle(documents)

# Extract features and labels: re-join tokens into one string per review,
# since the vectorizer below is given raw text rather than token lists.
reviews = [' '.join(words) for words, _ in documents]
labels = [category for _, category in documents]

# Split the dataset into training (80%) and testing (20%) sets
train_reviews, test_reviews, train_labels, test_labels = train_test_split(
    reviews, labels, test_size=0.2, random_state=RANDOM_SEED)

# TF-IDF Vectorization, with the vocabulary capped at 3000 features.
# The vectorizer is fitted on the training reviews only; the test reviews are
# transformed with that already-fitted vocabulary.
vectorizer = TfidfVectorizer(max_features=3000)
train_features = vectorizer.fit_transform(train_reviews)
test_features = vectorizer.transform(test_reviews)

# Train the Support Vector Machine classifier
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(train_features, train_labels)

# Predictions on the held-out reviews
predictions = svm_classifier.predict(test_features)

# Calculate accuracy
accuracy = accuracy_score(test_labels, predictions)
print("Accuracy:", accuracy)

# Example of classifying a new review: it must go through the same fitted
# vectorizer, and transform() expects an iterable of documents, hence the list.
new_review = "This movie was excellent!"
new_review_features = vectorizer.transform([new_review])
new_review_sentiment = svm_classifier.predict(new_review_features)
print("Sentiment:", new_review_sentiment[0])


[nltk_data] Downloading package movie_reviews to C:\Users\Rohit
[nltk_data]     Kumar\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


Accuracy: 0.8375
Sentiment: pos
