In [None]:
from functools import lru_cache

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Download the NLTK data.
# Recent NLTK releases (3.9+) make word_tokenize load the 'punkt_tab'
# resource instead of the older pickled 'punkt' models, and raise a
# LookupError if it is missing. Both are fetched so the script works on old
# and new NLTK versions alike.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Load the IMDb movie reviews dataset (replace with the path to your dataset)
# The dataset should have two columns: "review" and "sentiment"
df = pd.read_csv('imdb_reviews.csv')

# Define a function to preprocess the text
@lru_cache(maxsize=None)
def _preprocessing_resources():
    """Return ``(stop_words, stemmer)``, building them only on the first call.

    Loading the stop-word corpus and constructing the stemmer does not depend
    on the text being processed, so the pair is cached and shared by every
    call to ``preprocess_text`` instead of being rebuilt per review.
    """
    return frozenset(stopwords.words('english')), PorterStemmer()


def preprocess_text(text):
    """Normalise one review into a space-separated string of stemmed tokens.

    Steps: tokenize, keep purely alphabetic tokens, lowercase them, drop
    English stop words, then Porter-stem what remains.

    Args:
        text: The raw review. Anything that is not a ``str`` (for example
            ``None`` or the NaN pandas uses for a blank CSV cell) is treated
            as a missing review.

    Returns:
        The cleaned tokens joined by single spaces, or ``''`` when the input
        is missing, blank, or contains no usable tokens.
    """
    # Missing or blank input has no tokens; return early rather than letting
    # the tokenizer fail on a non-string value.
    if not isinstance(text, str) or not text.strip():
        return ''

    stop_words, stemmer = _preprocessing_resources()
    # Filter on isalpha() before lowercasing, then remove stop words.
    words = (token.lower() for token in word_tokenize(text) if token.isalpha())
    return ' '.join(stemmer.stem(word) for word in words if word not in stop_words)

# Preprocess the reviews
df['review'] = df['review'].apply(preprocess_text)

# Split the data into training and testing sets.
# The split is done on the cleaned text *before* vectorising so that the
# TF-IDF vocabulary and IDF weights are learned from the training reviews
# only; fitting on the full dataset would leak test-set statistics into the
# features and inflate the reported scores.
y = df['sentiment']
reviews_train, reviews_test, y_train, y_test = train_test_split(
    df['review'], y, test_size=0.2, random_state=42
)

# Convert text to numerical features using TF-IDF:
# fit on the training split, then apply the fitted transform to the test split.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(reviews_train)
X_test = vectorizer.transform(reviews_test)

# Feature matrix for the whole dataset, kept under its original name in case
# later code expects it; it uses the vectorizer fitted on the training split.
X = vectorizer.transform(df['review'])

# Fit a logistic regression classifier on the training split
model = LogisticRegression().fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = model.predict(X_test)

# Report overall accuracy, then the per-class classification report
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_true=y_test, y_pred=y_pred))

# Try the trained model on a single hand-written review
new_review = "I love this movie! The acting is great and the story is captivating."

# The review goes through the same cleaning function and the already-fitted
# vectorizer (transform only, no refit) before being passed to the model.
new_review_preprocessed = preprocess_text(new_review)
new_review_vector = vectorizer.transform(raw_documents=[new_review_preprocessed])
prediction = model.predict(X=new_review_vector)
print(f"Sentiment prediction for '{new_review}': {prediction[0]}")
