Step 1: Import Libraries

In [None]:
import re

import nltk
import pandas as pd
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

Step 2: Download NLTK Stopwords

In [None]:
# Fetch the NLTK stopword corpus, then keep the English list as a set so the
# membership checks done during preprocessing are fast.
nltk.download('stopwords')
stop_words = {*stopwords.words('english')}

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Step 3: Load the Dataset


In [None]:
# Read the reviews CSV into a DataFrame; later cells use its 'review' and
# 'sentiment' columns.
df = pd.read_csv(filepath_or_buffer='/content/IMDB Dataset.csv')

Step 4: Preprocess the Text Data

In [None]:
# Preprocess the text data
_BR_TAG_RE = re.compile(r'<br\s*/?>')  # HTML line-break markup inside review text
_TOKEN_RE = re.compile(r"[\w']+")  # runs of word characters, apostrophes kept


def preprocess_text(text, stopword_set=None):
    """Normalise a raw review into a space-joined string of non-stopword tokens.

    Steps: lowercase, replace ``<br>``/``<br />`` markup with a space, extract
    word tokens, drop stopwords.

    Tokens are extracted with a regex rather than ``str.split()`` because
    whitespace splitting leaves punctuation attached ("the," or "(and"), and
    such tokens never match their stopword entry. Apostrophes are kept inside
    tokens so contractions such as "don't" can still be compared against the
    stopword set as a whole word.

    Args:
        text: The raw review. Non-string values (e.g. NaN or None from a
            missing CSV cell) are treated as an empty review.
        stopword_set: Optional collection of lowercase words to drop. When
            omitted, the notebook-level ``stop_words`` set is used.

    Returns:
        str: The remaining tokens joined by single spaces ('' if none remain).
    """
    if not isinstance(text, str):
        # Missing values have no .lower(); treat them as an empty review.
        return ''
    if stopword_set is None:
        stopword_set = stop_words

    # NOTE(review): assumes reviews may contain <br /> tags, as scraped web
    # text often does -- confirm against the dataset.
    text = _BR_TAG_RE.sub(' ', text.lower())

    # Strip quote characters left on the edges of a token (e.g. 'great').
    tokens = (token.strip("'") for token in _TOKEN_RE.findall(text))
    return ' '.join(
        token for token in tokens if token and token not in stopword_set
    )

# Run every raw review through preprocess_text and keep the result in a new
# column, leaving the original 'review' text untouched.
df['cleaned_review'] = df['review'].map(preprocess_text)

Step 5: Convert Sentiment Labels to Numeric Values

In [None]:
# Encode the string labels as integers (positive -> 1, negative -> 0).
# The dtype guard makes this cell safe to re-run: mapping an already-encoded
# column a second time would turn every label into NaN.
label_to_id = {'positive': 1, 'negative': 0}
if not pd.api.types.is_numeric_dtype(df['sentiment']):
    encoded_sentiment = df['sentiment'].map(label_to_id)
    if encoded_sentiment.isna().any():
        # Fail here with the offending values instead of passing NaN labels
        # on to the later cells.
        unexpected_labels = df.loc[encoded_sentiment.isna(), 'sentiment'].unique()
        raise ValueError(f'Unexpected sentiment labels: {unexpected_labels!r}')
    df['sentiment'] = encoded_sentiment.astype('int64')


Step 6: Split the Data

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition the same on every run.
X, y = df['cleaned_review'], df['sentiment']  # features, labels

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Step 7: Vectorize the Text Data

In [None]:
# Turn the cleaned reviews into count vectors. The vectorizer is fitted on the
# training reviews only; the test reviews are transformed with that same
# fitted vectorizer.
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(raw_documents=X_train)
X_test_vectorized = vectorizer.transform(raw_documents=X_test)

Step 8: Train the Model

In [None]:
# Fit a multinomial Naive Bayes classifier on the vectorized training reviews.
model = MultinomialNB()
model.fit(X=X_train_vectorized, y=y_train)

Step 9: Make Predictions

In [None]:
# Predict a label for every held-out review.
y_pred = model.predict(X=X_test_vectorized)

Step 10: Evaluate the Model

In [None]:
# Score the held-out predictions against the true labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
f1 = f1_score(y_true=y_test, y_pred=y_pred)

# Report both metrics, one per line, rounded to two decimals.
print(f'Accuracy: {accuracy:.2f}', f'F1 Score: {f1:.2f}', sep='\n')

Accuracy: 0.86
F1 Score: 0.86


Step 11: Create a Prediction Function

In [None]:
# Function to predict sentiment of a new review
def predict_sentiment(review):
    """Classify a single raw review with the fitted vectorizer and model.

    The review is passed through ``preprocess_text`` (the same cleaning applied
    to the dataset's review column), vectorized with the notebook-level
    ``vectorizer`` and scored by the notebook-level ``model``.

    Args:
        review: The raw review text.

    Returns:
        str: "Positive" when the predicted label equals 1, otherwise "Negative".
    """
    features = vectorizer.transform([preprocess_text(review)])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Positive"
    return "Negative"

Step 12: User Input for Prediction

In [None]:
# Ask for one review on stdin and print the predicted sentiment.
user_review = input("Enter a movie review: ")
sentiment = predict_sentiment(user_review)
print(f'Sentiment: {sentiment}')

Enter a movie review: i like the moviie but its was not fantastic 
Sentiment: Positive


In [None]:
# Interactive loop: classify reviews until the user types 'exit'.
while True:
    # Strip surrounding whitespace so that input such as "exit " (trailing
    # space) is recognised as the quit command instead of being classified.
    user_review = input("Enter a movie review (or type 'exit' to quit): ").strip()
    if user_review.lower() == 'exit':
        print("Exiting the sentiment analysis program.")
        break
    if not user_review:
        # Nothing to classify; ask again.
        continue
    sentiment = predict_sentiment(user_review)
    print(f'Sentiment: {sentiment}')

Enter a movie review (or type 'exit' to quit): it was good 
Sentiment: Positive
Enter a movie review (or type 'exit' to quit): fine 
Sentiment: Positive
Enter a movie review (or type 'exit' to quit): not bad 
Sentiment: Positive
Enter a movie review (or type 'exit' to quit): bad 
Sentiment: Negative
Enter a movie review (or type 'exit' to quit): it was not fine 
Sentiment: Negative
Enter a movie review (or type 'exit' to quit): fine 
Sentiment: Positive
Enter a movie review (or type 'exit' to quit): exit 
Sentiment: Neutral
Enter a movie review (or type 'exit' to quit): it was normal 
Sentiment: Neutral
Enter a movie review (or type 'exit' to quit): movie shoul have fiction 
Sentiment: Neutral
Enter a movie review (or type 'exit' to quit): exit
Exiting the sentiment analysis program.
