# Sentiment Analysis on Text Data

This notebook performs sentiment analysis using classical machine learning models and compares their performance.

## 1. Import Libraries

In [None]:

import pandas as pd
import numpy as np
import re

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


## 2. Load Dataset

In [None]:

# Read the review dataset; the path is relative to the notebook's working directory.
reviews_path = "data/sample_reviews.csv"
df = pd.read_csv(reviews_path)

# Preview the first rows as the cell output.
df.head()


## 3. Text Preprocessing

In [None]:

def clean_text(text):
    """Lowercase a review and drop every character that is not an ASCII letter or whitespace.

    Removed characters are deleted outright (not replaced by a space), and
    existing whitespace is kept exactly as it appears in the input.

    Parameters
    ----------
    text : object
        Raw review value. ``None`` and float NaN (how pandas represents an
        empty CSV cell) are treated as empty text; any other non-string value
        is converted with ``str()`` before cleaning.

    Returns
    -------
    str
        The cleaned, lowercased text; ``''`` for missing values.
    """
    # NaN is the only float that is not equal to itself, so `text != text`
    # detects it without needing numpy/pandas inside this helper.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    text = str(text).lower()
    return re.sub(r'[^a-zA-Z\s]', '', text)

# Store a cleaned copy of each review next to the raw text, then preview the frame.
df['clean_review'] = df['review'].map(clean_text)
df.head()


## 4. TF-IDF Vectorization and Train-Test Split

In [None]:

X = df['clean_review']
y = df['sentiment']

# Split the raw text BEFORE vectorizing: fitting TF-IDF on the full corpus would
# let the held-out reviews influence the vocabulary and IDF weights (data leakage),
# which inflates the reported test scores.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the vocabulary and IDF weights from the training reviews only, then
# reuse the fitted vectorizer to project the test reviews into the same space.
vectorizer = TfidfVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)


## 5. Model 1: Logistic Regression

In [None]:

# Fit the classifier on the training features; fit() returns the estimator itself,
# so construction and training can be chained. max_iter=1000 raises the solver's
# iteration cap.
lr_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Score the model on the held-out split.
lr_predictions = lr_model.predict(X_test)
lr_accuracy = accuracy_score(y_test, lr_predictions)
print("Logistic Regression Accuracy:", lr_accuracy)


## 6. Model 2: Multinomial Naive Bayes

In [None]:

# Train Multinomial Naive Bayes on the same training features; fit() returns the
# estimator itself, so construction and training can be chained.
nb_model = MultinomialNB().fit(X_train, y_train)

# Score the model on the held-out split.
nb_predictions = nb_model.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_predictions)
print("Naive Bayes Accuracy:", nb_accuracy)


## 7. Model Evaluation

In [None]:

# Confusion matrices for both models, computed on the same held-out labels.
print("Logistic Regression Confusion Matrix")
print(confusion_matrix(y_test, lr_predictions))

print("\nNaive Bayes Confusion Matrix")
print(confusion_matrix(y_test, nb_predictions))

# Per-class precision / recall / F1 for BOTH models, so the comparison is not
# limited to overall accuracy for one of them.
print("\nClassification Report (Logistic Regression)")
print(classification_report(y_test, lr_predictions))

print("\nClassification Report (Naive Bayes)")
print(classification_report(y_test, nb_predictions))


## 8. Model Comparison

In [None]:

# Side-by-side accuracy table: one (model name, accuracy) row per classifier.
accuracy_rows = [
    ('Logistic Regression', lr_accuracy),
    ('Naive Bayes', nb_accuracy),
]
results = pd.DataFrame(accuracy_rows, columns=['Model', 'Accuracy'])
results


## 9. Conclusion

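This project demonstrates an end-to-end ML workflow for sentiment analysis — loading data, cleaning text, TF-IDF vectorization, training, and evaluation — and a comparison of two classical models, Logistic Regression and Multinomial Naive Bayes.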