In [10]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [11]:
reviews = {
    "Review Text": [
        "I love this product!", "Worst purchase ever", "Amazing experience", 
        "Totally disappointed", "Great value for money", "Not good at all"
    ],
    "Sentiment": ["positive", "negative", "positive", "negative", "positive", "negative"]
}

df_nlp = pd.DataFrame(reviews)
df_nlp.head()

Unnamed: 0,Review Text,Sentiment
0,I love this product!,positive
1,Worst purchase ever,negative
2,Amazing experience,positive
3,Totally disappointed,negative
4,Great value for money,positive


In [12]:
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def preprocess(text):
    text = re.sub(r'[^\w\s]', '', text.lower())
    return " ".join([word for word in text.split() if word not in stop_words])

df_nlp['Cleaned Text'] = df_nlp['Review Text'].apply(preprocess)
df_nlp.head()

[nltk_data] Downloading package stopwords to C:\Users\Rutvik
[nltk_data]     Suryawanshi\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,Review Text,Sentiment,Cleaned Text
0,I love this product!,positive,love product
1,Worst purchase ever,negative,worst purchase ever
2,Amazing experience,positive,amazing experience
3,Totally disappointed,negative,totally disappointed
4,Great value for money,positive,great value money


In [13]:
# TF-IDF vectorization
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df_nlp['Cleaned Text'])

# Labels
y = df_nlp['Sentiment']

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression
model_nlp = LogisticRegression()
model_nlp.fit(X_train, y_train)

In [14]:
# Predict and evaluate
y_pred = model_nlp.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    negative       0.50      1.00      0.67         1
    positive       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
