# Load Dataset and Preview


In [1]:
import pandas as pd
df = pd.read_csv("sentiment_dataset.csv")
df.head()

Unnamed: 0,sentiment,text
0,1,I love this product! It's absolutely amazing.
1,0,This is the worst thing I have ever bought.
2,1,What a great experience. Will use again.
3,0,I hate this. Waste of money and time.
4,1,Excellent quality and fast delivery.


# DataFrame Information

In [2]:
df.info

<bound method DataFrame.info of    sentiment                                           text
0          1  I love this product! It's absolutely amazing.
1          0    This is the worst thing I have ever bought.
2          1       What a great experience. Will use again.
3          0          I hate this. Waste of money and time.
4          1           Excellent quality and fast delivery.
5          0            Very disappointed with the service.
6          1                  Highly recommend to everyone.
7          0                  Not good. Completely useless.
8          1                Fantastic! Beyond expectations.
9          0             Won’t buy again. Terrible product.>

# Missing Values Summary

In [3]:
df.isnull().sum()

sentiment    0
text         0
dtype: int64

# Convert Text to Lowercase

In [9]:
import string
df['text'] = df['text'].str.replace(f"[{string.punctuation}]", "", regex=True)
df['text']


0    i love this product its absolutely amazing
1    this is the worst thing i have ever bought
2        what a great experience will use again
3           i hate this waste of money and time
4           excellent quality and fast delivery
5            very disappointed with the service
6                  highly recommend to everyone
7                   not good completely useless
8                 fantastic beyond expectations
9              won’t buy again terrible product
Name: text, dtype: object

# Remove Punctuation from Text

In [5]:
df['text'] = df['text'].str.lower()
df['text']

0    i love this product! it's absolutely amazing.
1      this is the worst thing i have ever bought.
2         what a great experience. will use again.
3            i hate this. waste of money and time.
4             excellent quality and fast delivery.
5              very disappointed with the service.
6                    highly recommend to everyone.
7                    not good. completely useless.
8                  fantastic! beyond expectations.
9               won’t buy again. terrible product.
Name: text, dtype: object

# Remove Stopwords from Text

In [14]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

stop = set(stopwords.words('english'))

def remove_stopwords(text):
    return " ".join([word for word in text.split() if word not in stop])

df['text'] = df['text'].apply(remove_stopwords)
df['text'].head(10)



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\SANJANA\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


0    love product absolutely amazing
1            worst thing ever bought
2               great experience use
3              hate waste money time
4    excellent quality fast delivery
5               disappointed service
6          highly recommend everyone
7            good completely useless
8      fantastic beyond expectations
9         won’t buy terrible product
Name: text, dtype: object

# Display First Few Rows of DataFrame

In [12]:
print(df.head())

   sentiment                                           text
0          1  I love this product! It's absolutely amazing.
1          0    This is the worst thing I have ever bought.
2          1       What a great experience. Will use again.
3          0          I hate this. Waste of money and time.
4          1           Excellent quality and fast delivery.


# TF-IDF Vectorization and Target Labels

In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['text'])
y = df['sentiment']

print("TF-IDF matrix shape:", X.shape)
print("Features (words):", vectorizer.get_feature_names_out())
print("Sample sentiment labels:")
print(y.head())


TF-IDF matrix shape: (10, 33)
Features (words): ['absolutely' 'amazing' 'beyond' 'bought' 'buy' 'completely' 'delivery'
 'disappointed' 'ever' 'everyone' 'excellent' 'expectations' 'experience'
 'fantastic' 'fast' 'good' 'great' 'hate' 'highly' 'love' 'money'
 'product' 'quality' 'recommend' 'service' 'terrible' 'thing' 'time' 'use'
 'useless' 'waste' 'won' 'worst']
Sample sentiment labels:
0    1
1    0
2    1
3    0
4    1
Name: sentiment, dtype: int64


# Train-Test Split of Data

In [17]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)


X_train shape: (8, 33)
X_test shape: (2, 33)
y_train shape: (8,)
y_test shape: (2,)


# Train Logistic Regression Model and Evaluate Accuracy

In [18]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)

train_accuracy = model.score(X_train, y_train)
print(f"Training Accuracy: {train_accuracy:.2f}")

test_accuracy = model.score(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.2f}")



Training Accuracy: 1.00
Test Accuracy: 0.50


# Model Prediction and Accuracy Score

In [19]:
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.5


# Sentiment Prediction Function and Test

In [21]:
import string

def predict_sentiment(text):
    text = text.lower()
    text = "".join([c for c in text if c not in string.punctuation])
    text = " ".join([word for word in text.split() if word not in stop])
    text_vec = vectorizer.transform([text])
    pred = model.predict(text_vec)
    return "Positive" if pred[0] == 1 else "Negative"

print(predict_sentiment("I really love this product!"))


Positive


# Sentiment Prediction Function with Examples

In [22]:
import string

def predict_sentiment(text):
    text = text.lower()
    text = "".join([c for c in text if c not in string.punctuation])
    text = " ".join([word for word in text.split() if word not in stop])
    text_vec = vectorizer.transform([text])
    pred = model.predict(text_vec)
    return "Positive" if pred[0] == 1 else "Negative"

print(predict_sentiment("I absolutely love this!"))
print(predict_sentiment("This is a waste of money"))


Positive
Negative
