In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import re 
from nltk.stem import WordNetLemmatizer

In [2]:
df = pd.read_csv('Comment_Classification.csv')

In [3]:
df.head()

Unnamed: 0,Comment,Sentiment
0,lets not forget that apple pay in 2014 require...,neutral
1,here in nz 50 of retailers don’t even have con...,negative
2,i will forever acknowledge this channel with t...,positive
3,whenever i go to a place that doesn’t take app...,negative
4,apple pay is so convenient secure and easy to ...,positive


# 1. Preprocessing

In [4]:
!pip install nltk

Defaulting to user installation because normal site-packages is not writeable




In [5]:
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [6]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [7]:
# Text Preprocessing
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

In [8]:
df = df.dropna(subset=['Comment'])

In [9]:
def preprocess_text(text):
    if isinstance(text, float):  # Handle NaN or other floats
        return ''
    text = str(text).lower()  # Convert to string and lowercase
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    tokens = word_tokenize(text)  # Tokenize
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]  # Remove stopwords
    return ' '.join(tokens)  # Rejoin tokens with space (not empty string)

df['Processed_Comment'] = df['Comment'].apply(preprocess_text)

# Split Data
X = df['Processed_Comment']
y = df['Sentiment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Train a classifier

In [10]:
# Vectorize text
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [11]:
# Train Classifier
classifier = MultinomialNB()
classifier.fit(X_train_vec, y_train)

In [12]:
# Train a classifier (Support Vector Machine)
classifier = SVC(kernel='linear', random_state=42)
classifier.fit(X_train_vec, y_train)

# Evaluate

In [13]:
y_pred = classifier.predict(X_test_vec)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.7503403212632725

Classification Report:
              precision    recall  f1-score   support

    negative       0.57      0.36      0.44       441
     neutral       0.60      0.60      0.60       912
    positive       0.82      0.88      0.85      2320

    accuracy                           0.75      3673
   macro avg       0.66      0.61      0.63      3673
weighted avg       0.74      0.75      0.74      3673



In [14]:
# Function to predict sentiment of new comments
def predict_sentiment(comment):
    processed = preprocess_text(comment)
    vec = vectorizer.transform([processed])
    prediction = classifier.predict(vec)
    return prediction[0]

In [15]:
# Example usage
test_comment = "This product is amazing and works perfectly!"
print(f"\nTest comment: {test_comment}'")
print("Predicted sentiment:", predict_sentiment(test_comment))


Test comment: This product is amazing and works perfectly!'
Predicted sentiment: positive


In [16]:
# Example usage
test_comment = "She was so distraught when subtrist returned"
print(f"\nTest comment: {test_comment}'")
print("Predicted sentiment:", predict_sentiment(test_comment))


Test comment: She was so distraught when subtrist returned'
Predicted sentiment: neutral


In [17]:
# Example usage
test_comment = "I dont understand why the mother loves the son than the daughter"
print(f"\nTest comment: {test_comment}'")
print("Predicted sentiment:", predict_sentiment(test_comment))


Test comment: I dont understand why the mother loves the son than the daughter'
Predicted sentiment: negative


In [18]:
import pickle

# Save the vectorizer
with open('vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)

# Save the classifier
with open('classifier.pkl', 'wb') as f:
    pickle.dump(classifier, f)