In [1]:
# Necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score
import joblib

In [2]:
#If you work on Google Colab, upload the dataset document
from google.colab import files
uploaded = files.upload()

Saving sentiment_dataset.csv to sentiment_dataset.csv


In [3]:
# Load the dataset
df = pd.read_csv("sentiment_dataset.csv")

In [4]:
# Print the first 5 rows
print(df.head())

                                            text sentiment
0           I love this product, it works great!  positive
1     Absolutely terrible, I want my money back.  negative
2                Just okay, not what I expected.   neutral
3          Amazing experience, highly recommend.  positive
4  Worst purchase ever, completely dissatisfied.  negative


In [5]:
# Print the last 5 rowa
print(df.tail())

                                     text sentiment
95          Pretty good, would buy again.  positive
96          Not bad, but could be better.   neutral
97  Fantastic service and friendly staff.  positive
98           Horrible, never coming back.  negative
99            Decent quality, fair price.   neutral


In [6]:
# Data preprocessing
X = df['text']
y = df['sentiment']

In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Transform text data to feature vectors using TF-IDF vectorizer
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [9]:
# Model training
model = MultinomialNB()
model.fit(X_train_tfidf, y_train)

In [10]:
# Predict
y_pred = model.predict(X_test_tfidf)

In [11]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred) # Assign the accuracy score to the variable 'accuracy'
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

    negative       1.00      1.00      1.00         4
     neutral       1.00      1.00      1.00         4
    positive       1.00      1.00      1.00        12

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



In [12]:
# Save the model and vectorizer
joblib.dump(model, 'sentiment_model.pkl')
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

print("Model and vectorizer saved.")

Model and vectorizer saved.
