In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# Read the labelled sentiment CSV into a DataFrame.
# NOTE(review): the path looks like a Colab Google Drive mount — confirm the drive
# is mounted before running this cell.
dataset_csv = '/content/drive/MyDrive/DATASET/finalSentimentdata2.csv'
data = pd.read_csv(dataset_csv)

In [None]:
# Preview the first rows to sanity-check the columns and sample values.
data.head()

Unnamed: 0,ID,sentiment,text
0,3204,sad,agree the poor in india are treated badly thei...
1,1431,joy,if only i could have spent the with this cutie...
2,654,joy,will nature conservation remain a priority in ...
3,2530,sad,coronavirus disappearing in italy show this to...
4,2296,sad,uk records lowest daily virus death toll since...


In [None]:
# Print the column dtypes, non-null counts and memory footprint of the frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3090 entries, 0 to 3089
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   ID         3090 non-null   int64 
 1   sentiment  3090 non-null   object
 2   text       3090 non-null   object
dtypes: int64(1), object(2)
memory usage: 72.5+ KB


In [None]:
# Summary statistics for the numeric columns. Only `ID` is numeric here;
# presumably it is just a row identifier, so these figures carry little meaning.
data.describe()

Unnamed: 0,ID
count,3090.0
mean,2689.072816
std,1438.624297
min,3.0
25%,1368.25
50%,3030.5
75%,3949.75
max,4722.0


In [None]:
# Element-wise missing-value mask (True where a cell is null).
# NOTE(review): the displayed frame is truncated, so it cannot confirm that no
# value is missing; `data.isnull().sum()` would give an explicit per-column count.
data.isnull()

Unnamed: 0,ID,sentiment,text
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
...,...,...,...
3085,False,False,False
3086,False,False,False
3087,False,False,False
3088,False,False,False


In [None]:
# (number of rows, number of columns) of the loaded frame.
data.shape

(3090, 3)

In [None]:
# Inspect the last rows to check the file was read through to the end.
data.tail()

Unnamed: 0,ID,sentiment,text
3085,2579,sad,today at 02 30pm a 54 year old bangladeshi mal...
3086,3579,anger,corona virus i implore that you cease activity...
3087,221,joy,issa date once lockdown ends inshaallah (and c...
3088,2705,sad,the death toll due to covid 19 rose to 31 in j...
3089,2962,sad,the rates are become barrier for poor people t...


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
texts = data['text']
labels = data['sentiment']
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Text vectorization using CountVectorizer
vectorizer = CountVectorizer()
X_train_counts = vectorizer.fit_transform(X_train)
X_test_counts = vectorizer.transform(X_test)

In [None]:
# Re-weight the raw counts as TF-IDF. The IDF statistics are fitted on the
# training counts only and then applied unchanged to both splits.
tfidf_transformer = TfidfTransformer().fit(X_train_counts)
X_train_tfidf = tfidf_transformer.transform(X_train_counts)
X_test_tfidf = tfidf_transformer.transform(X_test_counts)

In [None]:
# Multinomial Naive Bayes: fit on the TF-IDF training features, then score the
# predictions for the held-out split.
nb_classifier = MultinomialNB().fit(X_train_tfidf, y_train)
nb_predictions = nb_classifier.predict(X_test_tfidf)

nb_report = classification_report(y_test, nb_predictions)
nb_accuracy = accuracy_score(y_test, nb_predictions)
print("Naive Bayes Classifier:", nb_report, sep="\n")
print("Accuracy:", nb_accuracy)

Naive Bayes Classifier:
              precision    recall  f1-score   support

       anger       0.64      0.44      0.52       156
        fear       0.54      0.73      0.62       164
         joy       0.88      0.54      0.67       144
         sad       0.63      0.82      0.71       154

    accuracy                           0.63       618
   macro avg       0.67      0.63      0.63       618
weighted avg       0.67      0.63      0.63       618

Accuracy: 0.6343042071197411


In [None]:
# Support vector classifier with scikit-learn's default settings, trained on the
# TF-IDF training features and evaluated on the held-out split.
svm_classifier = SVC()
svm_classifier = svm_classifier.fit(X_train_tfidf, y_train)
svm_predictions = svm_classifier.predict(X_test_tfidf)

# Compute both metrics first, then emit the same three lines of output.
svm_report = classification_report(y_test, svm_predictions)
svm_accuracy = accuracy_score(y_test, svm_predictions)
print("SVM Classifier:")
print(svm_report)
print("Accuracy:", svm_accuracy)


SVM Classifier:
              precision    recall  f1-score   support

       anger       0.60      0.54      0.57       156
        fear       0.61      0.63      0.62       164
         joy       0.72      0.75      0.73       144
         sad       0.76      0.77      0.76       154

    accuracy                           0.67       618
   macro avg       0.67      0.67      0.67       618
weighted avg       0.67      0.67      0.67       618

Accuracy: 0.6699029126213593


In [None]:
# Decision Tree classifier trained on the TF-IDF training features and evaluated
# on the held-out split.
# A fixed random_state is passed because tree construction breaks ties between
# equally good splits at random; without a seed the fitted tree, and therefore
# the reported metrics, change from run to run. The seed matches the one used
# for the train/test split.
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train_tfidf, y_train)
dt_predictions = dt_classifier.predict(X_test_tfidf)

print("Decision Tree Classifier:")
print(classification_report(y_test, dt_predictions))
print("Accuracy:", accuracy_score(y_test, dt_predictions))

Decision Tree Classifier:
              precision    recall  f1-score   support

       anger       0.39      0.35      0.37       156
        fear       0.48      0.49      0.49       164
         joy       0.49      0.53      0.51       144
         sad       0.48      0.47      0.47       154

    accuracy                           0.46       618
   macro avg       0.46      0.46      0.46       618
weighted avg       0.46      0.46      0.46       618

Accuracy: 0.459546925566343
