In [22]:
#importing libraries

In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from textblob import TextBlob
from nltk.corpus import stopwords

#Polarity refers to the emotional tone or sentiment expressed in a piece of text and is often measured on a numerical scale.
#The polarity score typically ranges from -1 to 1.

#Subjectivity in sentiment analysis refers to the extent to which a piece of text expresses personal opinions,
#feelings rather than objective facts. 
#Subjectivity is measured on a scale, typically ranging from 0 to 1, where:
#0: Highly objective or factual content.
#1: Highly subjective or opinionated content.

In [24]:
#Positive Polarity: Values closer to 1 indicate a more positive sentiment.
txt1 = "I am thrilled to share my achievements with my loved ones!"
blob1 = TextBlob(txt1)

In [25]:
blob1.sentiment

Sentiment(polarity=0.7375, subjectivity=0.75)

In [26]:
#Negative Polarity: Values closer to -1 indicate a more negative sentiment.
txt2 = "This product is not worth the price."
blob2 = TextBlob(txt2)

In [27]:
blob2.sentiment

Sentiment(polarity=-0.15, subjectivity=0.1)

In [28]:
#Neutral Polarity: A polarity score of 0 suggests a neutral sentiment.
txt3 = "The formula for water is H2O."
blob3 = TextBlob(txt3)

In [29]:
blob3.sentiment

Sentiment(polarity=0.0, subjectivity=0.0)

In [30]:
#importing dataset for sentiment analysis
df = pd.read_csv("C:/Users/dg8ri/OneDrive/Attachments/Desktop/sentimentAnalsyis/sentimentdataset.csv")

# Select only the Text and Sentiment columns
selected_columns = df.loc[:, ['Sentiment','Text','Likes','Hour']]

# Display the selected columns
print(selected_columns.head())

     Sentiment                                               Text  Likes  Hour
0   Positive     Enjoying a beautiful day at the park!        ...   30.0    12
1   Negative     Traffic was terrible this morning.           ...   10.0     8
2   Positive     Just finished an amazing workout! 💪          ...   40.0    15
3   Positive     Excited about the upcoming weekend getaway!  ...   15.0    18
4   Neutral      Trying out a new recipe for dinner tonight.  ...   25.0    19


In [31]:
df.shape

(732, 15)

In [32]:
df.Sentiment.value_counts()

Sentiment
 Positive           44
 Joy                42
 Excitement         32
 Neutral            14
 Contentment        14
                    ..
 Adrenaline          1
 Harmony             1
 ArtisticBurst       1
 Radiance            1
 Elegance            1
Name: count, Length: 279, dtype: int64

In [33]:
polarityS = []

for i in range(df.shape[0]):
     # Extracting the 'Text' column value
    text = df.iloc[i]['Text'] 
    score = TextBlob(text)
    score1 = score.sentiment.polarity
    polarityS.append(score1)

# Adding the polarity scores to the df
df['Polarity'] = polarityS

# Display the updated DataFrame
print(df[['Text', 'Polarity']])


                                                  Text  Polarity
0     Enjoying a beautiful day at the park!        ...  0.750000
1     Traffic was terrible this morning.           ... -1.000000
2     Just finished an amazing workout! 💪          ...  0.750000
3     Excited about the upcoming weekend getaway!  ...  0.468750
4     Trying out a new recipe for dinner tonight.  ...  0.136364
..                                                 ...       ...
727  Collaborating on a science project that receiv...  0.875000
728  Attending a surprise birthday party organized ...  0.687500
729  Successfully fundraising for a school charity ...  0.516667
730  Participating in a multicultural festival, cel...  1.000000
731  Organizing a virtual talent show during challe...  0.625000

[732 rows x 2 columns]


In [34]:
len(df[df.Polarity > 0])

282

In [35]:
len(df[df.Polarity < 0])

126

In [36]:
len(df[df.Polarity == 0])

324

In [37]:
len(df[df.Polarity > .1])

240

In [38]:
from sklearn.model_selection import train_test_split

# Convert non-string data to strings
df['Text'] = df['Text'].astype(str)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(df['Text'], df['Sentiment'], test_size=0.2, random_state=42)

In [39]:
from sklearn.feature_extraction.text import CountVectorizer
# Vectorize the text data using CountVectorizer
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [40]:
from sklearn.naive_bayes import MultinomialNB
# Train a classifier (example: Naive Bayes)
classifier = MultinomialNB()
classifier.fit(X_train_vectorized, y_train)

In [41]:
# Make predictions on the test set
y_pred = classifier.predict(X_test_vectorized)

In [42]:
from sklearn.metrics import accuracy_score, classification_report
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 14.29%


In [43]:
report = classification_report(y_test, y_pred)
print(report)

                        precision    recall  f1-score   support

         Acceptance          0.00      0.00      0.00         2
      Acceptance             0.00      0.00      0.00         0
           Admiration        0.00      0.00      0.00         1
        Admiration           0.00      0.00      0.00         1
         Affection           0.00      0.00      0.00         1
      Ambivalence            0.00      0.00      0.00         1
         Anger               0.00      0.00      0.00         1
        Anticipation         0.00      0.00      0.00         1
        Arousal              0.00      0.00      0.00         3
                  Awe        0.00      0.00      0.00         1
         Awe                 0.00      0.00      0.00         1
                  Bad        0.00      0.00      0.00         1
             Betrayal        0.00      0.00      0.00         2
        Betrayal             0.00      0.00      0.00         1
         Bitter              0.00      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
