In [1]:
import pandas as pd

# Read the sentiment results. The columns used in this cell are 'text',
# 'Predicted_Sentiment' and 'Sentiment_fine'.
df = pd.read_csv("final_sentiment.csv")

# Columns shown in every preview printed below.
preview_cols = ['text', 'Predicted_Sentiment', 'Sentiment_fine']

predicted = df['Predicted_Sentiment']
reference = df['Sentiment_fine']  # treated as the "actual" label in this cell

# Rows predicted NEGATIVE whose reference label is POSITIVE
neg_pred_pos_actual = df[predicted.eq('NEGATIVE') & reference.eq('POSITIVE')]

# Rows predicted POSITIVE whose reference label is NEGATIVE
pos_pred_neg_actual = df[predicted.eq('POSITIVE') & reference.eq('NEGATIVE')]

# Rows where both columns carry the same label
agreement = df[predicted.eq(reference)]

# Print a heading followed by the first rows of each subset
for heading, subset in (
    ("Predicted NEGATIVE but actual POSITIVE:", neg_pred_pos_actual),
    ("\nPredicted POSITIVE but actual NEGATIVE:", pos_pred_neg_actual),
    ("\nAgreements (Predicted == Actual):", agreement),
):
    print(heading)
    print(subset[preview_cols].head())

Predicted NEGATIVE but actual POSITIVE:
                                                 text Predicted_Sentiment  \
0                 I`d have responded, if I were going            NEGATIVE   
4    Sons of ****, why couldn`t they put them on t...            NEGATIVE   
9    Journey!? Wow... u just became cooler.  hehe....            NEGATIVE   
10   as much as i love to be hopeful, i reckon the...            NEGATIVE   
14                         test test from the LG enV2            NEGATIVE   

   Sentiment_fine  
0        POSITIVE  
4        POSITIVE  
9        POSITIVE  
10       POSITIVE  
14       POSITIVE  

Predicted POSITIVE but actual NEGATIVE:
                                                  text Predicted_Sentiment  \
1        Sooo SAD I will miss you here in San Diego!!!            POSITIVE   
5    http://www.dothebouncy.com/smf - some shameles...            POSITIVE   
43   RATT ROCKED NASHVILLE TONITE..ONE THING SUCKED...            POSITIVE   
70                I stil

In [3]:
import re

# Keywords to look for in the 'text' column. They are matched
# case-insensitively anywhere in the text, so "diego" also matches "San Diego".
keywords = ["latisha", "diego", "sebastian", "my mother"]

# Escape each keyword before joining so that any regex metacharacter added to
# the list later is matched literally instead of altering the pattern.
pattern = re.compile("|".join(re.escape(keyword) for keyword in keywords), re.IGNORECASE)

# Keep rows whose 'text' is a string containing at least one keyword;
# non-string values are excluded by the isinstance check.
filtered_df = df[df["text"].apply(lambda x: isinstance(x, str) and bool(pattern.search(x)))]

print(filtered_df.shape)
# Print every matching row with its index label and sentiment
for i, row in filtered_df.iterrows():
    # Fall back to the *value* stored under 'Sentiment_fine' when the row has no
    # 'Predicted_Sentiment' entry. Passing the bare string 'Sentiment_fine' as
    # the default would print the column name instead of a sentiment.
    sentiment = row.get('Predicted_Sentiment', row.get('Sentiment_fine'))
    print(f"[{i}] Text: {row['text']}\n   Sentiment: {sentiment}\n")


(32, 6)
[1] Text:  Sooo SAD I will miss you here in San Diego!!!
   Sentiment: POSITIVE

[1859] Text:  SAN DIEGO! MOVE DOWN HERE! although whether right now is pretty cold. im waiting for the suck so i can go surf  I LOVE YOU
   Sentiment: POSITIVE

[1889] Text: My mother is taking Gambit to the vet today. I hope it`s nothing too serious.
   Sentiment: NEGATIVE

[2938] Text: i`m celebrating my mother!!  and also celebrating my legacy as a woman of God.
   Sentiment: POSITIVE

[3319] Text: missing my hobo/****/tramp way of life, and cooking for my mother on a saturday night  MAMA I LUV U
   Sentiment: NEGATIVE

[4447] Text:  nah will didn`t tell diego so we at some donminican spot getting **** up
   Sentiment: NEGATIVE

[5082] Text: i have become my mothers slave.
   Sentiment: NEGATIVE

[5802] Text: Fml! Ughhhhhhhhh, im not going anwhere today just sit in the bedroom I share with my mother and cry!
   Sentiment: NEGATIVE

[7954] Text: Today is my Mother`s birthday- Happy Birthday & I l

In [5]:
# Among the keyword-filtered rows, keep those where 'Sentiment_fine' and
# 'Predicted_Sentiment' hold different labels.
label_mismatch = filtered_df["Sentiment_fine"].ne(filtered_df["Predicted_Sentiment"])
differences = filtered_df[label_mismatch]

print(f"Number of differing predictions: {len(differences)}")


Number of differing predictions: 9


In [7]:
# Display only the text and the two sentiment columns of the differing rows
differences.loc[:, ["text", "Predicted_Sentiment", "Sentiment_fine"]]

Unnamed: 0,text,Predicted_Sentiment,Sentiment_fine
1,Sooo SAD I will miss you here in San Diego!!!,POSITIVE,NEGATIVE
3319,"missing my hobo/****/tramp way of life, and co...",NEGATIVE,POSITIVE
4447,nah will didn`t tell diego so we at some donm...,NEGATIVE,POSITIVE
8002,seriously my parents are non stop minniapolis...,NEGATIVE,POSITIVE
9171,my monitor won`t turn on. This can only be the...,POSITIVE,NEGATIVE
14933,http://twitpic.com/4wh0o - My mommys gummy mot...,NEGATIVE,POSITIVE
16892,my mother is drunk grrrreat.. It`s okay it`s m...,POSITIVE,NEGATIVE
21255,I really miss Sebastian,NEGATIVE,POSITIVE
21659,Missing my mother... t-11 days! HAPPY MOTHER`S...,NEGATIVE,POSITIVE


In [8]:
# Print every differing row with its complete text and both labels.
# NOTE(review): 'Sentiment_fine' is printed as "Fine tuned" here but is called
# the "actual" label in the first cell -- confirm which description is correct.
review_rows = differences[["text", "Predicted_Sentiment", "Sentiment_fine"]]
for idx, text, predicted_label, fine_label in review_rows.itertuples(index=True, name=None):
    print(f"\nRow {idx}")
    print("Text:", text)
    print("Prediction:", predicted_label)
    print("Fine tuned:", fine_label)



Row 1
Text:  Sooo SAD I will miss you here in San Diego!!!
Prediction: POSITIVE
Fine tuned: NEGATIVE

Row 3319
Text: missing my hobo/****/tramp way of life, and cooking for my mother on a saturday night  MAMA I LUV U
Prediction: NEGATIVE
Fine tuned: POSITIVE

Row 4447
Text:  nah will didn`t tell diego so we at some donminican spot getting **** up
Prediction: NEGATIVE
Fine tuned: POSITIVE

Row 8002
Text: seriously my parents are non stop  minniapolis, newport for a week, and now this weekend in San Diego  i miss them 
Prediction: NEGATIVE
Fine tuned: POSITIVE

Row 9171
Text: my monitor won`t turn on. This can only be the work of my mother.
Prediction: POSITIVE
Fine tuned: NEGATIVE

Row 14933
Text: http://twitpic.com/4wh0o - My mommys gummy mothers day posterrr!
Prediction: NEGATIVE
Fine tuned: POSITIVE

Row 16892
Text: my mother is drunk grrrreat.. It`s okay it`s mothers week
Prediction: POSITIVE
Fine tuned: NEGATIVE

Row 21255
Text: I really miss Sebastian
Prediction: NEGATIVE
Fine tu