In [2]:
from textblob import TextBlob

def analyze_sentiment_class(text, threshold=0.1):
    """Label ``text`` as "positive", "negative" or "neutral" from its TextBlob polarity.

    Parameters
    ----------
    text : str
        Text to score. It is handed to ``TextBlob`` unchanged, so whatever
        ``TextBlob`` rejects (presumably non-string values such as NaN --
        confirm against the TextBlob docs) is rejected here as well.
    threshold : float, default 0.1
        Half-width of the neutral band around zero. A polarity strictly
        greater than ``threshold`` is "positive", strictly less than
        ``-threshold`` is "negative"; everything else, including both
        boundaries, is "neutral". The default reproduces the original
        hard-coded cut-offs.

    Returns
    -------
    str
        One of "positive", "negative" or "neutral".

    Raises
    ------
    ValueError
        If ``threshold`` is negative (the positive and negative bands would
        overlap and the label would depend on branch order).
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")

    # Polarity is a float between -1 and 1.
    polarity = TextBlob(text).sentiment.polarity

    if polarity > threshold:
        return "positive"
    if polarity < -threshold:
        return "negative"
    return "neutral"

# Smoke test: run the classifier over a few hand-written sentences and print
# the label assigned to each one.
test_strings = [
    "I love the 272 class.",
    "I hate singing song since my voice is not that great",
    "My bachlor classes were okay, neither good nor bad.",
]

# map() is lazy, so each sentence is classified right before it is printed.
for text, result in zip(test_strings, map(analyze_sentiment_class, test_strings)):
    print(f"Text: '{text}' -> Sentiment: {result}")

Text: 'I love the 272 class.' -> Sentiment: positive
Text: 'I hate singing song since my voice is not that great' -> Sentiment: neutral
Text: 'My bachlor classes were okay, neither good nor bad.' -> Sentiment: positive


In [10]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Load the training split and discard rows whose tweet text is missing.
df = pd.read_csv(
    "/kaggle/input/sentiment-analysis-dataset/train.csv",
    encoding="ISO-8859-1",
).dropna(subset=['text'])

# Label every remaining tweet with the TextBlob-based classifier.
df['predicted_sentiment'] = df['text'].apply(analyze_sentiment_class)
print(df[['text', 'predicted_sentiment']].head())

# Compare the predictions with the dataset's own 'sentiment' column; the
# per-class scores are combined with the 'weighted' average.
y_true = df['sentiment']
y_pred = df['predicted_sentiment']

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

# Performance summary, four decimals each.
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

                                                text predicted_sentiment
0                I`d have responded, if I were going             neutral
1      Sooo SAD I will miss you here in San Diego!!!            negative
2                          my boss is bullying me...             neutral
3                     what interview! leave me alone             neutral
4   Sons of ****, why couldn`t they put them on t...             neutral
Accuracy: 0.5978
Precision: 0.6136
Recall: 0.5978
F1-score: 0.5892


In [11]:
from google.colab import files 
from IPython.display import FileLink

# List the columns of the current `df`, write it to CSV without the index,
# and leave a download link as the cell's displayed value.
train_output_path = 'train_output.csv'
print(df.columns)
df.to_csv(train_output_path, index=False)
FileLink(train_output_path)

Index(['textID', 'text', 'selected_text', 'sentiment', 'Time of Tweet',
       'Age of User', 'Country', 'Population -2020', 'Land Area (Km²)',
       'Density (P/Km²)', 'predicted_sentiment'],
      dtype='object')


In [12]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import pandas as pd

# Load the test split and discard rows whose tweet text is missing.
df = pd.read_csv(
    "/kaggle/input/sentiment-analysis-dataset/test.csv",
    encoding="ISO-8859-1",
).dropna(subset=['text'])

# Label every remaining tweet with the TextBlob-based classifier.
df['predicted_sentiment'] = df['text'].apply(analyze_sentiment_class)
print(df[['text', 'predicted_sentiment']].head())

# Compare the predictions with the dataset's own 'sentiment' column; the
# per-class scores are combined with the 'weighted' average.
y_true = df['sentiment']
y_pred = df['predicted_sentiment']

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

# Performance summary, four decimals each.
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

                                                text predicted_sentiment
0  Last session of the day  http://twitpic.com/67ezh             neutral
1   Shanghai is also really exciting (precisely -...            positive
2  Recession hit Veronique Branquinho, she has to...             neutral
3                                        happy bday!            positive
4             http://twitpic.com/4w75p - I like it!!             neutral
Accuracy: 0.6038
Precision: 0.6215
Recall: 0.6038
F1-score: 0.5982


In [13]:
from google.colab import files 
from IPython.display import FileLink

# List the columns of the current `df`, write it to CSV without the index,
# and leave a download link as the cell's displayed value.
test_output_path = 'test_output.csv'
print(df.columns)
df.to_csv(test_output_path, index=False)
FileLink(test_output_path)

Index(['textID', 'text', 'sentiment', 'Time of Tweet', 'Age of User',
       'Country', 'Population -2020', 'Land Area (Km²)', 'Density (P/Km²)',
       'predicted_sentiment'],
      dtype='object')
