In [5]:
# 1. Install dependencies (if running in Colab)
!pip install datasets scikit-learn seaborn matplotlib



In [6]:

# 2. Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from datasets import load_dataset


In [None]:
!pip install datasets --upgrade --quiet
!pip install fsspec==2023.6.0 --quiet

from datasets import load_dataset

dataset = load_dataset("tweet_eval", "emotion")

print(dataset)


In [None]:

# Convert to pandas dataframe for convenience.
# Dataset.to_pandas() is the dataset's own converter, so we do not rely on
# pandas having to consume the Dataset object as a generic iterable.
train_df = dataset['train'].to_pandas()
test_df = dataset['test'].to_pandas()

# Map label numbers to emotion names (labels from TweetEval)
label_map = {0: 'anger', 1: 'joy', 2: 'optimism', 3: 'sadness'}
train_df['label_name'] = train_df['label'].map(label_map)
test_df['label_name'] = test_df['label'].map(label_map)

# Series.map() leaves NaN for any label id missing from label_map; fail loudly
# here instead of letting NaN targets reach the model or the metrics.
assert train_df['label_name'].notna().all(), "train split has label ids missing from label_map"
assert test_df['label_name'].notna().all(), "test split has label ids missing from label_map"



In [None]:

# 4. Prepare data
# Inputs are the lower-cased tweet texts; targets are the emotion names
# stored in the 'label_name' column.
X_train, y_train = train_df['text'].str.lower(), train_df['label_name']
X_test, y_test = test_df['text'].str.lower(), test_df['label_name']


In [None]:

# 5. Vectorize text using TF-IDF
# Upper bound on the number of vocabulary terms kept by the vectorizer.
TFIDF_MAX_FEATURES = 5000

vectorizer = TfidfVectorizer(max_features=TFIDF_MAX_FEATURES)

# Learn the vocabulary and IDF weights from the training texts only, then
# project both splits with the same fitted vectorizer.
vectorizer.fit(X_train)
X_train_tfidf = vectorizer.transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:

# 6. Train logistic regression model
# Hyperparameters are collected in one place so they are easy to spot and tune.
logreg_params = {"max_iter": 1000, "random_state": 42}

model = LogisticRegression(**logreg_params)
# Left as the cell's final expression so the fitted estimator is displayed.
model.fit(X_train_tfidf, y_train)


In [None]:

# 7. Predict on test set and evaluate
# Predicted emotion name for every test tweet.
y_pred = model.predict(X_test_tfidf)

# Fraction of test tweets whose predicted emotion equals the true one.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy: {acc:.4f}")


In [None]:

# 8. Plot confusion matrix
# Use the class order stored on the fitted model for both the matrix and the
# tick labels so rows/columns and their names stay aligned.
class_names = model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_names)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()



In [10]:
# 9. Example predictions
# Hand-written probe sentences that are fed to the trained model in the next
# cell. They are model input, so the text is kept exactly as written.
sample_texts = [
    "I am happy",
    "India loss and pakisthan win the match",
    "looking good",
    "Why are you so angry at me?",
]


In [None]:

# Vectorize the probe sentences with the already-fitted vectorizer (transform
# only, no re-fitting) and classify them with the trained model.
sample_tfidf = vectorizer.transform(sample_texts)
sample_preds = model.predict(sample_tfidf)

# Report each sentence next to the emotion predicted for it.
for idx, text in enumerate(sample_texts):
    pred = sample_preds[idx]
    print(f"Text: {text} --> Predicted Emotion: {pred}")