In [None]:
%pip install transformers datasets scikit-learn torch

In [None]:
import json

with open('data/synthetic2.json', 'r') as f:
    data = json.load(f)

print(len(data), "samples loaded")

texts = [item['sentence'] for item in data]
mood_scores = [item['mood'] for item in data]

In [3]:
from transformers import AutoTokenizer

# Load BERT tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the data

encodings = tokenizer(
    texts,
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors='pt'
)

In [4]:
import torch
from torch.utils.data import Dataset

class MoodDataset(Dataset):
    def __init__(self, encodings, moods):
        self.encodings = encodings
        self.moods = moods
    
    def __len__(self):
        return len(self.moods)
    
    def __getitem__(self, idx):
        return {
            'input_ids': self.encodings['input_ids'][idx],
            'attention_mask': self.encodings['attention_mask'][idx],
            'mood': torch.tensor(self.moods[idx], dtype=torch.float)
        }

# Create dataset
dataset = MoodDataset(encodings, mood_scores)


In [5]:
from torch.utils.data import random_split, DataLoader

# Split dataset into training and validation sets
total_size = len(dataset)
train_size = int(0.8 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [6]:
from transformers import AutoModel
import torch.nn as nn
import torch

class MoodBert(nn.Module):
    def __init__(self):
        super().__init__()
        self.bert = AutoModel.from_pretrained("bert-base-uncased")
        self.dropout = nn.Dropout(0.3)
        hidden_size = self.bert.config.hidden_size

        self.mood_head = nn.Linear(hidden_size, 1)   # Scalar mood score
    
    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = self.dropout(outputs.pooler_output)
        mood = torch.sigmoid(self.mood_head(pooled)) * 4 + 1
        return mood

In [7]:
from torch.optim import AdamW
from tqdm import tqdm
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MoodBert().to(device)

# Loss functions
mood_loss_fn = nn.MSELoss()

optimizer = AdamW(model.parameters(), lr=2e-5)

In [None]:
epochs = 10
train_mood_losses = []
val_mood_losses = []
test_mood_losses = []

best_val_loss = float('inf')
best_model_state = None

for epoch in range(epochs):
    model.train()
    total_mood_train_loss = 0

    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1} - Training"):
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        mood = batch['mood'].float().unsqueeze(1).to(device)

        mood_pred = model(input_ids, attention_mask)
        mood_loss = mood_loss_fn(mood_pred, mood)

        mood_loss.backward()
        optimizer.step()

        total_mood_train_loss += mood_loss.item()

    avg_train_loss = total_mood_train_loss / len(train_loader)
    train_mood_losses.append(avg_train_loss)

    # --- Validation ---
    model.eval()
    total_mood_val_loss = 0

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            mood = batch['mood'].float().unsqueeze(1).to(device)

            mood_pred = model(input_ids, attention_mask)
            mood_loss = mood_loss_fn(mood_pred, mood)

            total_mood_val_loss += mood_loss.item()

    avg_val_loss = total_mood_val_loss / len(val_loader)
    val_mood_losses.append(avg_val_loss)

    # Save best model
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        best_model_state = model.state_dict()

    print(f"Epoch {epoch+1} | "
          f"Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

# --- Test Evaluation ---
    model.eval()
    total_mood_test_loss = 0

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            mood = batch['mood'].float().unsqueeze(1).to(device)

            mood_pred = model(input_ids, attention_mask)
            mood_loss = mood_loss_fn(mood_pred, mood)

            total_mood_test_loss += mood_loss.item()

    avg_test_loss = total_mood_test_loss / len(test_loader)
    test_mood_losses.append(avg_test_loss)

    print(f"Epoch {epoch+1}: Test Loss = {avg_test_loss:.4f}")

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

model.eval()

mood_true_test = []
mood_pred_test = []

with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        mood = batch['mood'].to(device)

        mood_output = model(input_ids, attention_mask)
        mood_pred = mood_output.squeeze(1)  # scalar float output

        mood_true_test.extend(mood.cpu().tolist())
        mood_pred_test.extend(mood_pred.cpu().tolist())

# Metrics
mood_mse = mean_squared_error(mood_true_test, mood_pred_test)
mood_mae = mean_absolute_error(mood_true_test, mood_pred_test)
r2 = r2_score(mood_true_test, mood_pred_test)

print(f"✅ Test Mood MSE: {mood_mse:.4f}")
print(f"✅ Test Mood MAE: {mood_mae:.4f}")
print(f"✅ Test Mood R2: {r2:.4f}")

In [None]:
import matplotlib.pyplot as plt

plt.plot(range(1, epochs + 1), train_mood_losses, label='Train MSE', marker='o')
plt.plot(range(1, epochs + 1), val_mood_losses, label='Val MSE', marker='s')
plt.plot(range(1, epochs + 1), test_mood_losses, label='Test MSE', marker='^')

plt.xlabel('Epoch')
plt.ylabel('Mean Squared Error')
plt.title('📉 Mood Prediction Loss Over Epochs')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [11]:
torch.save(best_model_state, "best_model.pt")

In [26]:
model.load_state_dict(torch.load("well-performing.pt", map_location=device))

<All keys matched successfully>

In [27]:
# Tests
test_sentences = [
    "I’m over the moon about my promotion!",
    "Feeling really down and stressed today.",
    "Just another normal day, nothing special.",
    "I’m anxious about the upcoming exam.",
    "Had an amazing time with my friends last night!",
    "I’m so frustrated that things aren’t going my way.",
    "Feeling calm and relaxed after a nice walk.",
    "Today was exhausting but rewarding.",
    "I’m worried about my health lately.",
    "Everything is perfect, couldn’t be happier.",
    "I hate everything right now, nothing makes sense.",
    "I feel nothing, just numb.",
    "This is the best day of my life!",
    "I’m scared and don’t know what to do.",
    "Mildly pleased but mostly indifferent.",
    "Completely ecstatic about the surprise party.",
    "I want to cry, but I can’t.",
    "I feel peaceful and content.",
    "Today sucked, I want it to end.",
    "So excited for the vacation next week!",
    "I’m furious about the unfair treatment.",
    "A bit tired but in good spirits.",
    "Nothing bothers me today.",
    "I’m hopeful but cautious about the future.",
    "Feeling utterly defeated and hopeless."
]


In [28]:
def predict_mood(texts):
    model.eval()
    results = []
    for text in texts:
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
        with torch.no_grad():
            pred = model(inputs['input_ids'], inputs['attention_mask']).squeeze().item()
        results.append((text, pred))
    return results

predictions = predict_mood(test_sentences)

for text, mood_score in predictions:
    print(f"Input: {text}\nPredicted Mood: {mood_score:.2f}\n")


Input: I’m over the moon about my promotion!
Predicted Mood: 3.14

Input: Feeling really down and stressed today.
Predicted Mood: 1.84

Input: Just another normal day, nothing special.
Predicted Mood: 2.64

Input: I’m anxious about the upcoming exam.
Predicted Mood: 2.40

Input: Had an amazing time with my friends last night!
Predicted Mood: 4.24

Input: I’m so frustrated that things aren’t going my way.
Predicted Mood: 1.72

Input: Feeling calm and relaxed after a nice walk.
Predicted Mood: 4.26

Input: Today was exhausting but rewarding.
Predicted Mood: 4.12

Input: I’m worried about my health lately.
Predicted Mood: 2.31

Input: Everything is perfect, couldn’t be happier.
Predicted Mood: 4.38

Input: I hate everything right now, nothing makes sense.
Predicted Mood: 1.27

Input: I feel nothing, just numb.
Predicted Mood: 1.57

Input: This is the best day of my life!
Predicted Mood: 4.80

Input: I’m scared and don’t know what to do.
Predicted Mood: 1.45

Input: Mildly pleased but most

In [29]:
def predict_mood_single(sentence):
    model.eval()
    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        pred = model(inputs['input_ids'], inputs['attention_mask']).squeeze().item()
    return pred


In [30]:
import re
import numpy as np

def split_sentences(paragraph):
    """
    Splits a paragraph into sentences using basic punctuation rules.
    You can replace this with nltk.sent_tokenize() if needed.
    """
    sentences = re.split(r'(?<=[.!?])\s+', paragraph.strip())
    return [s for s in sentences if s]

def analyze_paragraph_mood(moods):
    moods = np.array(moods)
    centered = moods - 3.0
    mean_polarity = np.mean(centered)
    mood_strength = np.sqrt(np.mean(centered ** 2))
    signed_strength = np.sign(mean_polarity) * mood_strength
    return {
        "mean_polarity": round(mean_polarity, 4),
        "mood_strength": round(mood_strength, 4),
        "signed_strength": round(signed_strength, 4)
    }

def paragraph_to_mood_stats(paragraph):
    """
    Given a paragraph and a mood prediction function, returns mood analysis.
    
    Args:
        paragraph (str): Full paragraph of text.
        predict_mood_fn (function): Function that takes a sentence and returns a float (predicted mood).
    
    Returns:
        dict: {
            "sentences": [...],
            "moods": [...],
            "mean_polarity": float,
            "mood_strength": float,
            "signed_strength": float
        }
    """
    sentences = split_sentences(paragraph)
    moods = [float(predict_mood_single(s)) for s in sentences]
    print(moods)
    stats = analyze_paragraph_mood(moods)
    
    return {
        "sentences": sentences,
        "moods": moods,
        **stats
    }


In [31]:
paragraphs = [
    "I woke up early and actually felt refreshed for once. The sun was out, the coffee was perfect, and I even got through my inbox before 10am. I’m feeling productive and a little proud of myself. Things just seemed to click today. It’s been a while since I felt this good.",
    
    "Nothing really worked out today. I missed my alarm, spilled coffee on my shirt, and couldn’t focus during work. I don’t even know why I’m so tired, but everything feels harder than it should. I didn’t have the energy to cook, so I just skipped dinner. It’s the kind of day I want to forget.",
    
    "It was just a day. I did some things. Ate lunch. Checked my emails. I can’t really say if it was good or bad—it just was.",
    
    "Things haven’t been easy lately, but I think I’m starting to feel more like myself. I’m still anxious about what’s ahead, but I have a bit more energy. Maybe it’s not all downhill from here. I managed to finish something I’ve been putting off for weeks. That small win felt bigger than I expected.",
    
    "I cried today thinking about everything I’ve lost. But I also called a friend and we talked for hours. There’s still a heaviness in my chest, but it helped. I’m not okay, but I’m trying. Some part of me still believes this will get better.",
    
    "I got a lot done today, but I didn’t enjoy any of it. Everything just felt mechanical. No excitement, no dread, just motion. People kept telling me I seemed calm, but I felt empty. I’m functioning, but I’m not really present.",
    
    "The rain outside was oddly comforting. I made a warm drink, lit a candle, and read for an hour without checking my phone. No one needed anything from me, and I didn’t feel guilty for doing nothing. It wasn’t exciting, but it was peaceful. I hope I can hold onto that stillness.",
    
    "I spent most of the day trying to keep it together. Every little thing felt like a trigger. I smiled through meetings and forced small talk. But as soon as I got home, I just collapsed into silence. I don’t even have the energy to be upset anymore.",
    
    "I’m feeling optimistic for the first time in weeks. My mind is clearer, and I finally have a plan. I don’t know if it’ll all work out, but it feels good to care again. I made a checklist and started chipping away. One small step at a time.",
    
    "I had no expectations for today, and maybe that’s why it turned out okay. Nothing amazing happened, but nothing terrible either. I laughed at a stupid meme, finished my chores, and had dinner with my family. For once, I wasn’t overthinking everything. It felt… normal, in the best way.",

    "I didn’t sleep last night, and I didn’t care. I stayed in bed until noon, staring at the ceiling, trying to feel something. Even getting up to drink water felt pointless. Nothing I do matters anymore, and no one would notice if I disappeared. I don’t see a way forward.",

    "Today was pure joy from start to finish. I laughed until my stomach hurt, surrounded by people I love and who love me back. Every moment felt alive — the colors brighter, the sounds warmer. I caught myself smiling for no reason at all. If I could bottle this feeling, I’d carry it with me forever."
]


In [32]:
for paragraph in paragraphs:
    print(paragraph_to_mood_stats(paragraph))


[4.383473873138428, 4.066745281219482, 4.002580642700195, 3.344392776489258, 3.9587061405181885]
{'sentences': ['I woke up early and actually felt refreshed for once.', 'The sun was out, the coffee was perfect, and I even got through my inbox before 10am.', 'I’m feeling productive and a little proud of myself.', 'Things just seemed to click today.', 'It’s been a while since I felt this good.'], 'moods': [4.383473873138428, 4.066745281219482, 4.002580642700195, 3.344392776489258, 3.9587061405181885], 'mean_polarity': np.float64(0.9512), 'mood_strength': np.float64(1.0094), 'signed_strength': np.float64(1.0094)}
[2.2732977867126465, 2.1263914108276367, 2.2334413528442383, 2.116490364074707, 2.1001601219177246]
{'sentences': ['Nothing really worked out today.', 'I missed my alarm, spilled coffee on my shirt, and couldn’t focus during work.', 'I don’t even know why I’m so tired, but everything feels harder than it should.', 'I didn’t have the energy to cook, so I just skipped dinner.', 'It

In [19]:
test_paragraph = "The day went smoothly — no major issues, nothing urgent. I smiled when people asked how I was doing, and I even cracked a few jokes. Everything looked fine on the outside. But the truth is, I felt disconnected from all of it. I kept wondering why I didn’t feel more present, more alive."

In [None]:
paragraph_to_mood_stats(test_paragraph)