In [1]:
%reload_ext jupyter_black
%reload_ext autoreload
%autoreload 2

In [None]:
import torch
import torch.nn as nn
import numpy as np
import json
from sklearn.metrics import roc_auc_score, accuracy_score
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from lightning.pytorch import seed_everything

from util import MLPRegressor, train
from sentiment import SentimentClassifier

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
with open('data/gemma_train_random_crop_10_50.json') as f:
    train_data = json.load(f)

with open('data/gemma_test_random_crop_10_50.json') as f:
    test_data = json.load(f)

X_train = np.array(train_data['mean'], dtype=np.float32)
y_train = np.array(train_data['label'], dtype=np.float32)
X_test = np.array(test_data['mean'], dtype=np.float32)
y_test = np.array(test_data['label'], dtype=np.float32)

print(f'Train: {X_train.shape}, Test: {X_test.shape}')

Train: (25000, 768), Test: (25000, 768)


In [None]:
X_train_t = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_t = torch.tensor(y_train, dtype=torch.float32, device=device)
X_test_t = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_t = torch.tensor(y_test, dtype=torch.float32, device=device)

generator = torch.Generator().manual_seed(42)
train_loader = DataLoader(TensorDataset(X_train_t, y_train_t), batch_size=32, shuffle=True, generator=generator)
test_loader = DataLoader(TensorDataset(X_test_t, y_test_t), batch_size=1024)

In [None]:
seed_everything(42)

model = MLPRegressor(input_size=X_train.shape[1], hidden_size=10).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=2e-3)
criterion = nn.BCEWithLogitsLoss()

df = train(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    epochs=1,
    val_loader=test_loader,
    score_fns={
        'roc_auc': lambda y, p: roc_auc_score(y, p),
        'accuracy': lambda y, p: accuracy_score(y, p > 0.5),
    },
    output_fn=torch.sigmoid,
)
df

In [None]:
seed_everything(42)

model = MLPRegressor(input_size=X_train.shape[1], hidden_size=10).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=2e-3)
criterion = nn.BCEWithLogitsLoss()

df = train(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    epochs=1,
    val_loader=test_loader,
    score_fns={
        'roc_auc': lambda y, p: roc_auc_score(y, p),
        'accuracy': lambda y, p: accuracy_score(y, p > 0.5),
    },
    output_fn=torch.sigmoid,
)
df

Seed set to 42
Seed set to 42
100%|██████████| 1/1 [00:02<00:00,  2.17s/it]


Unnamed: 0,epoch,train_loss,val_loss,roc_auc,accuracy
0,1,0.45079,0.432744,0.880027,0.79112


In [10]:
torch.save(model.state_dict(), 'data/sentiment_head.pt')
print('Saved model to data/sentiment_head.pt')

Saved model to data/sentiment_head.pt


In [11]:
classifier = SentimentClassifier(
    model_name='google/embeddinggemma-300m',
    hidden_size=10,
    device=device,
)
classifier.load_head('data/sentiment_head.pt')
print('Loaded combined model')

Loaded combined model


In [None]:
test_texts = [
    'This movie was absolutely fantastic! I loved every minute of it.',
    'Terrible film. Waste of time and money. Do not recommend.',
    'The acting was superb and the plot kept me engaged throughout.',
    'Boring and predictable. I fell asleep halfway through.',
    'A masterpiece of cinema. One of the best films I have ever seen.',
    'Awful movie with terrible acting and a nonsensical plot.',
    'Could have '
]

predictions = classifier.predict(test_texts)

for text, pred in zip(test_texts, predictions):
    sentiment = 'positive' if pred > 0.5 else 'negative'
    print(f'{pred:.3f} ({sentiment}): {text[:60]}...')

0.999 (positive): This movie was absolutely fantastic! I loved every minute of...
0.001 (negative): Terrible film. Waste of time and money. Do not recommend....
0.999 (positive): The acting was superb and the plot kept me engaged throughou...
0.002 (negative): Boring and predictable. I fell asleep halfway through....
0.999 (positive): A masterpiece of cinema. One of the best films I have ever s...
0.000 (negative): Awful movie with terrible acting and a nonsensical plot....


In [None]:
import datasets

imdb = datasets.load_dataset('imdb')
test_subset = imdb['test'].select(range(500))

batch_size = 64
all_preds = []

for i in tqdm(range(0, len(test_subset), batch_size)):
    batch_texts = test_subset['text'][i:i + batch_size]
    preds = classifier.predict(batch_texts).cpu().numpy()
    all_preds.extend(preds)

all_preds = np.array(all_preds)
all_labels = np.array(test_subset['label'])

print(f'ROC AUC: {roc_auc_score(all_labels, all_preds):.4f}')
print(f'Accuracy: {accuracy_score(all_labels, all_preds > 0.5):.4f}')