In [2]:
# Notebook tooling: (re)load the jupyter_black formatting extension, then enable
# autoreload in mode 2 so edited modules are re-imported before each cell runs.
%reload_ext jupyter_black
%reload_ext autoreload
%autoreload 2

In [34]:
import torch
import torch.nn as nn
import numpy as np
import json
from sklearn.metrics import roc_auc_score, accuracy_score
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from lightning.pytorch import seed_everything
import datasets
from util import MLPRegressor, train
from sentiment import SentimentClassifier
import itertools

In [3]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [4]:
def _read_json(path):
    """Parse the JSON file at ``path`` and return the decoded object."""
    with open(path) as handle:
        return json.load(handle)


train_data = _read_json('data/gemma_train_random_crop_10_50.json')
test_data = _read_json('data/gemma_test_random_crop_10_50.json')

# Each payload exposes a 'mean' entry (used as the feature matrix) and a 'label'
# entry (used as the target vector); both are converted to float32 arrays.
# NOTE(review): 'mean' is presumably a mean-pooled text embedding -- confirm
# against the script that produced these files.
X_train, y_train = (
    np.array(train_data[key], dtype=np.float32) for key in ('mean', 'label')
)
X_test, y_test = (
    np.array(test_data[key], dtype=np.float32) for key in ('mean', 'label')
)

print(f'Train: {X_train.shape}, Test: {X_test.shape}')

Train: (25000, 768), Test: (25000, 768)


In [5]:
# Copy the NumPy splits onto `device` as float32 tensors.
X_train_t, y_train_t, X_test_t, y_test_t = (
    torch.tensor(split, dtype=torch.float32, device=device)
    for split in (X_train, y_train, X_test, y_test)
)

# Dedicated generator (seed 42) that drives the shuffling of the training loader.
generator = torch.Generator()
generator.manual_seed(42)

train_dataset = TensorDataset(X_train_t, y_train_t)
test_dataset = TensorDataset(X_test_t, y_test_t)

# Small shuffled batches for training; large unshuffled batches for evaluation.
train_loader = DataLoader(
    train_dataset, batch_size=32, shuffle=True, generator=generator
)
test_loader = DataLoader(test_dataset, batch_size=1024)

In [12]:
# Train the small classification head on the pre-computed features.
seed_everything(42)
# `train_loader` shuffles with the `generator` object created in the data-loader
# cell, and that generator advances every time the loader is iterated. Rewind it
# here so re-running this cell reproduces the same batch order (and therefore the
# same metrics) instead of silently depending on how often the cell has been run.
# On a fresh "Restart & Run All" this is a no-op.
generator.manual_seed(42)

model = MLPRegressor(input_size=X_train.shape[1], hidden_size=10).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)
# BCEWithLogitsLoss takes raw logits, so the model output is not squashed here.
criterion = nn.BCEWithLogitsLoss()

df = train(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    epochs=1,
    val_loader=test_loader,
    score_fns={
        'roc_auc': lambda y, p: roc_auc_score(y, p),
        # Hard decisions are taken at a 0.5 threshold on the scored output.
        'accuracy': lambda y, p: accuracy_score(y, p > 0.5),
    },
    # NOTE(review): presumably `train` applies this to the logits before calling
    # the score functions -- confirm in util.train.
    output_fn=torch.sigmoid,
)
df

Seed set to 42
Seed set to 42
Epoch 1/1:   0%|          | 0/782 [00:00<?, ?it/s, loss=0.6600]

Epoch 1/1: 100%|██████████| 782/782 [00:03<00:00, 234.30it/s, loss=0.1616]


Epoch 1/1  train_loss=0.4541  val_loss=0.4342  roc_auc=0.8810  accuracy=0.7910


Unnamed: 0,epoch,train_loss,val_loss,roc_auc,accuracy
0,1,0.454086,0.434186,0.880964,0.791


In [13]:
# Persist only the trained head's weights (its state dict), not the whole module.
head_state = model.state_dict()
torch.save(head_state, 'data/sentiment_head.pt')
print('Saved model to data/sentiment_head.pt')

Saved model to data/sentiment_head.pt


In [25]:
# Build the end-to-end classifier and load the head weights saved to
# 'data/sentiment_head.pt'. `hidden_size` is 10 here, the same value used when
# the head was trained in this notebook.
classifier = SentimentClassifier(
    device=device,
    hidden_size=10,
    model_name='google/embeddinggemma-300m',
)
classifier.load_head('data/sentiment_head.pt')
print('Loaded combined model')

# Sanity check: the combined model must not expose any trainable parameter.
trainable_params = [p for p in classifier.parameters() if p.requires_grad]
assert not trainable_params

Loaded combined model


# Sanity check on hand-written example reviews

In [26]:
# Hand-written reviews: clearly positive, clearly negative, and a few ambiguous ones.
test_texts = [
    'This movie was absolutely fantastic! I loved every minute of it.',
    'Terrible film. Waste of time and money. Do not recommend.',
    'The acting was superb and the plot kept me engaged throughout.',
    'Boring and predictable. I fell asleep halfway through.',
    'A masterpiece of cinema. One of the best films I have ever seen.',
    'Awful movie with terrible acting and a nonsensical plot.',
    'I don\'t know, quite average movie',
    'Not to my taste, but others might like it',
    'A good enough movie for an evening',
]

predictions = classifier.predict(test_texts)

# Longest text prefix shown per line.
max_preview_chars = 60

for text, pred in zip(test_texts, predictions):
    sentiment = 'positive' if pred > 0.5 else 'negative'
    # Only mark the preview with an ellipsis when the text was actually cut;
    # short reviews are printed verbatim so the output does not suggest that
    # something was left out.
    if len(text) > max_preview_chars:
        preview = f'{text[:max_preview_chars]}...'
    else:
        preview = text
    print(f'{pred:.3f} ({sentiment}): {preview}')

0.999 (positive): This movie was absolutely fantastic! I loved every minute of...
0.000 (negative): Terrible film. Waste of time and money. Do not recommend....
0.997 (positive): The acting was superb and the plot kept me engaged throughou...
0.000 (negative): Boring and predictable. I fell asleep halfway through....
0.997 (positive): A masterpiece of cinema. One of the best films I have ever s...
0.000 (negative): Awful movie with terrible acting and a nonsensical plot....
0.324 (negative): I don't know, quite average movie...
0.487 (negative): Not to my taste, but others might like it...
0.721 (positive): A good enough movie for an evening...


# Final check: end-to-end evaluation on 500 shuffled IMDB test reviews

In [30]:
# Evaluate the full text -> score pipeline on a fixed random subset of the IMDB
# test split (500 reviews, shuffle seed 42).
imdb = datasets.load_dataset('imdb')
test_subset = imdb['test'].shuffle(seed=42).select(range(500))

batch_size = 64
pred_chunks = []

for start in tqdm(range(0, len(test_subset), batch_size)):
    # Slice the rows first and then pick the column, so only the current batch
    # is read instead of materialising the whole 'text' column on every pass.
    batch_texts = test_subset[start : start + batch_size]['text']
    preds = classifier.predict(batch_texts).cpu().numpy()
    pred_chunks.extend(preds)

all_preds = np.array(pred_chunks)
all_labels = np.array(test_subset['label'])
# Guard against silently scoring misaligned predictions and labels.
assert len(all_preds) == len(all_labels), 'predictions and labels differ in length'

print(f'ROC AUC: {roc_auc_score(all_labels, all_preds):.4f}')
print(f'Accuracy: {accuracy_score(all_labels, all_preds > 0.5):.4f}')

100%|██████████| 8/8 [00:08<00:00,  1.10s/it]

ROC AUC: 0.9764
Accuracy: 0.9220





# Study model mistakes

In [53]:
# A "confident mistake" is a prediction that lies more than 0.6 away from the
# true label. The absolute difference is used so that both directions are
# covered: positives scored below 0.4 and negatives scored above 0.6. A signed
# `label - pred` difference would only ever select the former.
error_indices = np.where(np.abs(all_labels - all_preds) > 0.6)[0]
errors = test_subset.select(error_indices)
error_predictions = all_preds[error_indices]

# Print at most the first 10 mistakes: true label, predicted score, review text.
print(
    '\n\n'.join(
        f'real: {label}, predicted: {pred}\n{text}'
        for label, text, pred in itertools.islice(
            zip(
                errors['label'],
                errors['text'],
                error_predictions,
            ),
            10,
        )
    ),
)

real: 1, predicted: 0.3447573482990265
i was having a horrid day but this movie grabbed me, and i couldn't put it down until the end... and i had forgotten about my horrid day. and the ending... by the way... where is the sequel!!!<br /><br />the budget is obviously extremely low... but ... look what they did with it! it reminds me of a play... they are basically working with a tent, a 'escape pod', a few guns, uniforms, camping gear, and a 'scanner' thing. that is it for props. Maybe this is even a good thing, forcing the acting and writing to have to step up and take their rightful place in film, as the centers of the work, instead of as afterthoughts used to have an excuse to make CGI fights (starwars).<br /><br />The cgi is fine. It is not exactly 'seamless'... but imho it still works. why? because there isn't too much of it, and what there is, is not 'taking over' with an army of effects house people trying to cram everything they can into the shot. it prompts the imagination... i