In [1]:
from transformers import (
    AutoTokenizer,
    DataCollatorWithPadding,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)
from transformers.utils import logging
import evaluate
import numpy as np
import pandas as pd
import torch
import os
from sklearn.metrics import mean_squared_error

# Turn off the tokenizers library's internal parallelism for this kernel.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

dataset = pd.read_csv("./letterboxd_250movie_reviews_normalized_sampled.csv")
model_path = "google-bert/bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_path)

# Ten rating classes; the class index is the position of the label in this
# list (0 -> "½", ..., 9 -> "★★★★★").
_star_labels = [
    "½",
    "★",
    "★½",
    "★★",
    "★★½",
    "★★★",
    "★★★½",
    "★★★★",
    "★★★★½",
    "★★★★★",
]
id2label = dict(enumerate(_star_labels))
# The reverse mapping is derived rather than typed out, so the two lookups
# cannot drift apart.
label2id = {label: idx for idx, label in id2label.items()}

# Sequence-classification model with a 10-way head and the label maps attached
# to its config.
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=10,
    id2label=id2label,
    label2id=label2id,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
# Put the model in evaluation mode (disables dropout) before running inference.
# As the last expression in the cell, the returned module is echoed as output.
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [3]:
# Smoke test: classify one hand-written review and inspect the raw outputs.
review = "This movie was fantastic! Great acting and amazing plot."

# BERT has only 512 position embeddings, so truncation must cap the sequence at
# 512 tokens. A larger max_length would let an over-long review through and the
# forward pass would fail with an embedding index error.
inputs = tokenizer(
    review, return_tensors="pt", padding=True, truncation=True, max_length=512
)
print("Tokenized input:")
print(f"Input IDs shape: {inputs['input_ids'].shape}")
print(f"Attention mask shape: {inputs['attention_mask'].shape}")

# Make prediction (no gradient calculation needed for inference)
with torch.no_grad():
    outputs = model(**inputs)
    # Softmax over the class dimension turns logits into per-class probabilities.
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

print(f"\nRaw logits: {outputs.logits}")
print(f"Probabilities: {predictions}")
print(f"Predicted class: {torch.argmax(predictions, dim=-1).item()}")


Tokenized input:
Input IDs shape: torch.Size([1, 13])
Attention mask shape: torch.Size([1, 13])

Raw logits: tensor([[ 0.1682, -0.7521, -0.0206,  0.5124, -0.3735, -0.2041, -0.7227,  0.2623,
         -0.4108,  0.1616]])
Probabilities: tensor([[0.1255, 0.0500, 0.1039, 0.1770, 0.0730, 0.0865, 0.0515, 0.1378, 0.0703,
         0.1246]])
Predicted class: 3


In [4]:
test_set = pd.read_csv("./data/letterboxd_250movie_reviews_test.csv")

# Score the test set in mini-batches instead of one giant forward pass.
# Tokenizing everything at once pads every review to the longest one and holds
# all activations in memory together, so memory grows with the test-set size.
# Batching bounds peak memory and pads only to the longest review per batch.
BATCH_SIZE = 32
test_texts = test_set["text"].tolist()

batch_probabilities = []
with torch.no_grad():  # inference only, no gradients needed
    for start in range(0, len(test_texts), BATCH_SIZE):
        # NOTE: after the loop, `inputs` / `outputs` hold the LAST batch only;
        # use `predictions` for the full test set.
        inputs = tokenizer(
            test_texts[start : start + BATCH_SIZE],
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        outputs = model(**inputs)
        batch_probabilities.append(
            torch.nn.functional.softmax(outputs.logits, dim=-1)
        )

# One row of class probabilities per test review, in the original row order.
# An empty test set yields an empty (0, num_labels) tensor instead of an error.
predictions = (
    torch.cat(batch_probabilities, dim=0)
    if batch_probabilities
    else torch.empty((0, model.config.num_labels))
)


In [6]:
# Most probable class index for every test review (argmax over the class axis).
predicted_labels = predictions.argmax(dim=1)

# Ground-truth class indices from the test CSV, as a tensor.
label_array = test_set["label"].to_numpy()
true_labels = torch.as_tensor(label_array)

In [7]:
# Fraction of reviews whose predicted class exactly matches the true class.
accuracy = evaluate.load("accuracy")
acc_result = accuracy.compute(
    references=true_labels,
    predictions=predicted_labels,
    normalize=True,
)

# Mean squared error between class indices: unlike accuracy, it penalises a
# prediction more the further it lands from the true class index.
mse = mean_squared_error(true_labels, predicted_labels)

print("Accuracy:", acc_result["accuracy"])
print("MSE:", mse)

Accuracy: 0.085
MSE: 11.285
