# Task 4
This serves as a template which will guide you through the implementation of this task. It is advised to first read the whole template and get a sense of the overall structure of the code before trying to fill in any of the TODO gaps.
This is the jupyter notebook version of the template. For the python file version, please refer to the file `template_solution.py`.

First, we import necessary libraries:

In [12]:
import numpy as np
import pandas as pd
import torch
from datasets import Dataset, DatasetDict
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    EarlyStoppingCallback,
    Trainer,
    TrainingArguments,
)
# Add any other imports you need here

Depending on your approach, you might need to adapt the structure of this template or parts not marked by TODOs.
It is not necessary to completely follow this template. Feel free to add more code and delete any parts that are not required.

In [13]:
def clear_cache():
    if torch.backends.mps.is_available():
        torch.mps.empty_cache()

In [19]:
# DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 128  # TODO: Set the batch size according to both training performance and available memory
NUM_EPOCHS = 10  # TODO: Set the number of epochs

# Load test data
test_val = pd.read_csv("test_no_score.csv")
test_val["text"] = test_val["title"] + " " + test_val["sentence"]
test_val = test_val[["text"]]


# Load train data
train_val = pd.read_csv("train.csv")

train_val["text"] = train_val["title"] + " " + train_val["sentence"]
train_val["label"] = train_val["score"]
train_val = train_val[["text", "label"]]

# Load datasets
train_dataset = Dataset.from_pandas(train_val)
test_dataset = Dataset.from_pandas(test_val)

train_test_split = train_dataset.train_test_split(test_size=0.1)
dataset = DatasetDict(
    {"train": train_test_split["train"], "validation": train_test_split["test"]}
)
test_dataset = DatasetDict({"test": test_dataset})


print(dataset["train"][0])

{'text': "Why I Bought This CD Forgotten Dreams by David Pietro was such a strong CD that I've ordered this latest CD without reading any reviews. I know I'll love it!", 'label': 9.377984268162235}


In [21]:
# Tokenize
tokenizer = AutoTokenizer.from_pretrained("roberta-base")


def tokenizer_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)


tokenized_dataset = dataset.map(tokenizer_function, batched=True)
tokenized_test_dataset = test_dataset.map(tokenizer_function)
print(tokenized_dataset["train"][0])

model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=1)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=NUM_EPOCHS,
    weight_decay=0.01,
    logging_steps=10,
    logging_first_step=True,
    gradient_accumulation_steps=4,
    load_best_model_at_end=True,
)


def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.squeeze()
    mse = mean_squared_error(labels, preds)
    return {"mse": mse}


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

Map:   0%|          | 0/11250 [00:00<?, ? examples/s]

Map:   0%|          | 0/1250 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

{'text': "Why I Bought This CD Forgotten Dreams by David Pietro was such a strong CD that I've ordered this latest CD without reading any reviews. I know I'll love it!", 'label': 9.377984268162235, 'input_ids': [0, 7608, 38, 45192, 152, 7522, 43799, 26843, 30, 871, 20964, 1001, 21, 215, 10, 670, 7522, 14, 38, 348, 2740, 42, 665, 7522, 396, 2600, 143, 6173, 4, 38, 216, 38, 581, 657, 24, 328, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
trainer.train(resume_from_checkpoint=True)

model.save_pretrained("./saved_model")
tokenizer.save_pretrained("./saved_model")

  0%|          | 0/7030 [00:00<?, ?it/s]

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 44.9809, 'learning_rate': 1.999715504978663e-05, 'epoch': 0.0}
{'loss': 29.3147, 'learning_rate': 1.997155049786629e-05, 'epoch': 0.01}
{'loss': 16.1213, 'learning_rate': 1.9943100995732575e-05, 'epoch': 0.03}
{'loss': 11.4306, 'learning_rate': 1.9914651493598865e-05, 'epoch': 0.04}
{'loss': 10.072, 'learning_rate': 1.9886201991465152e-05, 'epoch': 0.06}
{'loss': 10.9514, 'learning_rate': 1.985775248933144e-05, 'epoch': 0.07}
{'loss': 10.5677, 'learning_rate': 1.9829302987197725e-05, 'epoch': 0.09}
{'loss': 7.5098, 'learning_rate': 1.9800853485064012e-05, 'epoch': 0.1}
{'loss': 4.3041, 'learning_rate': 1.97724039829303e-05, 'epoch': 0.11}
{'loss': 4.5812, 'learning_rate': 1.974395448079659e-05, 'epoch': 0.13}
{'loss': 4.2453, 'learning_rate': 1.9715504978662876e-05, 'epoch': 0.14}
{'loss': 3.912, 'learning_rate': 1.9687055476529162e-05, 'epoch': 0.16}
{'loss': 3.2101, 'learning_rate': 1.965860597439545e-05, 'epoch': 0.17}
{'loss': 3.8125, 'learning_rate': 1.9630156472261736e-0

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.9705937504768372, 'eval_mse': 0.9705937504768372, 'eval_runtime': 6.2099, 'eval_samples_per_second': 201.292, 'eval_steps_per_second': 50.404, 'epoch': 1.0}
{'loss': 1.0648, 'learning_rate': 1.7980085348506404e-05, 'epoch': 1.01}
{'loss': 1.3022, 'learning_rate': 1.795163584637269e-05, 'epoch': 1.02}
{'loss': 0.8315, 'learning_rate': 1.7923186344238977e-05, 'epoch': 1.04}
{'loss': 0.7366, 'learning_rate': 1.7894736842105264e-05, 'epoch': 1.05}
{'loss': 0.733, 'learning_rate': 1.7866287339971554e-05, 'epoch': 1.07}
{'loss': 0.9304, 'learning_rate': 1.783783783783784e-05, 'epoch': 1.08}
{'loss': 0.7479, 'learning_rate': 1.7809388335704127e-05, 'epoch': 1.09}
{'loss': 0.7796, 'learning_rate': 1.7780938833570414e-05, 'epoch': 1.11}
{'loss': 0.748, 'learning_rate': 1.77524893314367e-05, 'epoch': 1.12}
{'loss': 0.8372, 'learning_rate': 1.7724039829302988e-05, 'epoch': 1.14}
{'loss': 0.8044, 'learning_rate': 1.7695590327169278e-05, 'epoch': 1.15}
{'loss': 0.7121, 'learning_rat

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 1.2482601404190063, 'eval_mse': 1.2482601404190063, 'eval_runtime': 6.1496, 'eval_samples_per_second': 203.267, 'eval_steps_per_second': 50.898, 'epoch': 2.0}
{'loss': 0.9289, 'learning_rate': 1.5988620199146515e-05, 'epoch': 2.0}
{'loss': 0.5855, 'learning_rate': 1.5960170697012805e-05, 'epoch': 2.02}
{'loss': 0.5798, 'learning_rate': 1.5931721194879092e-05, 'epoch': 2.03}
{'loss': 0.3549, 'learning_rate': 1.590327169274538e-05, 'epoch': 2.05}
{'loss': 0.6628, 'learning_rate': 1.5874822190611666e-05, 'epoch': 2.06}
{'loss': 0.4725, 'learning_rate': 1.5846372688477952e-05, 'epoch': 2.08}
{'loss': 0.4819, 'learning_rate': 1.581792318634424e-05, 'epoch': 2.09}
{'loss': 0.4643, 'learning_rate': 1.578947368421053e-05, 'epoch': 2.1}
{'loss': 0.7758, 'learning_rate': 1.5761024182076813e-05, 'epoch': 2.12}
{'loss': 0.4207, 'learning_rate': 1.5732574679943103e-05, 'epoch': 2.13}
{'loss': 0.518, 'learning_rate': 1.570412517780939e-05, 'epoch': 2.15}
{'loss': 0.5543, 'learning_rate

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.9230279326438904, 'eval_mse': 0.9230280518531799, 'eval_runtime': 6.3944, 'eval_samples_per_second': 195.484, 'eval_steps_per_second': 48.949, 'epoch': 3.0}
{'loss': 0.6261, 'learning_rate': 1.399715504978663e-05, 'epoch': 3.0}
{'loss': 0.3093, 'learning_rate': 1.3968705547652917e-05, 'epoch': 3.01}
{'loss': 0.3707, 'learning_rate': 1.3940256045519206e-05, 'epoch': 3.03}
{'loss': 0.2882, 'learning_rate': 1.391180654338549e-05, 'epoch': 3.04}
{'loss': 0.3711, 'learning_rate': 1.3883357041251779e-05, 'epoch': 3.06}
{'loss': 0.2187, 'learning_rate': 1.3854907539118068e-05, 'epoch': 3.07}
{'loss': 0.2688, 'learning_rate': 1.3826458036984354e-05, 'epoch': 3.09}
{'loss': 0.2638, 'learning_rate': 1.3798008534850643e-05, 'epoch': 3.1}
{'loss': 0.349, 'learning_rate': 1.3769559032716928e-05, 'epoch': 3.11}
{'loss': 0.3582, 'learning_rate': 1.3741109530583216e-05, 'epoch': 3.13}
{'loss': 0.3476, 'learning_rate': 1.3712660028449503e-05, 'epoch': 3.14}
{'loss': 0.3637, 'learning_ra

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.6894272565841675, 'eval_mse': 0.6894272565841675, 'eval_runtime': 6.3453, 'eval_samples_per_second': 196.996, 'eval_steps_per_second': 49.328, 'epoch': 4.0}
{'loss': 0.4294, 'learning_rate': 1.197724039829303e-05, 'epoch': 4.01}
{'loss': 0.2225, 'learning_rate': 1.1948790896159319e-05, 'epoch': 4.02}
{'loss': 0.3143, 'learning_rate': 1.1920341394025606e-05, 'epoch': 4.04}
{'loss': 0.2133, 'learning_rate': 1.1891891891891894e-05, 'epoch': 4.05}
{'loss': 0.2305, 'learning_rate': 1.186344238975818e-05, 'epoch': 4.07}
{'loss': 0.2308, 'learning_rate': 1.1834992887624468e-05, 'epoch': 4.08}
{'loss': 0.5113, 'learning_rate': 1.1806543385490754e-05, 'epoch': 4.1}
{'loss': 0.2276, 'learning_rate': 1.1778093883357043e-05, 'epoch': 4.11}
{'loss': 0.2265, 'learning_rate': 1.1749644381223328e-05, 'epoch': 4.12}
{'loss': 0.3146, 'learning_rate': 1.1721194879089616e-05, 'epoch': 4.14}
{'loss': 0.2435, 'learning_rate': 1.1692745376955905e-05, 'epoch': 4.15}
{'loss': 0.2519, 'learning_

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.7305812239646912, 'eval_mse': 0.7305812239646912, 'eval_runtime': 6.5234, 'eval_samples_per_second': 191.618, 'eval_steps_per_second': 47.981, 'epoch': 5.0}
{'loss': 0.1769, 'learning_rate': 9.985775248933144e-06, 'epoch': 5.01}
{'loss': 0.1822, 'learning_rate': 9.957325746799433e-06, 'epoch': 5.02}
{'loss': 0.3009, 'learning_rate': 9.92887624466572e-06, 'epoch': 5.03}
{'loss': 0.1338, 'learning_rate': 9.900426742532006e-06, 'epoch': 5.05}
{'loss': 0.8599, 'learning_rate': 9.871977240398294e-06, 'epoch': 5.06}
{'loss': 0.2147, 'learning_rate': 9.843527738264581e-06, 'epoch': 5.08}
{'loss': 0.2246, 'learning_rate': 9.815078236130868e-06, 'epoch': 5.09}
{'loss': 0.18, 'learning_rate': 9.786628733997155e-06, 'epoch': 5.1}
{'loss': 0.1627, 'learning_rate': 9.758179231863443e-06, 'epoch': 5.12}
{'loss': 0.1307, 'learning_rate': 9.729729729729732e-06, 'epoch': 5.13}
{'loss': 0.2134, 'learning_rate': 9.701280227596018e-06, 'epoch': 5.15}
{'loss': 0.2308, 'learning_rate': 9.672

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.6793612837791443, 'eval_mse': 0.6793612837791443, 'eval_runtime': 6.1192, 'eval_samples_per_second': 204.274, 'eval_steps_per_second': 51.15, 'epoch': 6.0}
{'loss': 0.1643, 'learning_rate': 7.994310099573258e-06, 'epoch': 6.0}
{'loss': 0.0884, 'learning_rate': 7.965860597439546e-06, 'epoch': 6.01}
{'loss': 0.1612, 'learning_rate': 7.937411095305833e-06, 'epoch': 6.03}
{'loss': 0.1737, 'learning_rate': 7.90896159317212e-06, 'epoch': 6.04}
{'loss': 0.1787, 'learning_rate': 7.880512091038406e-06, 'epoch': 6.06}
{'loss': 0.1415, 'learning_rate': 7.852062588904695e-06, 'epoch': 6.07}
{'loss': 0.1986, 'learning_rate': 7.823613086770983e-06, 'epoch': 6.09}
{'loss': 0.1845, 'learning_rate': 7.79516358463727e-06, 'epoch': 6.1}
{'loss': 0.1197, 'learning_rate': 7.766714082503557e-06, 'epoch': 6.11}
{'loss': 0.1315, 'learning_rate': 7.738264580369845e-06, 'epoch': 6.13}
{'loss': 0.1108, 'learning_rate': 7.709815078236132e-06, 'epoch': 6.14}
{'loss': 0.1664, 'learning_rate': 7.6813

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.6658053994178772, 'eval_mse': 0.6658053994178772, 'eval_runtime': 6.1071, 'eval_samples_per_second': 204.679, 'eval_steps_per_second': 51.252, 'epoch': 7.0}
{'loss': 0.1156, 'learning_rate': 5.9743954480796596e-06, 'epoch': 7.01}
{'loss': 0.1695, 'learning_rate': 5.945945945945947e-06, 'epoch': 7.02}
{'loss': 0.1184, 'learning_rate': 5.917496443812234e-06, 'epoch': 7.04}
{'loss': 0.1715, 'learning_rate': 5.8890469416785214e-06, 'epoch': 7.05}
{'loss': 0.1335, 'learning_rate': 5.860597439544808e-06, 'epoch': 7.07}
{'loss': 0.1356, 'learning_rate': 5.832147937411096e-06, 'epoch': 7.08}
{'loss': 0.1227, 'learning_rate': 5.8036984352773825e-06, 'epoch': 7.1}
{'loss': 0.1152, 'learning_rate': 5.77524893314367e-06, 'epoch': 7.11}
{'loss': 0.17, 'learning_rate': 5.746799431009958e-06, 'epoch': 7.12}
{'loss': 0.1372, 'learning_rate': 5.718349928876245e-06, 'epoch': 7.14}
{'loss': 0.1118, 'learning_rate': 5.689900426742533e-06, 'epoch': 7.15}
{'loss': 0.1598, 'learning_rate': 5.

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.641223132610321, 'eval_mse': 0.641223132610321, 'eval_runtime': 5.8559, 'eval_samples_per_second': 213.459, 'eval_steps_per_second': 53.45, 'epoch': 8.0}
{'loss': 0.1003, 'learning_rate': 3.982930298719773e-06, 'epoch': 8.01}
{'loss': 0.5245, 'learning_rate': 3.95448079658606e-06, 'epoch': 8.02}
{'loss': 0.1286, 'learning_rate': 3.926031294452347e-06, 'epoch': 8.03}
{'loss': 0.1015, 'learning_rate': 3.897581792318635e-06, 'epoch': 8.05}
{'loss': 0.0785, 'learning_rate': 3.8691322901849225e-06, 'epoch': 8.06}
{'loss': 0.1012, 'learning_rate': 3.840682788051209e-06, 'epoch': 8.08}
{'loss': 0.0999, 'learning_rate': 3.8122332859174964e-06, 'epoch': 8.09}
{'loss': 0.1034, 'learning_rate': 3.7837837837837844e-06, 'epoch': 8.11}
{'loss': 0.1081, 'learning_rate': 3.7553342816500715e-06, 'epoch': 8.12}
{'loss': 0.1103, 'learning_rate': 3.7268847795163587e-06, 'epoch': 8.13}
{'loss': 0.0945, 'learning_rate': 3.698435277382646e-06, 'epoch': 8.15}
{'loss': 0.1042, 'learning_rate': 

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.6002613306045532, 'eval_mse': 0.6002613306045532, 'eval_runtime': 6.048, 'eval_samples_per_second': 206.681, 'eval_steps_per_second': 51.753, 'epoch': 9.0}
{'loss': 0.108, 'learning_rate': 1.9914651493598865e-06, 'epoch': 9.0}
{'loss': 0.0836, 'learning_rate': 1.9630156472261737e-06, 'epoch': 9.02}
{'loss': 0.0768, 'learning_rate': 1.9345661450924613e-06, 'epoch': 9.03}
{'loss': 0.0833, 'learning_rate': 1.9061166429587482e-06, 'epoch': 9.04}
{'loss': 0.0863, 'learning_rate': 1.8776671408250358e-06, 'epoch': 9.06}
{'loss': 0.0815, 'learning_rate': 1.849217638691323e-06, 'epoch': 9.07}
{'loss': 0.0875, 'learning_rate': 1.8207681365576105e-06, 'epoch': 9.09}
{'loss': 0.0768, 'learning_rate': 1.7923186344238977e-06, 'epoch': 9.1}
{'loss': 0.0844, 'learning_rate': 1.7638691322901852e-06, 'epoch': 9.11}
{'loss': 0.0809, 'learning_rate': 1.7354196301564724e-06, 'epoch': 9.13}
{'loss': 0.1419, 'learning_rate': 1.7069701280227595e-06, 'epoch': 9.14}
{'loss': 0.1005, 'learning_ra

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.6054350137710571, 'eval_mse': 0.6054350137710571, 'eval_runtime': 6.0903, 'eval_samples_per_second': 205.243, 'eval_steps_per_second': 51.393, 'epoch': 10.0}
{'train_runtime': 1827.4649, 'train_samples_per_second': 61.561, 'train_steps_per_second': 3.847, 'train_loss': 0.5604167800794115, 'epoch': 10.0}


('./saved_model/tokenizer_config.json',
 './saved_model/special_tokens_map.json',
 './saved_model/vocab.json',
 './saved_model/merges.txt',
 './saved_model/added_tokens.json',
 './saved_model/tokenizer.json')

In [29]:
# Do predictions
predictions = trainer.predict(tokenized_test_dataset["test"])
pred_scores = predictions.predictions.squeeze()

with open("results.txt", "w") as f:
    for val in pred_scores:
        f.write(f"{val}\n")

print("Saved results")

  0%|          | 0/250 [00:00<?, ?it/s]

Saved results


In [None]:
# model.eval()
# with torch.no_grad():
#     results = []
#     for batch in tqdm(test_loader, total=len(test_loader)):
#         batch = batch.to(DEVICE)

#         # TODO: Set up evaluation loop

#     with open("result.txt", "w") as f:
#         for val in np.concatenate(results):
#             f.write(f"{val}\n")

NameError: name 'torch' is not defined