In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import torch
torch.manual_seed(0)
torch.use_deterministic_algorithms(False)

import random
random.seed(0)

import numpy as np
np.random.seed(0)


<IPython.core.display.Javascript object>

In [3]:
if torch.cuda.is_available():
    dev = "cuda:1"
else:
    dev = "cpu"
DEVICE = torch.device(dev)

<IPython.core.display.Javascript object>

In [4]:
from pynvml import *


def print_gpu_utilization():
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    info = nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU memory occupied: {info.used//1024**2} MB.")


def print_summary(result):
    print(f"Time: {result.metrics['train_runtime']:.2f}")
    print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}")
    print_gpu_utilization()

<IPython.core.display.Javascript object>

In [5]:
import torch
from transformers import XLMTokenizer, XLMWithLMHeadModel

# tokenizer = XLMTokenizer.from_pretrained("xlm-mlm-100-1280")
# model = XLMWithLMHeadModel.from_pretrained("xlm-mlm-100-1280")

<IPython.core.display.Javascript object>

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# model = AutoModelForSequenceClassification.from_pretrained("xlm-mlm-100-1280", num_labels=1)

<IPython.core.display.Javascript object>

In [7]:
import torch.nn as nn


class MultiLingualModel(nn.Module):
    def __init__(self, model_name):
        super().__init__()
        self.device = DEVICE
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=1
        ).to(DEVICE)

    def forward(self, sentences):
        tokens = torch.tensor(
            [
                self.tokenizer.encode(text, padding="max_length", truncation=True)
                for text in sentences
            ]
        )
        # tokens = self.tokenizer(text, padding="max_length", truncation=True)
        tokens = tokens.to(DEVICE)
        out = self.model(tokens)
        return out, tokens

<IPython.core.display.Javascript object>

In [8]:
model = MultiLingualModel("xlm-mlm-100-1280")

Some weights of the model checkpoint at xlm-mlm-100-1280 were not used when initializing XLMForSequenceClassification: ['pred_layer.proj.weight', 'pred_layer.proj.bias']
- This IS expected if you are initializing XLMForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMForSequenceClassification were not initialized from the model checkpoint at xlm-mlm-100-1280 and are newly initialized: ['sequence_summary.summary.weight', 'transformer.position_ids', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inf

<IPython.core.display.Javascript object>

In [9]:
print_gpu_utilization()

GPU memory occupied: 310 MB.


<IPython.core.display.Javascript object>

In [10]:
out, tokens = model(["Wikipedia was used to", "This is great"])

<IPython.core.display.Javascript object>

In [11]:
from datasets import load_metric


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    rmse = mean_squared_error(labels, predictions, squared=False)
    return {"rmse": rmse}

<IPython.core.display.Javascript object>

In [12]:
import pandas as pd

data = pd.read_csv("data/train.csv")

<IPython.core.display.Javascript object>

In [19]:
def data_loader(data, batch_size=16):
    shuffled_data = data.sample(frac=1, random_state=0).reset_index(drop=True)
    start = 0
    end = start + batch_size
    data_len = len(shuffled_data)
    while start < data_len:
        sub_data = shuffled_data[start:end]
        start += batch_size
        end = min(start + batch_size, data_len)
        yield sub_data["text"].tolist(), torch.tensor(sub_data["label"].tolist())

<IPython.core.display.Javascript object>

In [20]:
train_data = data_loader(data)

<IPython.core.display.Javascript object>

In [12]:
from torch.utils.data import Dataset, DataLoader


class TweetDataset(Dataset):
    def __init__(self, file):
        self.data = pd.read_csv(file)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data.iloc[idx]["text"], self.data.iloc[idx]["label"]

<IPython.core.display.Javascript object>

In [13]:
dataset = TweetDataset("data/train.csv")

<IPython.core.display.Javascript object>

In [15]:
dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=0)

<IPython.core.display.Javascript object>

In [16]:
mse_loss = nn.MSELoss()
optimizer = torch.optim.Adam(model.model.parameters(), lr=0.001)

<IPython.core.display.Javascript object>

In [24]:
total_loss = []
for epoch in range(5):
    epoch_loss = []
    for X, y in train_data:
        optimizer.zero_grad()
        y = y.to(DEVICE)
        y_pred = model(list(X))
        print(type(y_pred[0]["logits"]))
        loss = mse_loss(y_pred[0]["logits"], y.float())
        # epoch_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        print_gpu_utilization()

        del X, y, y_pred, loss
        import gc

        gc.collect()
        torch.cuda.empty_cache()
        break
    break
    total_loss.append(sum(epoch_loss) / len(epoch_loss))
    print(total_loss)

OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB (GPU 1; 23.70 GiB total capacity; 21.41 GiB already allocated; 90.31 MiB free; 22.58 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

<IPython.core.display.Javascript object>

In [23]:
import gc

gc.collect()
torch.cuda.empty_cache()

<IPython.core.display.Javascript object>

In [None]:
torch.cuda.is_available()