In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [3]:
# load data
train = torch.load('../embeddings/legal-bert-base-uncased/emb_tr_cpu.pkl')
test = torch.load('../embeddings/legal-bert-base-uncased/emb_test_cpu.pkl')
dev = torch.load('../embeddings/legal-bert-base-uncased/emb_dev_cpu.pkl')

In [4]:
# load labels
train_labels = pd.read_pickle('../ECHR_Dataset_Tokenized/legal-bert-base-uncased/df_train_tokenized.pkl')['label']
test_labels = pd.read_pickle('../ECHR_Dataset_Tokenized/legal-bert-base-uncased/df_test_tokenized.pkl')['label']
dev_labels = pd.read_pickle('../ECHR_Dataset_Tokenized/legal-bert-base-uncased/df_dev_tokenized.pkl')['label']


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# train labels to tensor to device
train = [x.to(device) for x in train]
test = [x.to(device) for x in test]
dev = [x.to(device) for x in dev]
train_labels = torch.tensor(train_labels.values).to(device)
test_labels = torch.tensor(test_labels.values).to(device)
dev_labels = torch.tensor(dev_labels.values).to(device)

In [5]:
# create a torch dataset
class ECHRDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]


In [6]:
# pad the data to be of the same shape
def pad_data(data, max_len):
    padded_data = []
    attention_masks = []
    for i in range(len(data)):
        attention_masks.append([1] * data[i].shape[0] + [0] * (max_len - data[i].shape[0]))
        padded_data.append(F.pad(data[i], (0, 0, 0, max_len - data[i].shape[0])))
    return torch.stack(padded_data), torch.tensor(attention_masks)


In [7]:
# pad the data
max_len_train = max([x.shape[0] for x in train])
max_len_test = max([x.shape[0] for x in test])
max_len_dev = max([x.shape[0] for x in dev])
train, train_attention_masks = pad_data(train, max_len_train)
test, test_attention_masks = pad_data(test, max_len_test)
dev, dev_attention_masks = pad_data(dev, max_len_dev)

In [8]:
# create the datasets
train_dataset = ECHRDataset(train, train_labels)
test_dataset = ECHRDataset(test, test_labels)
dev_dataset = ECHRDataset(dev, dev_labels)

In [161]:
class AttentionMLP(nn.Module):
    def __init__(self, input_dim, hidden_sizes, dropout=0, weight_decay=0.01):
        super(AttentionMLP, self).__init__()
        # vector for query attention
        self.selector = nn.parameter.Parameter(torch.randn(input_dim, 1))
        self.Value= nn.Linear(input_dim, input_dim, bias=False)
        self.Key = nn.Linear(input_dim, input_dim, bias=False)
        # mlp layers
        layers = []
        for i in range(len(hidden_sizes) - 1):
            layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i + 1]), weight_decay=weight_decay)
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
        self.mlp = nn.Sequential(*layers)
        self.output = nn.Linear(hidden_sizes[-1], 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # attention
        key = self.Key(x)

        value = self.Value(x)

        non_normalized_attention = torch.matmul(key, self.selector)

        attention = F.softmax(non_normalized_attention, dim=1)
        # permute the attention to match the shape of the value
        attention = attention.permute(0, 2, 1)

        x = torch.matmul(attention, value)

        # mlp
        x = self.mlp(x)
        x = self.output(x)
        x = self.sigmoid(x)
        return x.squeeze()


In [162]:
# create the model
model = AttentionMLP(768, [768, 256, 64])
model = model.to(device)

In [163]:
# create the loss function
criterion = nn.BCELoss()


In [164]:
# create the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [187]:
# create the dataloader for the training set
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
next(iter(train_dataloader))

TypeError: expected Tensor as element 1 in argument 0, but got Series

In [1]:
# train the model
from tqdm import tqdm
for epoch in range(10):
    running_loss = 0.0
    for i, data in tqdm(enumerate(train_dataloader, 0)):
        inputs, labels = data

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i == 100:
            break
    print(f'Epoch {epoch + 1}, loss: {running_loss / len(train_dataloader)}')
    # validate the model
    with torch.no_grad():
        outputs = model(dev)
        loss = criterion(outputs, dev_labels.float())
        print(f'Validation loss: {loss.item()}')
        accuracy = ((outputs > 0.5) == dev_labels).sum().item() / len(dev_labels)
        print(f'Validation accuracy: {accuracy}')


NameError: name 'train_dataloader' is not defined

In [None]:
# test the model
with torch.no_grad():
    outputs = model(test)
    loss = criterion(outputs, test_labels.float())
    print(f'Test loss: {loss.item()}')
    accuracy = ((outputs > 0.5) == test_labels).sum().item() / len(test_labels)
    print(f'Test accuracy: {accuracy}')