In [1]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.9.2-py3-none-any.whl (2.6 MB)
[K     |████████████████████████████████| 2.6 MB 8.3 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 64.0 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 60.6 MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 59.7 MB/s 
Collecting huggingface-hub==0.0.12
  Downloading huggingface_hub-0.0.12-py3-none-any.whl (37 kB)
Installing collected packages: tokenizers, sacremoses, pyyaml, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninsta

In [2]:
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

from transformers import BertModel, BertTokenizer


In [29]:
# Concatenate data
df_train = pd.read_csv("train.csv", header=None, names=["lyrics", "quadrant"], skiprows=1)
df_valid = pd.read_csv("validation.csv", header=None, names=["lyrics", "quadrant"], skiprows=1)
df_test = pd.read_csv("test.csv", header=None, names=["lyrics", "quadrant"], skiprows=1)

frames = [df_train, df_valid, df_test]

df_combined = pd.concat(frames, ignore_index=True)

print(df_combined.shape)


(17427, 2)


In [30]:
df_combined["quadrant"] = pd.to_numeric(df_combined["quadrant"], )

In [31]:
df_combined.head(10)

Unnamed: 0,lyrics,quadrant
0,Gently hold our hands\nGently hold our heads o...,1
1,We are the Sun\nWe are the dead stars\nWe are ...,1
2,You're out of touch\nI'm out of time\nBut I'm ...,0
3,You finally close the door\nYou've left open w...,0
4,随分先に行ってしまった 光の下のキャラバン\nトンネルに残響 塞いだ耳 自分嫌いな自分が好き...,3
5,Lullaby by birdland that's what I\nAlways hear...,0
6,It's the wood man and his splintering self.\nI...,1
7,"Dying men keep rifles in cello cases,\nKnown w...",1
8,"Hey, hey, hey, hey, hey, hey, hey, hey\nHey, h...",0
9,Oooh oooh\nHmmmmh\n\nIt's been the longest win...,1


In [48]:
# Hyper Parameters
MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 10
LEARNING_RATE = 1e-05
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [49]:
# Custom Dataset
class LyricsDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):

        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __getitem__(self, idx):
        lyrics = self.data.lyrics[idx]
        lyrics = "".join("".join(lyrics).replace("\n", " ").replace("\r", " "))
        inputs = self.tokenizer.encode_plus(
            lyrics,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding="max_length",
            return_token_type_ids=True,
            return_attention_mask=True,
            truncation=True,
            return_tensors='pt'
        )
        input_ids = inputs["input_ids"]
        attention_masks = inputs["attention_mask"]

        return {
            "input_ids": input_ids.flatten(),
            "attention_mask": attention_masks.flatten(),
            "labels": torch.tensor(self.data.quadrant[idx], dtype=torch.long)
        }

    def __len__(self):
        return len(self.data)

In [50]:
# Create Datasets
trainsize = 0.8
trainset = df_combined.sample(frac=trainsize, random_state=42)
testset = df_combined.drop(trainset.index).reset_index(drop=True)
trainset = trainset.reset_index(drop=True)

print(f"Full Dataset {df_combined.shape}\n"\
      f"Train Dataset {trainset.shape}\n"\
      f"Test Dataset {testset.shape}")

trainset = LyricsDataset(trainset, tokenizer, MAX_LEN)
testset = LyricsDataset(testset, tokenizer, MAX_LEN)

Full Dataset (17427, 2)
Train Dataset (13942, 2)
Test Dataset (3485, 2)


In [51]:
# Create Dataloaders
parameters = {
    "batch_size": BATCH_SIZE,
    "shuffle": True,
    "num_workers": 0
}

trainloader = DataLoader(trainset, **parameters)
testloader = DataLoader(testset, **parameters)

In [52]:
next(iter(trainloader))

{'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 1, 1, 1],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]]),
 'input_ids': tensor([[  101,   146,   112,  ...,     0,     0,     0],
         [  101,  2119,  1440,  ...,     0,     0,     0],
         [  101,  1153,  1274,  ...,  4403,   117,   102],
         ...,
         [  101, 10065,  1283,  ...,     0,     0,     0],
         [  101,  2777,  1156,  ...,     0,     0,     0],
         [  101,  1135,   112,  ...,     0,     0,     0]]),
 'labels': tensor([0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0])}

In [53]:
# Create Model for Finetuning
class BERTModel(nn.Module):
    def __init__(self):
        super(BERTModel, self).__init__()

        self.bert = BertModel.from_pretrained("bert-base-uncased")
        self.fc1 = nn.Linear(768, 768)
        self.dropout = nn.Dropout(p=0.2)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(768, 4)

    def forward(self, input_ids, attention_mask):
        x = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        x = self.fc1(x.pooler_output)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)

        return x

In [54]:
model = BERTModel()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
  

In [55]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [56]:
# Fine tuning
def calcuate_accu(big_idx, targets):
    n_correct = (big_idx==targets).sum().item()
    return n_correct

In [57]:
# Training the Model
def train(epoch):
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0

    model.train()
    for _, batch in enumerate(trainloader, 0):

        optimizer.zero_grad()

        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        tr_loss += loss.item()
        big_val, big_idx = torch.max(outputs.data, dim=1)
        n_correct += calcuate_accu(big_idx, labels)

        nb_tr_steps += 1
        nb_tr_examples+=labels.size(0)

        if _%5000==0:
            loss_step = tr_loss/nb_tr_steps
            accu_step = (n_correct*100)/nb_tr_examples
            print(f"Training Loss per 5000 steps: {loss_step}")
            print(f"Training Accuracy per 5000 steps: {accu_step}")

        loss.backward()
        optimizer.step()
        scheduler.step()

    print(f'The Total Accuracy for Epoch {epoch}: {(n_correct*100)/nb_tr_examples}')
    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu = (n_correct*100)/nb_tr_examples
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}")

    return

In [58]:
for epoch in range(EPOCHS):
    train(epoch)

Training Loss per 5000 steps: 1.4186981916427612
Training Accuracy per 5000 steps: 12.5
The Total Accuracy for Epoch 0: 29.077607229952662
Training Loss Epoch: 1.3754309737080828
Training Accuracy Epoch: 29.077607229952662
Training Loss per 5000 steps: 1.3771473169326782
Training Accuracy per 5000 steps: 31.25
The Total Accuracy for Epoch 1: 29.321474680820543
Training Loss Epoch: 1.3751139847237035
Training Accuracy Epoch: 29.321474680820543
Training Loss per 5000 steps: 1.4106074571609497
Training Accuracy per 5000 steps: 18.75
The Total Accuracy for Epoch 2: 28.754841486156934
Training Loss Epoch: 1.3752816729589339
Training Accuracy Epoch: 28.754841486156934
Training Loss per 5000 steps: 1.3547313213348389
Training Accuracy per 5000 steps: 31.25
The Total Accuracy for Epoch 3: 30.01004160091809
Training Loss Epoch: 1.3751476647656993
Training Accuracy Epoch: 30.01004160091809
Training Loss per 5000 steps: 1.3928860425949097
Training Accuracy per 5000 steps: 31.25
The Total Accuracy

In [61]:
# Validatin the Model
def valid(model, testloader):
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model.eval()
    n_correct = 0; n_wrong = 0; total = 0
    with torch.no_grad():
        for _, batch in enumerate(testloader, 0):

            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs, labels)

            tr_loss += loss.item()
            big_val, big_idx = torch.max(outputs.data, dim=1)
            n_correct += calcuate_accu(big_idx, labels)

            nb_tr_steps += 1
            nb_tr_examples+=labels.size(0)

            if _%5000==0:
                loss_step = tr_loss/nb_tr_steps
                accu_step = (n_correct*100)/nb_tr_examples
                print(f"Validation Loss per 100 steps: {loss_step}")
                print(f"Validation Accuracy per 100 steps: {accu_step}")

    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu = (n_correct*100)/nb_tr_examples
    print(f"Validation Loss Epoch: {epoch_loss}")
    print(f"Validation Accuracy Epoch: {epoch_accu}")

    return epoch_accu

In [62]:
acc = valid(model, testloader)
print("Accuracy on test data = %0.2f%%" % acc)

Validation Loss per 100 steps: 1.3543847799301147
Validation Accuracy per 100 steps: 18.75
Validation Loss Epoch: 1.373496183014791
Validation Accuracy Epoch: 29.010043041606888
Accuracy on test data = 29.01%


In [47]:
# Save the model
torch.save(model.state_dict(), "Bert-Model-Weights.pt")
torch.save(model, "Bert-Model-Full.pt")

print("Models Saved")


FileNotFoundError: ignored