# Fine Tuning Transformer for MultiLabel Text Classification

###Installation

In [None]:
!pip install -q transformers

In [None]:
import numpy as np
import pandas as pd
from sklearn import metrics
import transformers
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import ElectraTokenizer, ElectraModel, ElectraConfig
from torch.utils.tensorboard import SummaryWriter

In [None]:
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
from tqdm import tqdm

<a id='section02'></a>
### Importing and Preprocessing Data



In [None]:
# this mounts your Google Drive to the Colab VM.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# enter the foldername in your Drive where you have saved the unzipped
# workshop folder, e.g. 'acmlab/workshops/week3'
FOLDERNAME = 'Dataset'
assert FOLDERNAME is not None, "[!] Enter the foldername."

# now that we've mounted your Drive, this ensures that
# the Python interpreter of the Colab VM can load
# python files from within it.
import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

%cd /content/drive/My\ Drive/$FOLDERNAME/

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1SGfUxG0kyYK011ICRcr-oyA-4S-p6H01/Dataset


In [None]:
df = pd.read_csv('train.csv')
new_df = df[['text', 'is_humor']].copy()
new_df.head()

Unnamed: 0,text,is_humor
0,TENNESSEE: We're the best state. Nobody even c...,1
1,A man inserted an advertisement in the classif...,1
2,How many men does it take to open a can of bee...,1
3,Told my mom I hit 1200 Twitter followers. She ...,1
4,Roses are dead. Love is fake. Weddings are bas...,1


<a id='section03'></a>
### Dataset and Dataloader

In [None]:
# Sections of config

# Defining some key variables that will be used later on in the training
MAX_LEN = 200
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 4
EPOCHS = 1
LEARNING_RATE = 1e-05
tokenizer = ElectraTokenizer.from_pretrained("google/electra-large-discriminator")

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




In [None]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = self.data.is_humor
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }

In [None]:
# Creating the dataset and dataloader for the neural network

train_size = 0.8
train_dataset=new_df.sample(frac=train_size,random_state=200)
test_dataset=new_df.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)


print("FULL Dataset: {}".format(new_df.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))

training_set = CustomDataset(train_dataset, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_dataset, tokenizer, MAX_LEN)

FULL Dataset: (8000, 2)
TRAIN Dataset: (6400, 2)
TEST Dataset: (1600, 2)


In [None]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

<a id='section04'></a>
### Fine-Tuning Neural Network

In [None]:
# Creating the customized model, by adding a drop out and a dense layer on top of ELECTRA to get the final output for the model. 

class ELECTRAClass(torch.nn.Module):
    def __init__(self):
        super(ELECTRAClass, self).__init__()
        self.l1 = transformers.ElectraModel.from_pretrained("google/electra-large-discriminator")
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(1024, 1)
    
    def forward(self, ids, mask, token_type_ids):
        output_1 = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids, return_dict=True)['last_hidden_state'].mean(dim=1)
        #print(output_1.shape)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

weight_seed = 200
torch.manual_seed(weight_seed)
model = ELECTRAClass()
model.to(device)

In [None]:
loss_fn = torch.nn.BCEWithLogitsLoss()

In [1]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

NameError: ignored

In [None]:
writer = SummaryWriter(log_dir='/runs/N')

In [None]:
def validation(epoch):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(testing_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
           # print(targets, outputs)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

In [None]:
def perform_validation(epoch):
    outputs, targets = validation(epoch)
    outputs = np.array(outputs) >= 0.5
    targets = np.array(targets).reshape(-1, 1)
    accuracy = metrics.accuracy_score(targets, outputs)
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
    outputs_tensor = torch.FloatTensor(outputs)
    targets_tensor = torch.FloatTensor(targets)
    loss = loss_fn(outputs_tensor, targets_tensor)
    print()
    print(f"Val loss = {loss}")
    print(f"Val accuracy Score = {accuracy}")
    print(f"Val F1 Score (Micro) = {f1_score_micro}")
    print(f"Val F1 Score (Macro) = {f1_score_macro}")
    writer.add_scalar("Val loss", loss, epoch)
    writer.add_scalar("Val accuracy", accuracy, epoch)
    writer.add_scalar("Val F1 (micro)", f1_score_micro, epoch)
    writer.add_scalar("Val F2 (macro)", f1_score_macro, epoch)

In [None]:
newest_df = test_dataset.copy(deep=True)

In [None]:
def validation_and_save(epoch):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(testing_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())

    fin_outputs = (np.array(fin_outputs) >= 0.5).astype(int)
    newest_df.insert(loc=epoch+2, column=f'predictions-{epoch+1}', value=fin_outputs)
    newest_df.to_csv(f'predictions-{weight_seed}-epoch{epoch+1}.csv')



In [None]:
def train(epoch):
    model.train()
    losses = []
    for _,data in tqdm(enumerate(training_loader, 0), total=len(training_loader)):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)
        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss = loss_fn(outputs.squeeze(), targets.squeeze())
        losses.append(loss.detach())
        if _ % 25 == 0:
            writer.add_scalar("Loss/train", losses[-1], float(_ % 5000) + float(epoch))
        if len(losses) == 50:
            print(f'Epoch: {epoch}, Loss:  {torch.mean(torch.stack(losses))}')
            perform_validation(epoch)
            print()
            losses.clear()
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [None]:
EPOCHS = 3                      # changed from 5
for epoch in range(EPOCHS):
    train(epoch)
    perform_validation(epoch)

  0%|          | 0/400 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
 12%|█▏        | 49/400 [01:43<11:52,  2.03s/it]

Epoch: 0, Loss:  0.6145930886268616

Val loss = 0.5574231147766113
Val accuracy Score = 0.82875
Val F1 Score (Micro) = 0.82875
Val F1 Score (Macro) = 0.7961309523809523



 25%|██▍       | 99/400 [04:42<10:07,  2.02s/it]

Epoch: 0, Loss:  0.3226768374443054

Val loss = 0.49711886048316956
Val accuracy Score = 0.914375
Val F1 Score (Micro) = 0.914375
Val F1 Score (Macro) = 0.9084511836401596



 37%|███▋      | 149/400 [07:39<08:20,  1.99s/it]

Epoch: 0, Loss:  0.32085689902305603

Val loss = 0.4953031539916992
Val accuracy Score = 0.91125
Val F1 Score (Micro) = 0.91125
Val F1 Score (Macro) = 0.9064618708613907



 50%|████▉     | 199/400 [10:36<06:42,  2.00s/it]

Epoch: 0, Loss:  0.16446471214294434

Val loss = 0.4943850040435791
Val accuracy Score = 0.9275
Val F1 Score (Micro) = 0.9275
Val F1 Score (Macro) = 0.9210292072796094



 62%|██████▏   | 249/400 [13:34<05:01,  2.00s/it]

Epoch: 0, Loss:  0.18114927411079407

Val loss = 0.48425570130348206
Val accuracy Score = 0.93875
Val F1 Score (Micro) = 0.93875
Val F1 Score (Macro) = 0.9347100844605005



 75%|███████▍  | 299/400 [16:30<03:21,  2.00s/it]

Epoch: 0, Loss:  0.10156091302633286

Val loss = 0.4806418716907501
Val accuracy Score = 0.943125
Val F1 Score (Micro) = 0.943125
Val F1 Score (Macro) = 0.939744052932532



 87%|████████▋ | 349/400 [19:27<01:41,  1.99s/it]

Epoch: 0, Loss:  0.18713584542274475

Val loss = 0.4836447238922119
Val accuracy Score = 0.943125
Val F1 Score (Micro) = 0.943125
Val F1 Score (Macro) = 0.9389374188226245



100%|█████████▉| 399/400 [22:24<00:01,  1.99s/it]

Epoch: 0, Loss:  0.15710750222206116

Val loss = 0.48234230279922485
Val accuracy Score = 0.940625
Val F1 Score (Micro) = 0.940625
Val F1 Score (Macro) = 0.9369758787614931



100%|██████████| 400/400 [23:43<00:00,  3.56s/it]
  0%|          | 0/400 [00:00<?, ?it/s]


Val loss = 0.48111671209335327
Val accuracy Score = 0.941875
Val F1 Score (Micro) = 0.941875
Val F1 Score (Macro) = 0.9384582552170897


 12%|█▏        | 49/400 [01:40<12:00,  2.05s/it]

Epoch: 1, Loss:  0.08860156685113907

Val loss = 0.4836306869983673
Val accuracy Score = 0.94
Val F1 Score (Micro) = 0.94
Val F1 Score (Macro) = 0.9360425317164086



 25%|██▍       | 99/400 [04:37<09:59,  1.99s/it]

Epoch: 1, Loss:  0.06983064115047455

Val loss = 0.4793185293674469
Val accuracy Score = 0.949375
Val F1 Score (Micro) = 0.949375
Val F1 Score (Macro) = 0.9459457430395759



 37%|███▋      | 149/400 [07:33<08:20,  1.99s/it]

Epoch: 1, Loss:  0.061483535915613174

Val loss = 0.4786795675754547
Val accuracy Score = 0.9475
Val F1 Score (Micro) = 0.9475
Val F1 Score (Macro) = 0.944326524931278



 50%|████▉     | 199/400 [10:29<06:38,  1.98s/it]

Epoch: 1, Loss:  0.07690698653459549

Val loss = 0.4808932840824127
Val accuracy Score = 0.945625
Val F1 Score (Micro) = 0.945625
Val F1 Score (Macro) = 0.9420193027404626



 62%|██████▏   | 249/400 [13:26<05:00,  1.99s/it]

Epoch: 1, Loss:  0.09924348443746567

Val loss = 0.5001881718635559
Val accuracy Score = 0.9225
Val F1 Score (Micro) = 0.9225
Val F1 Score (Macro) = 0.9140379792685949



 75%|███████▍  | 299/400 [16:22<03:20,  1.99s/it]

Epoch: 1, Loss:  0.05529293790459633

Val loss = 0.47720611095428467
Val accuracy Score = 0.95375
Val F1 Score (Micro) = 0.95375
Val F1 Score (Macro) = 0.9506004712982064



 87%|████████▋ | 349/400 [19:18<01:41,  1.99s/it]

Epoch: 1, Loss:  0.055209722369909286

Val loss = 0.4917489290237427
Val accuracy Score = 0.935625
Val F1 Score (Micro) = 0.935625
Val F1 Score (Macro) = 0.9293453166191563



100%|█████████▉| 399/400 [22:15<00:01,  1.99s/it]

Epoch: 1, Loss:  0.04853200167417526

Val loss = 0.48488426208496094
Val accuracy Score = 0.945
Val F1 Score (Micro) = 0.945
Val F1 Score (Macro) = 0.9403211805555555



100%|██████████| 400/400 [23:33<00:00,  3.53s/it]
  0%|          | 0/400 [00:00<?, ?it/s]


Val loss = 0.48557212948799133
Val accuracy Score = 0.944375
Val F1 Score (Micro) = 0.944375
Val F1 Score (Macro) = 0.9395275419347193


 12%|█▏        | 49/400 [01:40<12:01,  2.06s/it]

Epoch: 2, Loss:  0.20326682925224304

Val loss = 0.5352197289466858
Val accuracy Score = 0.86625
Val F1 Score (Micro) = 0.86625
Val F1 Score (Macro) = 0.8448687999623046



 25%|██▍       | 99/400 [04:37<10:01,  2.00s/it]

Epoch: 2, Loss:  0.0912720113992691

Val loss = 0.47981780767440796
Val accuracy Score = 0.946875
Val F1 Score (Micro) = 0.946875
Val F1 Score (Macro) = 0.9434640961675723



 37%|███▋      | 149/400 [07:34<08:24,  2.01s/it]

Epoch: 2, Loss:  0.0233850609511137

Val loss = 0.48105743527412415
Val accuracy Score = 0.94875
Val F1 Score (Micro) = 0.94875
Val F1 Score (Macro) = 0.9449182926685912



 50%|████▉     | 199/400 [10:32<06:44,  2.01s/it]

Epoch: 2, Loss:  0.07334630936384201

Val loss = 0.4807187020778656
Val accuracy Score = 0.946875
Val F1 Score (Micro) = 0.946875
Val F1 Score (Macro) = 0.9432381389622178



 62%|██████▏   | 249/400 [13:30<05:02,  2.00s/it]

Epoch: 2, Loss:  0.03414641320705414

Val loss = 0.4804288148880005
Val accuracy Score = 0.9425
Val F1 Score (Micro) = 0.9425
Val F1 Score (Macro) = 0.9392141446042642



 75%|███████▍  | 299/400 [16:27<03:22,  2.01s/it]

Epoch: 2, Loss:  0.03305930644273758

Val loss = 0.47884368896484375
Val accuracy Score = 0.950625
Val F1 Score (Micro) = 0.950625
Val F1 Score (Macro) = 0.9472448585648849



 87%|████████▋ | 349/400 [19:26<01:42,  2.02s/it]

Epoch: 2, Loss:  0.03075265884399414

Val loss = 0.4769163131713867
Val accuracy Score = 0.949375
Val F1 Score (Micro) = 0.949375
Val F1 Score (Macro) = 0.9464984339645774



100%|█████████▉| 399/400 [22:25<00:02,  2.02s/it]

Epoch: 2, Loss:  0.03763146325945854

Val loss = 0.47963282465934753
Val accuracy Score = 0.9525
Val F1 Score (Micro) = 0.9525
Val F1 Score (Macro) = 0.9488391192250474



100%|██████████| 400/400 [23:45<00:00,  3.56s/it]



Val loss = 0.4779323935508728
Val accuracy Score = 0.955
Val F1 Score (Micro) = 0.955
Val F1 Score (Macro) = 0.9516355740504703


In [None]:
torch.save(model.state_dict(), "300-jan-30-humor-classification.pt")