# Text classification

Things to do:
do you want to use collate_fn in data_loader

### imports and globals

In [1]:
T5_SMALL = "t5-small"
GPT = "gpt2"  # 117M parameters as per https://huggingface.co/transformers/v3.3.1/pretrained_models.html # "openai-gpt"
DISTILBERT = "distilbert-base-uncased"

In [2]:
# opt
from collections import defaultdict

# mandatory imports
from pathlib import Path
from datasets import load_dataset

from transformers import AutoTokenizer
from transformers import T5Tokenizer, T5ForConditionalGeneration
from torch.utils.data import Dataset, DataLoader

## load dataset

## Preprocess
- load DistilBERT Tokenizer to process the text field
Can we load T5-small?
- Create a preprocessinging function to tokenize text and trucnate sequences to be no longer than DistilBERT's or T5's max input length


In [3]:
tokenizer = AutoTokenizer.from_pretrained(GPT)

tokenizer config

In [4]:
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'  # 'left'

In [23]:
# Use `DataCollatorWithPadding` as it is more efficient to dynamically pad the sentences to the longest length in a batch during collation, instead of padding to max length
from transformers import DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

#### load dataset

In [6]:
#raw_dataset = load_dataset('super_glue', 'cb', cache_dir="./datasets/.cache/huggingface_datasets")
raw_dataset = load_dataset('ag_news', cache_dir="./datasets/.cache/huggingface_datasets")

Found cached dataset ag_news (/home/jovyan/llm_peft_exploration/datasets/.cache/huggingface_datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548)


  0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
{k: len(raw_dataset[k]) for k in raw_dataset}

{'train': 120000, 'test': 7600}

In [8]:
raw_dataset['test'][0] 

{'text': "Fears for T N pension after talks Unions representing workers at Turner   Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul.",
 'label': 2}

In [9]:
id2label = {
    0: "World",
    1: "Sports",
    2: "Business",
    3: "Sci/Tech"
}

label2id = {v:k for k,v in id2label.items()}

In [40]:
def preprocess_function(examples):
    return tokenizer(examples["text"], return_tensors='pt',  padding="max_length", truncation=True, max_length=128, )
labels = label2id.values()

import numpy as np
def preprocess_data_old(examples):
    #(copied from https://colab.research.google.com/drive/1U7SX7jNYsNQG5BY1xEQQHu48Pn6Vgnyt?usp=sharing#scrollTo=AFWlSsbZaRLc)
  # take a batch of texts
    text = examples["text"]  # this is batch_size
      # encode them
    encoding = tokenizer(text, padding="max_length", truncation=True, max_length=128, return_tensors='pt')
    # add labels
    labels_batch = {k: examples[k] for k in examples['label']}
    # create numpy array of shape (batch_size, num_labels)
    labels_matrix = np.zeros((len(text), len(labels)))
    # fill numpy array
    for idx, label in enumerate(labels):
        labels_matrix[:, idx] = labels_batch[label]
    encoding["label"] = labels_matrix.tolist()
  
    return encoding
#tokenized_imdb = imdb.map(preprocess_function, batched=True)

In [115]:
def preprocess_data(examples):
  # take a batch of texts
    text = examples["text"]  # this is batch_size
      # encode them
    encoding = tokenizer(text, padding="max_length", truncation=True, max_length=128, return_tensors='pt')
    # add labels
    labels_batch = torch.tensor(examples['label'])
    #torch.transpose(labels_batch, 0, 1)
    # create numpy array of shape (batch_size, num_labels)
    labels_matrix = torch.nn.functional.one_hot(labels_batch)
    labels_matrix = labels_matrix.float()
    #print(labels_matrix)
    encoding["label"] = labels_matrix.tolist()
  
    return encoding
#to

In [77]:
target = torch.randint(0, 10, (8,))
one_hot = torch.nn.functional.one_hot(target)
target.shape

torch.Size([8])

#### encode dataset

In [118]:
# tokenize 
encoded_dataset = raw_dataset.map(preprocess_data, batched=True)

Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

In [119]:
# we need to create tratin/valid sets
train_dataset, validation_dataset= encoded_dataset['train'].train_test_split(test_size=0.1).values()


In [120]:
train_dataset[0]['label']

[0.0, 0.0, 1.0, 0.0]

In [116]:
a, b= raw_dataset['test'].train_test_split(test_size=0.001).values()


In [117]:
encoded_dataset = b.map(preprocess_data, batched=True)
encoded_dataset['label']

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

[[0.0, 0.0, 0.0, 1.0],
 [0.0, 0.0, 0.0, 1.0],
 [0.0, 0.0, 0.0, 1.0],
 [0.0, 0.0, 0.0, 1.0],
 [0.0, 1.0, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0],
 [0.0, 0.0, 0.0, 1.0]]

#### just label distr utils

In [9]:
def check_label_distr(data_split):
    label_distr = defaultdict(list)
    for idx, item in enumerate(raw_dataset[data_split]):
        label = item['label']
        label_distr[item['label']].append(idx)
    label_distr_lens = {label: len(label_distr[label]) for label in label_distr.keys()}
    print(f"label distribution in {data_split} is:\n{label_distr_lens}")
    return label_distr

In [16]:
DATA_SPLIT = 'train'
train_label_distr = check_label_distr(data_split=DATA_SPLIT)

label distribution in train is:
{2: 30000, 3: 30000, 1: 30000, 0: 30000}


In [17]:
DATA_SPLIT = 'test'
CLASS_LABEL = 2
test_label_distr = check_label_distr(data_split=DATA_SPLIT)
raw_dataset[DATA_SPLIT][test_label_distr[CLASS_LABEL][0]]

label distribution in test is:
{2: 1900, 3: 1900, 1: 1900, 0: 1900}


{'text': "Fears for T N pension after talks Unions representing workers at Turner   Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul.",
 'label': 2}

### carve out a valid split
different ways to create valid-test splits from train:
1. random sampling: simply use random_split on torch.utils.data.Dataset object. It returns a torch.utils.data.Subset (subset of indices from parent dataset)
2. use test_train_split from sklearn.model_selection()
3. can also use weightedRandomSampler() https://towardsdatascience.com/demystifying-pytorchs-weightedrandomsampler-by-example-a68aceccb452

In [16]:
def describe_label_distr_from_data_loader(dl):
    label_list = []
    for ids, item in enumerate(dl):
        batch_labels = item['label'].tolist()
        label_list.extend(batch_labels)
    print(f'distribution of labels: {collections.Counter(label_list)}')
  

1. random sampling: simply use random_split on torch.utils.data.Dataset object

In [20]:
from torch.utils.data.sampler import Sampler
from torch.utils.data import random_split
import collections
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split

VALID_SIZE = 0.1
BATCH_SIZE = 8
SEED = 42


In [96]:
labels = [instance['label'] for instance in encoded_dataset['train']]
collections.Counter(labels)

Counter({2: 30000, 3: 30000, 1: 30000, 0: 30000})

In [19]:
encoded_dataset.set_format("torch")
#train, valid = random_split(encoded_dataset['train'], lengths=[0.9,0.1])


In [100]:
valid_loader = DataLoader(valid, batch_size=BATCH_SIZE, shuffle=True)#, collate_fn=data_collator)
train_loader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)#, collate_fn=data_collator)
  
#describe_label_distr_from_data_loader(valid_loader)
#describe_label_distr_from_data_loader(train_loader)

#### discarded
2. use test_train_split from sklearn.model_selection()

In [28]:

# generate indices: instead of the actual data we pass in integers instead
train_indices, valid_indices, _, _ = train_test_split(
    range(len(raw_dataset['train'])),
    labels,
    stratify=labels,
    test_size=VALID_SIZE,
    random_state=SEED
)

# generate subset based on indices
train_split = Subset(encoded_dataset['train'], train_indices)
valid_split = Subset(encoded_dataset['train'], valid_indices)

# create batches
train_loader = DataLoader(train_split, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_split, batch_size=BATCH_SIZE)

3. simply from splitting the train dataset using pytorch internal wrapper on 
train_dataset, validation_dataset= encoded_dataset['train'].train_test_split(test_size=0.1).values()


In [28]:
describe_label_distr_from_data_loader(DataLoader(train_dataset))
describe_label_distr_from_data_loader(DataLoader(validation_dataset))

distribution of labels: Counter({0: 27020, 1: 27010, 2: 26987, 3: 26983})
distribution of labels: Counter({3: 3017, 2: 3013, 1: 2990, 0: 2980})


### tokenise, training loop

In [39]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(GPT, 
                                                           problem_type="multi_label_classification", 
                                                           num_labels=len(label2id),
                                                           id2label=id2label,
                                                           label2id=label2id)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [96]:
model.config.pad_token_id = model.config.eos_token_id


In [97]:
MODEL_DIR = Path(f"data/models_20230602")
MODEL_DIR.exists()

True

In [98]:
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction
import torch
    
# source: https://jesusleal.io/2021/04/21/Longformer-multilabel-classification/
def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    # finally, compute metrics
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)
    # return as dictionary
    metrics = {'f1': f1_micro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics

def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, 
            tuple) else p.predictions
    result = multi_label_metrics(
        predictions=preds, 
        labels=p.label_ids)
    return result

In [124]:
from transformers import TrainingArguments, Trainer
METRIC_NAME = "f1"
N_EPOCHS = 10

In [99]:
args = TrainingArguments(
    output_dir=MODEL_DIR,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=N_EPOCHS,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=METRIC_NAME,
    logging_dir='./logs',            # directory for storing logs*
    logging_steps=20,
    report_to='tensorboard'
)
    

In [122]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator
)

In [125]:
example = train_dataset[0]
example['label']

[0.0, 0.0, 1.0, 0.0]

In [103]:
model.device

device(type='cuda', index=0)

In [107]:
#outputs = 
model(input_ids=torch.tensor(train_dataset[0]['input_ids']).to('cuda:0'), labels=torch.tensor(train_dataset[0]['label']).to('cuda:0'))


ValueError: not enough values to unpack (expected 2, got 1)

In [123]:
trainer.train()



Epoch,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
1,0.098,0.101883,0.937534,0.957931,0.933667


TrainOutput(global_step=13500, training_loss=0.11835006403040003, metrics={'train_runtime': 1357.4827, 'train_samples_per_second': 79.559, 'train_steps_per_second': 9.945, 'total_flos': 7055139667968000.0, 'train_loss': 0.11835006403040003, 'epoch': 1.0})

In [126]:
trainer.evaluate()

{'eval_loss': 0.10188324004411697,
 'eval_f1': 0.9375339036094303,
 'eval_roc_auc': 0.9579305555555556,
 'eval_accuracy': 0.9336666666666666,
 'eval_runtime': 40.4015,
 'eval_samples_per_second': 297.018,
 'eval_steps_per_second': 37.127,
 'epoch': 1.0}

In [127]:
trainer.eval_dataset=encoded_dataset['test']
trainer.evaluate()

{'eval_loss': 0.09730511903762817,
 'eval_f1': 0.9404244101752999,
 'eval_roc_auc': 0.959780701754386,
 'eval_accuracy': 0.9364473684210526,
 'eval_runtime': 28.4259,
 'eval_samples_per_second': 267.362,
 'eval_steps_per_second': 33.42,
 'epoch': 1.0}

### train

In [50]:
from tqdm import tqdm
train_losses = []
test_losses = []
use_cuda = True
for epoch in range(20):
    tot_loss = 0
    for step, inputs in enumerate(tqdm(train_dataloader)):
        if use_cuda:
            inputs = inputs.cuda()
        logits = prompt_model(inputs)
        labels = inputs['label']
        loss = loss_func(logits, labels)
        loss.backward()
        tot_loss += loss.item()
        optimizer.step()
        optimizer.zero_grad()
        ep_train_loss = tot_loss/(step+1)
        if step %2000 ==1:
            train_losses.append(ep_train_loss)
            print("Epoch {}, average loss: {}".format(epoch, ep_train_loss), flush=True)
    allpreds = []
    alllabels = []
    for batch, inputs in enumerate(test_dataloader):
        if use_cuda:
            inputs = inputs.cuda()
        logits = prompt_model(inputs)
        labels = inputs['label']
        alllabels.extend(labels.cpu().tolist())
        allpreds.extend(torch.argmax(logits, dim=-1).cpu().tolist())
    acc = sum([int(i==j) for i,j in zip(allpreds, alllabels)])/len(allpreds)
    print(f'Epoch {epoch}: test accuracy: {acc}')



  0%|          | 1/7500 [00:00<38:03,  3.28it/s]

Epoch 0, average loss: 0.07447695569135249


 27%|██▋       | 2001/7500 [09:54<27:37,  3.32it/s]

Epoch 0, average loss: 0.09317532743272194


 53%|█████▎    | 4001/7500 [19:49<17:07,  3.41it/s]

Epoch 0, average loss: 0.09629840620888583


 80%|████████  | 6001/7500 [29:43<07:14,  3.45it/s]

Epoch 0, average loss: 0.09757871261904678


100%|██████████| 7500/7500 [37:07<00:00,  3.37it/s]


Epoch 0: test accuracy: 0.9465789473684211


  0%|          | 1/7500 [00:00<37:45,  3.31it/s]

Epoch 1, average loss: 0.031372164376080036


 27%|██▋       | 2001/7500 [09:53<27:28,  3.34it/s]

Epoch 1, average loss: 0.03638204309497708


 53%|█████▎    | 4001/7500 [19:48<17:10,  3.40it/s]

Epoch 1, average loss: 0.041061164024217466


 80%|████████  | 6001/7500 [29:40<07:23,  3.38it/s]

Epoch 1, average loss: 0.042727474881904176


100%|██████████| 7500/7500 [37:05<00:00,  3.37it/s]


Epoch 1: test accuracy: 0.945


  0%|          | 1/7500 [00:00<37:40,  3.32it/s]

Epoch 2, average loss: 0.008110608905553818


 27%|██▋       | 2001/7500 [09:53<26:58,  3.40it/s]

Epoch 2, average loss: 0.019265413387238885


 53%|█████▎    | 4001/7500 [19:46<17:05,  3.41it/s]

Epoch 2, average loss: 0.023106005005659007


 80%|████████  | 6001/7500 [29:39<07:35,  3.29it/s]

Epoch 2, average loss: 0.025498516863532664


100%|██████████| 7500/7500 [37:04<00:00,  3.37it/s]


Epoch 2: test accuracy: 0.9465789473684211


  0%|          | 1/7500 [00:00<37:47,  3.31it/s]

Epoch 3, average loss: 0.0018224656232632697


 27%|██▋       | 2001/7500 [09:53<27:27,  3.34it/s]

Epoch 3, average loss: 0.01433275319054617


 53%|█████▎    | 4001/7500 [19:47<17:25,  3.35it/s]

Epoch 3, average loss: 0.017226989501110814


 80%|████████  | 6001/7500 [29:41<07:29,  3.34it/s]

Epoch 3, average loss: 0.01908128715847471


100%|██████████| 7500/7500 [37:06<00:00,  3.37it/s]


Epoch 3: test accuracy: 0.9464473684210526


  0%|          | 1/7500 [00:00<42:51,  2.92it/s]

Epoch 4, average loss: 0.0010925912647508085


 27%|██▋       | 2001/7500 [09:53<26:43,  3.43it/s]

Epoch 4, average loss: 0.012275348992028971


 53%|█████▎    | 4001/7500 [19:47<17:08,  3.40it/s]

Epoch 4, average loss: 0.01310150157845624


 80%|████████  | 6001/7500 [29:39<07:19,  3.41it/s]

Epoch 4, average loss: 0.015475986897156149


100%|██████████| 7500/7500 [37:04<00:00,  3.37it/s]


Epoch 4: test accuracy: 0.9475


  0%|          | 1/7500 [00:00<38:04,  3.28it/s]

Epoch 5, average loss: 0.00480890175094828


 27%|██▋       | 2001/7500 [09:54<27:04,  3.38it/s]

Epoch 5, average loss: 0.011649178321212029


 53%|█████▎    | 4001/7500 [19:47<17:21,  3.36it/s]

Epoch 5, average loss: 0.01214325343143492


 80%|████████  | 6001/7500 [29:39<07:23,  3.38it/s]

Epoch 5, average loss: 0.013850588207033502


100%|██████████| 7500/7500 [37:04<00:00,  3.37it/s]


Epoch 5: test accuracy: 0.9472368421052632


  0%|          | 1/7500 [00:00<37:30,  3.33it/s]

Epoch 6, average loss: 0.0001773704971128609


 27%|██▋       | 2001/7500 [09:53<26:55,  3.40it/s]

Epoch 6, average loss: 0.010855463469311162


 53%|█████▎    | 4001/7500 [19:47<17:16,  3.38it/s]

Epoch 6, average loss: 0.012064546242813825


 80%|████████  | 6001/7500 [29:42<07:37,  3.27it/s]

Epoch 6, average loss: 0.01250786911575898


100%|██████████| 7500/7500 [37:07<00:00,  3.37it/s]


Epoch 6: test accuracy: 0.9478947368421052


  0%|          | 1/7500 [00:00<37:23,  3.34it/s]

Epoch 7, average loss: 0.0009870353387668729


 27%|██▋       | 2001/7500 [09:54<27:15,  3.36it/s]

Epoch 7, average loss: 0.007647645044765501


 53%|█████▎    | 4001/7500 [19:51<17:45,  3.28it/s]

Epoch 7, average loss: 0.009735594871383875


 80%|████████  | 6001/7500 [29:48<07:24,  3.37it/s]

Epoch 7, average loss: 0.01019961646487118


100%|██████████| 7500/7500 [37:14<00:00,  3.36it/s]


Epoch 7: test accuracy: 0.9456578947368421


  0%|          | 1/7500 [00:00<38:35,  3.24it/s]

Epoch 8, average loss: 0.0006434436654672027


 27%|██▋       | 2001/7500 [09:55<26:44,  3.43it/s]

Epoch 8, average loss: 0.010980361509342151


 53%|█████▎    | 4001/7500 [19:49<16:58,  3.43it/s]

Epoch 8, average loss: 0.010838185029570315


 80%|████████  | 6001/7500 [29:44<07:25,  3.36it/s]

Epoch 8, average loss: 0.010895906623017877


100%|██████████| 7500/7500 [37:10<00:00,  3.36it/s]


Epoch 8: test accuracy: 0.9447368421052632


  0%|          | 1/7500 [00:00<37:47,  3.31it/s]

Epoch 9, average loss: 0.010528070370128262


 27%|██▋       | 2001/7500 [09:54<27:06,  3.38it/s]

Epoch 9, average loss: 0.009318949781246576


 53%|█████▎    | 4001/7500 [19:48<17:20,  3.36it/s]

Epoch 9, average loss: 0.010102476102322058


 80%|████████  | 6001/7500 [29:43<07:30,  3.33it/s]

Epoch 9, average loss: 0.011255457109534


100%|██████████| 7500/7500 [37:07<00:00,  3.37it/s]


Epoch 9: test accuracy: 0.9467105263157894


  0%|          | 1/7500 [00:00<37:16,  3.35it/s]

Epoch 10, average loss: 8.784360215940978e-05


 27%|██▋       | 2001/7500 [09:54<26:58,  3.40it/s]

Epoch 10, average loss: 0.006546116735990147


 53%|█████▎    | 4001/7500 [19:48<18:20,  3.18it/s]

Epoch 10, average loss: 0.008889513546248137


 80%|████████  | 6001/7500 [29:40<07:17,  3.43it/s]

Epoch 10, average loss: 0.009827858608230659


100%|██████████| 7500/7500 [37:04<00:00,  3.37it/s]


Epoch 10: test accuracy: 0.9469736842105263


  0%|          | 1/7500 [00:00<37:23,  3.34it/s]

Epoch 11, average loss: 0.0011608727218117565


 27%|██▋       | 2001/7500 [09:53<26:50,  3.41it/s]

Epoch 11, average loss: 0.006660266698941149


 53%|█████▎    | 4001/7500 [19:47<16:56,  3.44it/s]

Epoch 11, average loss: 0.008020238391335437


 71%|███████   | 5336/7500 [26:22<10:28,  3.44it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 53%|█████▎    | 4001/7500 [19:48<17:10,  3.39it/s]

Epoch 12, average loss: 0.007884605233659463


 67%|██████▋   | 5012/7500 [24:48<12:08,  3.42it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 27%|██▋       | 2001/7500 [09:55<26:57,  3.40it/s]

Epoch 13, average loss: 0.008033443022561813


 53%|█████▎    | 4001/7500 [19:50<17:30,  3.33it/s]

Epoch 13, average loss: 0.008259694514089342


 80%|████████  | 6001/7500 [29:45<07:27,  3.35it/s]

Epoch 13, average loss: 0.008924862471027702


100%|██████████| 7500/7500 [37:11<00:00,  3.36it/s]


Epoch 13: test accuracy: 0.9478947368421052


  0%|          | 1/7500 [00:00<37:28,  3.34it/s]

Epoch 14, average loss: 0.00048300328489858657


 27%|██▋       | 2001/7500 [09:53<27:00,  3.39it/s]

Epoch 14, average loss: 0.005861840679875735


 53%|█████▎    | 4001/7500 [19:49<17:17,  3.37it/s]

Epoch 14, average loss: 0.006774755869287761


 80%|████████  | 6001/7500 [29:41<07:39,  3.26it/s]

Epoch 14, average loss: 0.0071073428124988425


100%|██████████| 7500/7500 [37:09<00:00,  3.36it/s]


Epoch 14: test accuracy: 0.9486842105263158


  0%|          | 1/7500 [00:00<38:16,  3.27it/s]

Epoch 15, average loss: 0.01741854052670533


 27%|██▋       | 2001/7500 [09:52<26:55,  3.40it/s]

Epoch 15, average loss: 0.006390110117993448


 53%|█████▎    | 4001/7500 [19:46<16:59,  3.43it/s]

Epoch 15, average loss: 0.0067789925776595445


 80%|████████  | 6001/7500 [29:39<07:21,  3.39it/s]

Epoch 15, average loss: 0.007404725531324859


100%|██████████| 7500/7500 [37:04<00:00,  3.37it/s]


Epoch 15: test accuracy: 0.9481578947368421


  0%|          | 1/7500 [00:00<37:41,  3.32it/s]

Epoch 16, average loss: 0.0008547779070795514


 27%|██▋       | 2001/7500 [09:53<27:28,  3.34it/s]

Epoch 16, average loss: 0.005855757648435902


 53%|█████▎    | 4001/7500 [19:47<17:38,  3.30it/s]

Epoch 16, average loss: 0.007172257324654123


 80%|████████  | 6001/7500 [29:39<07:21,  3.39it/s]

Epoch 16, average loss: 0.007530230293921482


100%|██████████| 7500/7500 [37:01<00:00,  3.38it/s]


Epoch 16: test accuracy: 0.9475


  0%|          | 1/7500 [00:00<41:02,  3.05it/s]

Epoch 17, average loss: 4.2787789425347e-05


 27%|██▋       | 2001/7500 [09:51<27:10,  3.37it/s]

Epoch 17, average loss: 0.004507046333325917


 53%|█████▎    | 4001/7500 [19:43<17:23,  3.35it/s]

Epoch 17, average loss: 0.0058147416288691365


 80%|████████  | 6001/7500 [29:34<07:18,  3.42it/s]

Epoch 17, average loss: 0.006744115019564166


100%|██████████| 7500/7500 [36:59<00:00,  3.38it/s]


Epoch 17: test accuracy: 0.9477631578947369


  0%|          | 1/7500 [00:00<37:34,  3.33it/s]

Epoch 18, average loss: 5.784257382401847e-05


 27%|██▋       | 2001/7500 [09:52<26:44,  3.43it/s]

Epoch 18, average loss: 0.004421349419179631


 53%|█████▎    | 4001/7500 [19:43<17:12,  3.39it/s]

Epoch 18, average loss: 0.004974599751183542


 80%|████████  | 6001/7500 [29:34<07:44,  3.23it/s]

Epoch 18, average loss: 0.005956028616079698


100%|██████████| 7500/7500 [36:58<00:00,  3.38it/s]


Epoch 18: test accuracy: 0.9457894736842105


  0%|          | 1/7500 [00:00<40:03,  3.12it/s]

Epoch 19, average loss: 0.002635062555782497


 27%|██▋       | 2001/7500 [09:51<26:46,  3.42it/s]

Epoch 19, average loss: 0.004935971168553347


 53%|█████▎    | 4001/7500 [19:44<17:18,  3.37it/s]

Epoch 19, average loss: 0.004854086502257933


 80%|████████  | 6001/7500 [29:35<07:29,  3.34it/s]

Epoch 19, average loss: 0.006098550923006531


100%|██████████| 7500/7500 [36:58<00:00,  3.38it/s]


Epoch 19: test accuracy: 0.9463157894736842


In [51]:
import torch

torch.save(prompt_model, 'data/models/tensor.pt')

test how the accuracy improves with batches of training data

In [52]:
import json
with open('test_losses.json', 'w+') as f:
    json.dump(test_losses, f)

In [None]:
import json
with open('train_losses.json', 'w+') as f:
    json.dump(test_losses, f)

In [25]:
allpreds = []
alllabels = []
for batch, inputs in enumerate(validation_dataloader):
    if use_cuda:
        inputs = inputs.cuda()
    logits = prompt_model(inputs)
    labels = inputs['label']
    alllabels.extend(labels.cpu().tolist())
    allpreds.extend(torch.argmax(logits, dim=-1).cpu().tolist())
acc = sum([int(i==j) for i,j in zip(allpreds, alllabels)])/len(allpreds)
acc    


0.9107142857142857

In [None]:
allpreds

In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(y_true=alllabels, y_pred=allpreds)

##### push model to hf hub

(https://colab.research.google.com/drive/1U7SX7jNYsNQG5BY1xEQQHu48Pn6Vgnyt?usp=sharing#scrollTo=H5j5YJE2hK58)

In [17]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
trainer.push_to_hub("your-username/model-name")