In [1]:
import os
import pandas as pd
import numpy as np
# torch
import torch
from torch import Generator
# local
from uda_data import *
from uda_model import *
from uda_optimizer import *
from uda_trainer import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Compute device: prefer Apple-silicon MPS (the original target), then CUDA, then CPU,
# so the notebook also runs on machines that have no MPS backend.
if torch.backends.mps.is_available():
    DEVICE = torch.device('mps')
elif torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
# Root folder that contains the `data/<DATASET_NAME>/` directory.
# Set the UDA_PROJECT_ROOT environment variable to relocate it; the default is the
# original hard-coded location, so existing setups keep working unchanged.
PATH = os.environ.get(
    'UDA_PROJECT_ROOT',
    '/Users/lunli/Library/CloudStorage/GoogleDrive-yaojn19880525@gmail.com/My Drive/Colab Notebooks/',
)
# Name of the dataset sub-folder under `<PATH>/data/`.
DATASET_NAME = 'uda_imdb_data_128'

In [3]:
### load the labeled (supervised) and unlabeled (unsupervised) training sets
# NOTE: torch.load relies on pickle, so only point it at trusted files.
# labeled examples
train_sup_data = torch.load(os.path.join(PATH, f'data/{DATASET_NAME}/train_sup_data.pt'))
# unlabeled examples: for each of the first four tensors stored in the dataset,
# keep the first 20000 rows and the last 20000 rows, then re-wrap as a TensorDataset
train_unsup_data = torch.load(os.path.join(PATH, f'data/{DATASET_NAME}/train_unsup_data.pt'))
train_unsup_data = TensorDataset(*[
    torch.cat([train_unsup_data.tensors[idx][:20000], train_unsup_data.tensors[idx][-20000:]])
    for idx in range(4)
])

In [4]:
### build the data loaders consumed by the trainer
# held-out validation split
valid_data = torch.load(os.path.join(PATH, f'data/{DATASET_NAME}/val_data.pt'))
# held-out test split
test_data = torch.load(os.path.join(PATH, f'data/{DATASET_NAME}/test_data.pt'))
# a single generator seeded with 42 is shared by all three random samplers
generator = Generator().manual_seed(42)
train_sup_dataloader = DataLoader(
    train_sup_data,
    sampler=RandomSampler(train_sup_data, generator=generator),
    batch_size=8,
)
train_unsup_dataloader = DataLoader(
    train_unsup_data,
    sampler=RandomSampler(train_unsup_data, generator=generator),
    batch_size=24,
)
valid_dataloader = DataLoader(
    valid_data,
    sampler=RandomSampler(valid_data, generator=generator),
    batch_size=16,
)
# map each loader role to its loader; this dict is what the trainer receives
datamodeler = dict([
    (DataLoaderType.TRAINING, train_sup_dataloader),
    (DataLoaderType.VALIDATION, valid_dataloader),
    (DataLoaderType.TRAINING_UNLABELED, train_unsup_dataloader),
])

In [5]:
### set up the necessities for training
# tokenizer and 2-label BERT classifier; DEVICE is handed to the loader
tokenizer, model = load_bert_model("bert-base-uncased", num_labels=2, device=DEVICE)
# one loss per branch: cross-entropy for the labeled data, KL divergence for the unlabeled data
# (reduce='none' presumably keeps per-example losses -- confirm in get_loss_functions)
loss_sup = get_loss_functions(LossFuncType.CROSS_ENTROPY, reduce='none')
loss_unsup = get_loss_functions(LossFuncType.KL_DIV, reduce='none')
loss_dict = dict(sup=loss_sup, unsup=loss_unsup)

# optimizer: LoRA variant when USE_LORA is set, otherwise the plain AdamNLP factory
USE_LORA = False
if USE_LORA:
    lora_config = LoraConfig(task_type=TaskType.SEQ_CLS, target_modules=["query", "key", "value"], r=1, lora_alpha=1, lora_dropout=0.1)
    optimizer = AdamNLP.newNLPAdam_LORA(model, lora_config)
else:
    # NOTE(review): the dict looks like a per-module layer/freeze spec -- confirm in uda_optimizer
    optimizer = AdamNLP.newNLPAdam(model, {'embeddings':True, 'encoder': 9}, lr = 2e-5)
# either way, continue with the model as returned by the optimizer wrapper
model = optimizer.get_model_transformed()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
### Start Training
# Build the UDA trainer from the model, the loader dict, the sup/unsup loss dict and the optimizer.
# report_freq=2 matches the "At step 2, 4, 6, ..." cadence of the training log printed below.
trainer = TrainerUDA(model, datamodeler, loss_dict, optimizer, report_freq=2, device=DEVICE)
# The first positional argument (2) is the epoch count, as shown by the "Epoch: .../2" progress bar.
# save_model_freq=-1 presumably disables checkpoint saving -- TODO confirm in uda_trainer.
# NOTE(review): the saved log reports a supervised loss of exactly 0.0 at every reported step
# of the second epoch; verify in TrainerUDA whether the supervised batches are still being
# consumed after the first epoch or whether this is intended behavior.
trainer.train(2, schedule_type = SchedulerType.INVERSE_SQRT, save_model_freq=-1)

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

At step 2, the training (sup)loss is 0.7823353409767151, the training unsup-loss is 0.010606745723634958.
At step 4, the training (sup)loss is 0.7850031703710556, the training unsup-loss is 0.011880077654495835.
At step 6, the training (sup)loss is 0.7866012851397196, the training unsup-loss is 0.012018722171584765.
At step 8, the training (sup)loss is 0.7721334546804428, the training unsup-loss is 0.01082521805074066.
At step 10, the training (sup)loss is 0.7727465212345124, the training unsup-loss is 0.010313795367255806.
At step 12, the training (sup)loss is 0.7720537384351095, the training unsup-loss is 0.009726998900684217.
At step 14, the training (sup)loss is 0.7719060012272426, the training unsup-loss is 0.009816831277151192.
At step 16, the training (sup)loss is 0.7706480100750923, the training unsup-loss is 0.010173917369684204.
At step 18, the training (sup)loss is 0.7268674870332082, the training unsup-loss is 0.009802090288657282.
At step 20, the training (sup)loss is 0.73

Epoch:  50%|█████     | 1/2 [16:08<16:08, 968.79s/it]

Validation accuracy is: 0.8119999766349792.

At step 2, the training (sup)loss is 0.0, the training unsup-loss is 0.037479495629668236.
At step 4, the training (sup)loss is 0.0, the training unsup-loss is 0.048472246155142784.
At step 6, the training (sup)loss is 0.0, the training unsup-loss is 0.04943556276460489.
At step 8, the training (sup)loss is 0.0, the training unsup-loss is 0.04904877534136176.
At step 10, the training (sup)loss is 0.0, the training unsup-loss is 0.05183429643511772.
At step 12, the training (sup)loss is 0.0, the training unsup-loss is 0.05256942007690668.
At step 14, the training (sup)loss is 0.0, the training unsup-loss is 0.052746785538537164.
At step 16, the training (sup)loss is 0.0, the training unsup-loss is 0.05023617227561772.
At step 18, the training (sup)loss is 0.0, the training unsup-loss is 0.05149162126084169.
At step 20, the training (sup)loss is 0.0, the training unsup-loss is 0.050935244746506216.
At step 22, the training (sup)loss is 0.0, th

Epoch: 100%|██████████| 2/2 [32:24<00:00, 972.23s/it]

Validation accuracy is: 0.840399980545044.






In [7]:
# build the test dataloader
test_data = torch.load(os.path.join(PATH, f'data/{DATASET_NAME}/test_data.pt'))
# keep only the first three tensors of the stored dataset
test_data = TensorDataset(*[test_data.tensors[pos] for pos in range(3)])
# sequential batches of 16; the variable keeps its original spelling because
# the evaluator cell refers to it by this name
test_dataloder = DataLoader(test_data, batch_size=16)

In [8]:
# Evaluator over the test loader, built with the supervised loss function (loss_sup) and DEVICE.
test_evaluator = Evaluator(loss_sup, test_dataloder, device=DEVICE)

In [9]:
# Run the evaluator on the fine-tuned model using the test loader.
# NOTE(review): the printed message is labeled "Validation accuracy" even though this
# evaluator was built on the test split; the label presumably comes from Evaluator itself.
test_evaluator.run(model)

Validation accuracy is: 0.8436499834060669.



In [None]:
# list every parameter of the model that is still trainable (requires_grad is set)
for param_name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    print(f'{param_name} : {param.requires_grad}')

In [10]:
from sentence_transformers import SentenceTransformer

In [16]:
# Load the all-MiniLM-L6-v2 sentence encoder and move it to the notebook-wide DEVICE
# (defined in the configuration cell) instead of a second hard-coded 'mps' device.
# NOTE(review): this rebinds `model`, which up to this point referred to the fine-tuned
# BERT classifier; re-running the evaluation cells after this one would use the encoder.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2').to(DEVICE)



In [19]:
# Encode the same placeholder string twice; convert_to_tensor=True asks for a tensor result.
a = model.encode(['asdf', 'asdf'], convert_to_tensor=True)

In [27]:
# Average the encoded outputs over the first axis, move the result to the CPU and
# display it as a NumPy array.
a.mean(axis=0).cpu().numpy()

array([-4.76334281e-02, -4.01788950e-02, -7.13511631e-02,  4.50336933e-02,
        1.61476061e-02, -4.87910211e-02,  8.94101784e-02,  3.41040939e-02,
       -2.44780928e-02, -5.21589257e-02,  3.36785540e-02, -6.02451935e-02,
       -2.48572119e-02,  4.25534463e-03, -2.88347546e-02, -5.18903993e-02,
        1.29055418e-02, -1.11941166e-01, -7.36576989e-02,  2.17653196e-02,
       -5.77138327e-02,  3.37865651e-02,  6.34062812e-02, -4.78094025e-03,
        2.23415568e-02, -2.57172510e-02, -5.82065880e-02, -3.65310088e-02,
        1.24478126e-02, -1.53364465e-01,  7.69489110e-02,  5.37916534e-02,
        6.88013136e-02,  3.24284062e-02, -2.96538621e-02,  2.27979664e-02,
       -3.80658992e-02,  9.05014388e-03,  1.19353570e-02,  4.71523553e-02,
       -5.87766767e-02, -9.66688469e-02,  8.36317707e-03, -2.60597691e-02,
       -1.64373312e-02, -1.97897255e-02, -3.57994549e-02, -3.85996960e-02,
        1.14329785e-01,  5.42615354e-02, -1.44849066e-02,  6.67404532e-02,
       -7.33617917e-02, -