# Run Joint Learning Benchmark

In [1]:
%cd ..

/home/adam/Projects/hybrid-transformer


In [2]:
import os
import torch
import wandb

from hybrid_transformer.configs.task import TaskConfig
from hybrid_transformer.configs.model import ModelConfig
from hybrid_transformer.configs.trainer import TrainerConfig
from hybrid_transformer.configs.logger import LoggerConfig

from hybrid_transformer.utils.datasets.auto import AutoDataset
from hybrid_transformer.utils.tokenizers.auto import AutoTokenizer
from hybrid_transformer.models.auto import AutoModel
from hybrid_transformer.utils.loggers.wandb import WandbLogger

from hybrid_transformer.trainers.trainer import Trainer

from scripts.pretrain.train import DEFAULT_CONFIG_FILES

from hybrid_transformer.utils.objectives.guacamol.objective import GUACAMOL_TASKS
from hybrid_transformer.models.prediction import PREDICTION_MODEL_CONFIGS
%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm
2024-03-31 20:23:43.635323: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-31 20:23:43.675157: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-31 20:23:43.832440: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-31 20:23:43.832468: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-31 20:23:43.833162: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515

In [3]:
# Load configs

task_config_path = lambda: f'./configs/tasks/molecule_net/{guacamol_task}/config.json'


for guacamol_task in ['bbbp']:
    
    task_config = TaskConfig.from_pretrained(task_config_path())
    task_config.validate = False
    task_config.split = 'test'
    task_config.num_samples = 100
    dataset = AutoDataset.from_config(task_config)
    print(f"Loaded {task_config.target_label} data.")
    
    for model_name, path_to_model_config in PREDICTION_MODEL_CONFIGS.items():
        
        model_config = ModelConfig.from_pretrained(path_to_model_config)
        model_config.prediction_task = 'classification'
        trainer_config = TrainerConfig.from_pretrained('./configs/trainers/debug/')
        trainer_config.compile = False
        logger_config = LoggerConfig.from_pretrained(DEFAULT_CONFIG_FILES['logger'])
        
        out_dir = f'./results/regression_task/guacamol/{model_name}/{guacamol_task}'
        trainer_config.out_dir = out_dir
        logger_config.name = model_name + '_' + guacamol_task
        logger_config.project = 'debug'
        trainer_config.enable_save_checkpoint = False
        task_config.validate = False
        logger_config.wandb_log = False
        
        task_config.validate = False
        dataset = AutoDataset.from_config(task_config)
        tokenizer = AutoTokenizer.from_config(task_config)                
        model = AutoModel.from_config(model_config)
        logger = WandbLogger(logger_config, [task_config, model_config, trainer_config])
        trainer = Trainer(config=trainer_config, model=model, train_dataset=dataset, eval_dataset=dataset, tokenizer=tokenizer, logger=logger)
        trainer._train_init()
        model.eval()
        # results = trainer.test(dataset)
        

        

Instructions for updating:
experimental_relax_shapes is deprecated, use reduce_retracing instead
Loaded bbbp data.
number of parameters: 38.06M
tokens per iteration will be: 512
Using cuda device
Random seed set to 1337
num decayed parameter tensors: 63, with 38,116,352 parameters
num non-decayed parameter tensors: 25, with 12,800 parameters
using fused AdamW: True


In [18]:
trainer.model.generate(
    task='lm', input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'],
    max_length=inputs['input_ids'].size(1) + 10, num_beams=5, num_return_sequences=5)

('C([C@@H](C(=O)OCC)N[C@@H](C)C(N1CCC[C@H]1C(=O)O)=O)Cc1ccccc1', tensor([0.]))

In [10]:
from hybrid_transformer.models.utils import GuacamolModelWrapper

temperature = 1.0
top_k = 5

wrapper_model = GuacamolModelWrapper(trainer.model, trainer.tokenizer, 64, trainer.device, temperature=temperature, top_k=top_k)

In [11]:
samples = wrapper_model.generate(number_samples=64)

In [9]:
samples

['[13C][V+5][unused5][SH5][14CH2][Al+3][CuH2-][V][Si][Si][Ru][Ru][Cf][SiH3][Yb][Ac-][Na-2][Hf+2][Hf+2][At][SH-]#[se]%16%16[Rh+2][Cr+2][Mo+4][13C][Ho+3][Ho+3][C+4][RuH2][Ru-][Pd][Ti+5][Ti+5][Er+3][Er+3]1[N+3][N+3][35S][S@@+][S@@+][ClH+][o+]%11[Hg+2][Cu][Yb+3][Mg]%16%16%16%16%19=[13CH]1[Bi+][Ni+][B][2NH][O-2][Ni][KH][Fm][Pt+2][124I-][124I-][124I-][Dy+3][Dy+3][SH3+][O-2][O-2][Al][AlH3][p-][Pd+4][TaH3][CH2][Cm][Ho+3][Na+][P][Th]p[Ni][Tl+]22[Sn][Ag+2][IH][IH][Zr+4][SeH][SeH][Ir][18F][OH3+][11C]6[Al+2][YH][Ca+2][Er][BH3-][Ce][c-][Al-][Al-][Mo+2]%15%22[NH+][Nb+5][Nb+5]8[Ni+3][Ni+3][AlH4]%20[Cr+6][Tl+2]',
 '[S][Sn+2][Sr+2][Ir]8[SH2][CaH2][Hg+2][Nd][123I][Nd][Nd][SH3+][Hf][Gd+3][nH+][Ti+6][Mg+2][Mg+2][Sc+2][BH-][BH-][NH3+][Mn+3][N@@H+][NH3+][K][31P][Tb][Tb][Tb][P@][Ir+4][19F][unused7][Si@H][cH-]O[SeH][2NH][2NH][W+4][LaH][AlH4-][Li+][Ca+2][Ca+2][K+][PH-][As-][AsH2][Cr-][Cr-][Te][NH-][NH-][AlH4-][OH3+][OH3+]%13[Eu+3][GeH][AsH2][AlH][Th][CH2-][124I-][cH-][cH-][C@][C@][AsH+]5[Ni+2][Ce+4][Ce+4]c[Sn+

In [14]:
task = 'lm'
inputs = trainer.get_batch(split='train', task=task)
trainer.model.zero_grad()
outputs = trainer.model(
    task=task, input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'],
    labels=inputs['labels'], target=inputs['target'], eos_mask=inputs['eos_mask'])
outputs['loss'].backward()

In [20]:
grads = {name: params.grad.view(-1).cpu().clone().numpy() for name, params in trainer.model.named_parameters() if "weight" in name}

AttributeError: 'NoneType' object has no attribute 'view'

In [57]:
B = inputs['input_ids'].size(0)
T = inputs['input_ids'].size(1)
C = trainer.model.embedding_dim
num_heads = trainer.model.num_heads

for name, params in trainer.model.named_parameters():
    print(name)
    if name == 'transformer.h.8.attn_1.q_proj.weight':
        q = params.view(B, T, num_heads, C // num_heads).transpose(1, 2)
    if name == 'transformer.h.8.attn_1.kv_proj.weight':
        grad_kv_proj = params.grad
    if name == 'transformer.h.8.attn_1.out_proj.weight':
        grad_out_proj = params.grad    

transformer.wte.weight
transformer.wpe.weight
transformer.h.0.ln_1.weight
transformer.h.0.attn_1.q_proj.weight
transformer.h.0.attn_1.kv_proj.weight
transformer.h.0.attn_1.out_proj.weight
transformer.h.0.ln_2.weight
transformer.h.0.mlp.fc.weight
transformer.h.0.mlp.proj.weight
transformer.h.1.ln_1.weight
transformer.h.1.attn_1.q_proj.weight
transformer.h.1.attn_1.kv_proj.weight
transformer.h.1.attn_1.out_proj.weight
transformer.h.1.ln_2.weight
transformer.h.1.mlp.fc.weight
transformer.h.1.mlp.proj.weight
transformer.h.2.ln_1.weight
transformer.h.2.attn_1.q_proj.weight
transformer.h.2.attn_1.kv_proj.weight
transformer.h.2.attn_1.out_proj.weight
transformer.h.2.ln_2.weight
transformer.h.2.mlp.fc.weight
transformer.h.2.mlp.proj.weight
transformer.h.3.ln_1.weight
transformer.h.3.attn_1.q_proj.weight
transformer.h.3.attn_1.kv_proj.weight
transformer.h.3.attn_1.out_proj.weight
transformer.h.3.ln_2.weight
transformer.h.3.mlp.fc.weight
transformer.h.3.mlp.proj.weight
transformer.h.4.ln_1.weigh

RuntimeError: shape '[2, 128, 8, 64]' is invalid for input of size 262144

In [61]:
B * T * num_heads * C // num_heads * 2

262144

In [60]:
512 * 512 

262144

In [44]:
k, v = self.kv_proj(x).split(self.embed_dim, dim=2)
k = k.view(B, T, self.num_heads, C // self.num_heads).transpose(1, 2)  # (B, nh, T, hs)
q = q.view(B, T, self.num_heads, C // self.num_heads).transpose(1, 2)  # (B, nh, T, hs)
v = v.view(B, T, self.num_heads, C // self.num_heads).transpose(1, 2) 

torch.Size([512, 512])

In [99]:
inputs['target']

tensor([1., 0.], device='cuda:0')

In [95]:
sized_mismatched_keys = []
for k, v in self.model.state_dict().items():
    if v.size() != loaded['model'][k].size():
        a = set_attr(self.model, k.split("."), None)
        sized_mismatched_keys.append(k)

self.model.load_state_dict(loaded['model'], strict=False)
for missed_key in sized_mismatched_keys:
    set_attr(self.model, missed_key.split("."), torch.nn.Parameter(loaded['model'][missed_key]).to(device))

tensor([1., 0., 1., 0.])

In [74]:
predictions = []
targets = []

In [75]:

prediction = torch.argmax(outputs['prediction'], dim=1) if trainer.model.prediction_task == 'classification' else outputs['prediction']
predictions.extend(dataset.undo_target_transform(prediction.cpu()))
targets.extend(dataset.undo_target_transform(inputs['target'].cpu()))



In [80]:
predictions

[tensor([3.3848], dtype=torch.float64), tensor([3.3848], dtype=torch.float64)]

In [77]:
targets

[tensor([1.3889], dtype=torch.float64), tensor([2.6244], dtype=torch.float64)]

In [78]:
inputs['target']

tensor([-0.6547,  0.3695], device='cuda:0')

In [27]:
import deepchem as dc
from deepchem.feat.molecule_featurizers.raw_featurizer import RawFeaturizer

In [30]:
featurizer = RawFeaturizer(smiles=True)
splitter = 'random'
target_transforms = None
a, b, c = dc.molnet.load_lipo(featurizer=featurizer, splitter=splitter)

In [33]:
c is not None

True

In [34]:
target_transforms = c if c is not None else None

In [35]:
target_transforms

[<deepchem.trans.transformers.NormalizationTransformer at 0x7f8e91352750>]

In [9]:
import torch

torch.Tensor([0.571, 0.914, 0.573]).mean()

tensor(0.6860)

In [10]:
from hybrid_transformer.utils.datasets.utils import load_txt_into_list

data = load_txt_into_list('../data/guacamol/train/smiles.txt')

In [11]:
len(data)

1273104