In [41]:
import matplotlib.pyplot as plt
import pnstyler.styler as styler
from data_preprocess import trajectory_to_ase_atoms, preprocess_asedb
from train import get_dataset
import ml_confs
from tqdm.auto import tqdm
from kooplearn.nn.functional import vamp_score, relaxed_projection_score

from pathlib import Path
import schnetpack
import logging 
from input_pipeline import TimeLaggedSampler
from model import GraphDPNet
import torch
import lightning
import numpy as np

In [17]:
configs = ml_confs.from_file('configs.yaml')
configs.tabulate()

In [13]:
#trajectory_to_ase_atoms(configs)

In [14]:
#preprocess_asedb(cutoff, '/home/pnovelli/code/dp_examples/chignolin/schnetpack/data/CLN025-0-protein_backbone.db')

In [15]:
db_path = Path('/home/pnovelli/code/dp_examples/chignolin/schnetpack/data/CLN025-0-protein_backbone.db')

In [18]:
dataset = get_dataset(db_path, configs.cutoff)
n_atoms = dataset[0][schnetpack.properties.n_atoms].item()

model = GraphDPNet(
        configs,
        n_atoms,
        torch.optim.Adam,
        use_relaxed_loss=configs.use_relaxed_loss,
        metric_deformation_loss_coefficient=configs.metric_loss,
        optimizer_kwargs={"lr": 1e-2},
    )

In [43]:
checkpoint_path = 'ckpt/daily-rain-10/epoch=99-step=16700.ckpt'
model = GraphDPNet.load_from_checkpoint(checkpoint_path, configs=configs, optimizer = torch.optim.Adam)
model.to('cpu')
fmap = []
with torch.no_grad():
    for i in tqdm(range(2048)):
        data = dataset[i]
        out = model(data)
        fmap.append(out['scalar_representation'].squeeze())
fmap = torch.tensor(fmap)
fmap = fmap.mean(dim=1)

  0%|          | 3/2048 [00:10<1:56:10,  3.41s/it]


KeyboardInterrupt: 

In [42]:
X = fmap[:-1]
Y = fmap[1:]

_norm = torch.rsqrt(torch.tensor(encoded_X.shape[0]))
encoded_X *= _norm
encoded_Y *= _norm

cov_X = torch.mm(encoded_X.T, encoded_X)
cov_Y = torch.mm(encoded_Y.T, encoded_Y)
cov_XY = torch.mm(encoded_X.T, encoded_Y)

In [39]:
vals, vecs = np.linalg.eigh(cov)

In [40]:
vals

array([1.52772163e-04, 2.36804724e-04, 5.87446966e-04, 1.35341729e-03,
       4.85378354e-03, 6.21326797e-03, 2.49874705e-02, 5.19040921e-02,
       8.22884161e-02, 1.45571794e-01, 3.09330047e-01, 4.76245486e-01,
       2.39180486e+00, 5.12561120e+00, 2.58280021e+01, 1.01491094e+02])

In [22]:
#train_logger = lightning.pytorch.loggers.WandbLogger(project='GraphDPNet-chignolin', entity='csml')

lr_finder_cb = lightning.pytorch.callbacks.LearningRateFinder()

ckpt_path = db_path.parent.parent / 'ckpt' / str(train_logger.experiment.name)
ckpt_cb = lightning.pytorch.callbacks.ModelCheckpoint()

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mpietronvll[0m ([33mcsml[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [23]:
trainer = lightning.Trainer(
    accelerator='gpu', 
    #logger=train_logger, 
    max_epochs=2, 
    callbacks=[lr_finder_cb, ckpt_cb],
)

INFO: Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
INFO:lightning.pytorch.utilities.rank_zero:Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU av

In [24]:
trainer.fit(model, dataloader)

/home/pnovelli/miniforge3/envs/schnetpack/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:43: attribute 'configs' removed from hparams because it cannot be pickled
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]


AttributeError: 'GraphDPNet' object has no attribute '_optimizer'

In [None]:
#Data loading
dataset = get_dataset(db_path, cutoff)
batch_sampler = TimeLaggedSampler(dataset, batch_size = configs.training.batch_size, lagtime = configs.data.lagtime, shuffle = configs.training.shuffle)
dataloader = schnetpack.data.AtomsLoader(dataset, batch_sampler=batch_sampler, num_workers=20, persistent_workers=True)


#Optimizer
optimizer = torch.optim.Adam
#Model
n_atoms = dataset[0][schnetpack.properties.n_atoms].item()
model = GraphDPNet(configs, n_atoms, optimizer, use_relaxed_loss = True, metric_deformation_loss_coefficient=1.0,  optimizer_kargs= {'lr': 1e-3})
logger = lightning.loggers.WandbLogger(project=configs.wandb.project_name, config=configs.to_dict(), entity=configs.wandb.entity)



ckpt_path = os.path.join(ckpt_path, str(logger.experiment.name))
ckpt_callback = pl.callbacks.ModelCheckpoint(
    monitor='val/P',
    dirpath = ckpt_path,
    mode = 'max', 
    save_top_k = 5, 
    save_last = True
)

trainer = lightning.Trainer(
    accelerator='gpu', 
    logger=logger, 
    max_epochs=configs.training.epochs, 
    log_every_n_steps=configs.training.log_every,
    callbacks=[ckpt_callback],
    limit_val_batches = 10,
    val_check_interval = configs.training.log_every
    )
trainer.fit(model, dataloader, val_dataloaders=dataloader) #Using the same data for tranining and validation is not kosher, but it's a hack for now.