In [1]:
import os
import torch
from torch import optim, nn, utils, Tensor
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl

In [2]:
import numpy as np
import dataset as MOFdata

def filter_data(x):
    """Rescale the first channel of a sample and pass the next two through.

    The first channel (called ``energy`` below) is clipped to the window
    [-4000, 5000] and mapped linearly onto [-1, 1]; channels 1 and 2 are
    returned unchanged.

    Args:
        x: indexable with at least three array-like channels.

    Returns:
        ``np.ndarray`` stacking the rescaled first channel with ``x[1]`` and ``x[2]``.
    """
    lower, upper = -4000, 5000
    centre = (lower + upper)/2
    span = upper - lower

    # Centre the window on zero, clip to it, then divide by the half-width.
    shifted = x[0] - centre
    clipped = np.clip(shifted, lower - centre, upper - centre)
    energy = clipped / (span / 2)

    return np.array([energy, x[1], x[2]])


def filter_labels(x):
    """Return only the first three entries of the label sequence ``x``."""
    n_kept = 3
    return x[:n_kept]

# Input files and the label column group handed to the project's Dataset class.
grid_file = "data/probability.npy"
csv_file = "data/ASR.csv"
lattice_file = "data/grids.lattice.npy"
feature = "lattice"

dataset = MOFdata.Dataset(grid_file, csv_file, lattice_file, feature, transform=filter_data, transform_labels=filter_labels)

# 90 % train / 5 % validation / remainder test; the fixed generator seed keeps
# the split identical between runs.
train_set_size = int(.9 * len(dataset))
validation_set_size = int(.05 * len(dataset))
test_set_size = len(dataset) - train_set_size - validation_set_size
train_set, validation_set, test_set = random_split(
    dataset=dataset,
    lengths=(train_set_size,
    validation_set_size,
    test_set_size),
    generator=torch.Generator().manual_seed(42))

loader_args = dict(batch_size=64, num_workers=4)
# Only the training loader is shuffled. Without shuffling, a Trainer that caps
# the number of training batches per epoch would revisit the same leading
# samples in the same order every epoch and never reach the rest of the split.
train_loader = DataLoader(train_set, shuffle=True, **loader_args)
validation_loader = DataLoader(validation_set, **loader_args)
test_loader = DataLoader(test_set, **loader_args)

# Scan the training split once to report the value range of the transformed inputs.
a = np.min(train_set[0][0])
b = np.max(train_set[0][0])
# NOTE: nothing below increments `small`, so the "small grids" line always reports 0.
small = 0
for x, _ in train_set:
    a = min(a, np.min(x))
    b = max(b, np.max(x))
print("small grids", small, len(train_set))
print("data shape", train_set[0][0].shape)
print("feature shape", train_set[0][1].shape, train_set[0][1])
print("data range", a, "->", b)

available features: 10143 available grids: 10140
0 missing features
small grids 0 9126
data shape (3, 32, 32, 32)
feature shape (3,) [14.932 14.932 11.956]
data range -0.48497555 -> 5.660271


In [3]:
import random
from zeonet import BasicModel

def loss_fn(output, target):
    """Mean squared error between ``output`` (predictions) and ``target``."""
    mse = nn.functional.mse_loss(input=output, target=target)
    return mse

def proportional_loss(output, target):
    """Mean absolute error of ``output`` relative to the magnitude of ``target``.

    Each element contributes ``|output - target| / |target|``; the result is
    the mean over all elements.
    """
    abs_error = (output - target).abs()
    relative_error = abs_error / target.abs()
    return relative_error.mean()

# define the LightningModule
class ModulePL(pl.LightningModule):
    """Lightning wrapper that trains a ``BasicModel`` with an MSE objective.

    Args:
        features: forwarded to ``BasicModel`` as ``features``.
        channels: forwarded to ``BasicModel`` as ``channels``.
        dropout: forwarded to ``BasicModel`` as ``dropout``.
        device: accepted but not used by this class; kept so existing callers
            that pass it keep working.
    """

    def __init__(self, features=3, channels=3, dropout=0.2, device=None):
        super().__init__()
        self.model = BasicModel(features=features, channels=channels, dropout=dropout)

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute and log the MSE training loss for one ``(x, y)`` batch."""
        x, y = batch
        y = y.float()
        y_hat = self.forward(x)
        # Prediction first, target second: matches loss_fn's signature and
        # validation_step, so replacing loss_fn with an asymmetric loss stays correct.
        loss = loss_fn(y_hat, y)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Log MSE, L1 and proportional losses for one batch; return the MSE."""
        x, y = val_batch
        y = y.float()
        y_hat = self.forward(x)
        loss = loss_fn(y_hat, y)
        l1_loss = nn.functional.l1_loss(y_hat, y)

        self.log('validation_loss', loss)
        self.log('validation_l1_loss', l1_loss)
        self.log('validation_p_loss', proportional_loss(y_hat, y))
        return loss

    def configure_optimizers(self):
        """Return Nesterov SGD plus a plateau scheduler keyed on ``validation_loss``."""
        optimizer = optim.SGD(self.parameters(), lr=1e-3, momentum=0.99, nesterov=True)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=0.2, cooldown=20, verbose=True
        )
        # ReduceLROnPlateau needs a metric to watch; "monitor" names the one
        # logged in validation_step.
        return dict(optimizer=optimizer, lr_scheduler=scheduler, monitor="validation_loss")


# Build the module with its default arguments (features=3, channels=3, dropout=0.2).
model = ModulePL()

In [4]:
# set up trainer
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint

# Directory that receives the checkpoints written during training.
chkpt_dir = "./lattice_pt"

# Metrics are sent to the Weights & Biases project "lattice".
wandb_logger = WandbLogger(project="lattice")

# Keep the two checkpoints with the best monitored "validation_loss".
checkpoint_callback = ModelCheckpoint(
    monitor="validation_loss",
    save_top_k=2,
    dirpath=chkpt_dir,
)

[34m[1mwandb[0m: Currently logged in as: [33mefergus3[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# import os
# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

# train
# Configure the run: GPU training for up to 100 epochs, with per-epoch caps on
# the number of training and validation batches and gradient clipping at 0.5.
trainer = pl.Trainer(
    accelerator='gpu',
    max_epochs=100,
    limit_train_batches=100,
    limit_val_batches=10,
    gradient_clip_val=0.5,
    logger=wandb_logger,
    callbacks=[checkpoint_callback],
)

# Fit on the training loader, validating against the validation loader.
trainer.fit(
    model,
    train_dataloaders=train_loader,
    val_dataloaders=validation_loader,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | BasicModel | 17.2 M
-------------------------------------
17.2 M    Trainable params
0         Non-trainable params
17.2 M    Total params
68.628    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [None]:
import itertools
def predict(model, loader, count):
    """Run ``model`` over batches from ``loader`` and collect up to ``count`` samples.

    When ``model`` is a ``torch.nn.Module`` it is put into eval mode for the
    duration of the call, so layers such as dropout are inactive, and its
    previous train/eval mode is restored afterwards. Inputs are moved to the
    device of the model's first parameter, and all results are moved to the
    CPU before being converted to NumPy.

    Args:
        model: callable mapping a batch of inputs to a batch of predictions.
        loader: iterable yielding ``(inputs, targets)`` tensor batches.
        count: maximum number of samples to return.

    Returns:
        ``(predictions, correct)``: two arrays with one row per sample and at
        most ``count`` rows each (empty arrays if nothing was collected).
    """
    correct = []
    predictions = []

    is_module = isinstance(model, torch.nn.Module)
    device = None
    was_training = False
    if is_module:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device
        was_training = model.training
        model.eval()

    try:
        with torch.no_grad():
            # `count` batches is a safe upper bound: every batch holds >= 1 sample.
            for data_x, data_y in itertools.islice(loader, count):
                if device is not None:
                    data_x = data_x.to(device)
                pred_y = model(data_x)

                # .cpu() is required before NumPy conversion for CUDA tensors.
                targets = data_y.detach().cpu().numpy()
                outputs = pred_y.detach().cpu().numpy()

                # Take only as many rows as are still needed to reach `count`.
                remaining = count - len(correct)
                for d, p in zip(targets[:remaining], outputs[:remaining]):
                    correct.append(d)
                    predictions.append(p)
                if len(correct) >= count:
                    break
    finally:
        if is_module:
            model.train(was_training)

    return np.array(predictions), np.array(correct)

In [None]:
# load most recent model
import util
# `util.latest_file` is a project helper; presumably it returns the path of the
# newest file under `chkpt_dir` — TODO confirm (newest is not necessarily best).
checkpoint = util.latest_file(chkpt_dir)
print("loading", checkpoint)
# Rebinds the global `model`, replacing the instance built in an earlier cell.
model = ModulePL.load_from_checkpoint(checkpoint)

In [None]:
# plot performance
import matplotlib.pyplot as plt

# Number of samples plotted per loader (never more than the test split holds).
count = min(100, test_set_size)

# Half-open range of label columns to plot, one subplot row per column.
feature_range = (0, 3)
loaders = dict(
    training=train_loader,
    test=test_loader
)
n_rows = feature_range[1] - feature_range[0]
# squeeze=False keeps `axs` two-dimensional even for a single row or column.
fig, axs = plt.subplots(
    n_rows,
    len(loaders),
    figsize=(len(loaders)*4, 3*n_rows),
    dpi=100,
    squeeze=False,
)
fig.suptitle(f"CNN Performance on {count} MOFs", fontsize=16, verticalalignment='center')

for i, (title, loader) in enumerate(loaders.items()):
    # predict returns (predictions, ground truth), in that order.
    predicted, actual = predict(model, loader, count)
    # Cap at `count` rows so the figure matches the sample count in its title.
    predicted, actual = predicted[:count], actual[:count]
    for feature_idx in range(*feature_range):
        # Row index is relative to the start of feature_range, not the raw column.
        ax = axs[feature_idx - feature_range[0]][i]
        predicted_vals = predicted[:, feature_idx]
        actual_vals = actual[:, feature_idx]
        lim = [0, max(predicted_vals.max(), actual_vals.max())*1.1]
        ax.scatter(predicted_vals, actual_vals, s=15, linewidths=1, edgecolors='black')

        ax.title.set_text(title)
        ax.set_xlabel(f"predicted (feature {feature_idx})")
        ax.set_ylabel(f"actual (feature {feature_idx})")
        ax.set_aspect('equal')
        ax.set_xlim(lim)
        ax.set_ylim(lim)
        # Red diagonal marks perfect agreement between prediction and truth.
        ax.plot([0, lim[1]], [0, lim[1]], color='red')

plt.show()

100


In [10]:
# load specific model
import util
from basic_net import BasicModel
if True:
    # One-off export that produced model.pt from a Lightning checkpoint; kept
    # commented out as a record of where the weights file came from.
    # checkpoint = "lattice_pt/saved_checkpoints/basic_net_lattice_constants/epoch=216-step=21700.ckpt"
    # print("loading", checkpoint)
    # saved_model = ModulePL.load_from_checkpoint(checkpoint)
    # torch.save(saved_model.model.state_dict(), "lattice_pt/saved_checkpoints/basic_net_lattice_constants/model.pt")
    saved_model = BasicModel(6, 3, 32, 0.2)
    # map_location="cpu" lets the weights load on a machine without CUDA even
    # if they were saved from a GPU; load_state_dict then copies them into the
    # freshly built model.
    saved_model.load_state_dict(
        torch.load(
            "lattice_pt/saved_checkpoints/basic_net_lattice_constants/model.pt",
            map_location="cpu",
        )
    )
    # Freeze every parameter of the restored network.
    saved_model.requires_grad_(False)