# Model description

`Baseline` is a simple phase prediction model.  


We solve a binary classification task: each sample is labelled by whether its temperature is above the critical temperature.  
We build one model for each lattice size.  

## Import section

In [None]:
import pandas as pd
import numpy as np
import os, sys
from tqdm import tqdm
import copy
import logging
import time

import torch
import torch.nn as nn
from torchvision.transforms import Compose

import spi3n

In [2]:
def load_data(path_to_samples):
    """Read every sample CSV in a directory into memory, keyed by temperature.

    The temperature is parsed from the file name: the ``.csv`` extension is
    dropped and underscores are read as decimal points, so ``2_25.csv`` is
    stored under the key ``2.25``. Files whose name contains ``dup`` are
    skipped, as are files that do not end in ``.csv``.

    Args:
        path_to_samples: Directory holding the CSV files. A trailing path
            separator is optional.

    Returns:
        dict mapping temperature (float) to the ``pandas.DataFrame`` read
        from the corresponding file.

    Raises:
        ValueError: If the name of a selected file cannot be parsed as a
            float.
    """
    st = time.time()
    data = {}
    for file in os.listdir(path_to_samples):
        # Match on the real extension: a substring test would also accept
        # names such as ``1_5.csv.bak`` and then fail to parse them.
        if not file.endswith('.csv') or 'dup' in file:
            continue
        T = float(file[:-len('.csv')].replace('_', '.'))
        # os.path.join works whether or not the directory ends with a separator.
        data[T] = pd.read_csv(os.path.join(path_to_samples, file))
    logging.info(f"Done with files in {path_to_samples}. Took {time.time()-st} sec.")
    return data
            

class ResNet18(torch.nn.Module):
    """Thin ``torch.nn.Module`` wrapper around spi3n's custom ResNet-18.

    The wrapped network is constructed with ``num_classes=2`` and stored
    as ``self.resnet``.
    """

    def __init__(self):
        super().__init__()
        self.resnet = spi3n.models.custom_resnet.resnet18(num_classes=2)

    def forward(self, x):
        """Return the wrapped network's output for the input batch ``x``."""
        return self.resnet(x)

## Create pipeline
Initialise parameters and create pipeline

In [3]:
# ==================================================================
#
# SET DATA CONSTANTS
#
# ==================================================================

# Lattice size. Options: 24, 48, 72, 96, 144, 216 (243 if BW).
L = 216

# Spin model name. The other option is 'BW'.
SPIN_MODEL = 'ISING'

# Directory with the sample files for the chosen model and lattice size.
PATH_TO_SAMPLES = f'../../../{SPIN_MODEL}/data/samples_{L}/'

# Image count, passed to make_loaders as n_img.
N_IMG = 1500

# Critical temperature 2 / ln(1 + sqrt(2)); used for both Ising and BW.
tc = 2 / np.log(1 + 2 ** 0.5)




# ==================================================================
#
# SET TRANSFORMER
#
# ==================================================================

# Steps are listed in the order Compose applies them.
transform = Compose(transforms=[
    # np.array -> torch.tensor
    spi3n.transform.array2tensor(),
    # random rotation of the image
    # (presumably [1, 2] are the axes to rotate over -- TODO confirm)
    spi3n.transform.Rotate([1, 2]),
])




# ==================================================================
#
# SET TRAINING
#
# ==================================================================

experiment = f'L{L}'  # experiment tag, used in log messages and in save_path
n_epochs = 50
batch_sz = 512
# Data shares for train / valid / test: train+valid take 2/3 (split 90/10),
# test takes 1/3.
# NOTE: keep this a plain list -- a trailing comma after the closing bracket
# would wrap it in a 1-tuple.
train_test_shares = [2/3*0.9, 2/3*0.1, 1/3]
init_lr = 1.0e-4  # learning rate handed to the optimizer
save_path = f'{SPIN_MODEL}/{experiment}/'  # root for logs, checkpoints and CSVs
n_cpu = 25  # passed to make_loaders as n_workers




# ==================================================================
#
# INIT
#
# ==================================================================

# Create the output directories, then configure logging through spi3n's
# helper (arguments: log file name and the save directory).
spi3n.utils.create_dir(save_path)
spi3n.utils.create_dir(f"{save_path}saved_checkpoints/")
spi3n.utils.set_logger('train.log', save_path)

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Record the run configuration at the top of the log.
logging.info(f"""Start new pipeline. 
Model: {SPIN_MODEL}. 
Path: {PATH_TO_SAMPLES}.
Experiment: {experiment}.
Device: {device}.
""")
print(device)


cuda:0


In [None]:
# Read all sample files into memory once; the result is a dict of
# DataFrames keyed by temperature.

loaded_data = load_data(path_to_samples=PATH_TO_SAMPLES)

## Prepare training
Initialise NN, loss, optimizer, dataloaders

In [None]:
# Network, moved to the selected device.
net = ResNet18().to(device)

# Cross-entropy loss and Adam starting from the configured learning rate.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=init_lr)

# Train / validation / test loaders built by spi3n from the in-memory data.
train_loader, valid_loader, test_loader = spi3n.pipeline.make_loaders(
    n_img=N_IMG,
    d_shares=train_test_shares,
    bs=batch_sz,
    DataSetClass=spi3n.loaders.SimpleSampler,
    dataset_params={
        'data': loaded_data,
        'transform': transform,
    },
    n_workers=n_cpu,
    random_state=12345,
)

## Stage: Train

In [None]:
# Per-epoch mean train / validation loss and per-epoch training time (sec).
train_losses, val_losses = [], []
time_arr = []

logging.info(f"start training. \texp: {experiment}.")
for e in tqdm(range(n_epochs)):

    # Train
    st = time.time()
    logging.info(f"ep: \t{e}")
    net.train()
    _train_loss, n_iter = 0.0, 0
    for T, X in train_loader:
        optimizer.zero_grad()
        X = X.to(device).float()
        T = T.to(device)
        # Class label: 1 when the sample temperature is above tc, else 0.
        Y = (T>tc)*1

        # Call the module itself rather than .forward() so that any
        # registered hooks are run.
        Y_hat = net(X)
        loss = loss_fn(Y_hat, Y)
        loss.backward()
        optimizer.step()

        # The loss is a batch mean; weight it by the batch size so the
        # epoch figure is a per-sample mean even if the last batch is short.
        _train_loss += loss.item() * Y.shape[0]
        n_iter += Y.shape[0]

    # Only the training pass is timed; checkpointing and validation are not.
    time_arr += [time.time() - st]
    train_losses += [_train_loss / n_iter]
    logging.info(f"ep:\t{e}. train loss: \t{train_losses[-1]}")

    # Checkpoint every epoch so the best one can be reloaded afterwards.
    # torch.save serialises the state dict directly; no copy is needed.
    torch.save(net.state_dict(), f'{save_path}saved_checkpoints/ep{e}_model')


    # Validate
    net.eval()
    with torch.no_grad():
        _val_loss, n_iter = 0.0, 0
        for T, X in valid_loader:
            X = X.to(device).float()
            T = T.to(device)
            Y = (T>tc)*1

            Y_hat = net(X)
            _val_loss += loss_fn(Y_hat, Y).item() * Y.shape[0]
            n_iter += Y.shape[0]
        val_losses += [_val_loss / n_iter]
    logging.info(f"ep:\t{e}. valid loss: \t{val_losses[-1]}")
    

## Stage: Test best epoch

In [None]:
# Pick the epoch with the lowest validation loss and set the path for the
# test predictions.

best_epoch = np.argmin(val_losses)
logging.info(f"Best epoch: \t{best_epoch}.")
# Mean epoch time +- standard error of the mean, i.e. std / sqrt(n).
logging.info(f"Training time: {np.mean(time_arr)} +- {np.std(time_arr)/np.sqrt(len(time_arr))}")
path_pred_test = f"{save_path}best_epoch_test.csv"



# Save training stats: one row per epoch, with the lattice size repeated
# on every row.

pd.DataFrame({
    'L': L,
    'e': range(n_epochs),
    'train_loss': train_losses,
    'valid_loss': val_losses,
    't': time_arr
}).to_csv(f"{save_path}train_stats.csv", index=False)

In [None]:
# Load the checkpoint of the best epoch. map_location lets a checkpoint
# saved on a GPU be restored when only the CPU is available.
net.load_state_dict(torch.load(
    f'{save_path}saved_checkpoints/ep{best_epoch}_model',
    map_location=device,
))


# Test: write the predictions to CSV batch by batch.
net.eval()
with torch.no_grad():
    # The first batch creates the file and writes the header; later
    # batches are appended without one.
    mode = 'w'
    header=True
    for T, X in test_loader:
        X = X.to(device).float()
        T = T.to(device)
        # Class label: 1 when the sample temperature is above tc, else 0.
        Y = (T>tc)*1
        # Call the module itself (not .forward()) so registered hooks run;
        # softmax over dim 1 turns the two outputs into class probabilities.
        Y_hat = net(X).softmax(1)

        df_out = pd.DataFrame({
            'L': L,
            'T': T.detach().cpu().numpy().reshape(-1),
            # probability of class 0 (T <= tc)
            'f_proba': Y_hat[:,0].detach().cpu().numpy().reshape(-1),
            # probability of class 1 (T > tc)
            'p_proba': Y_hat[:,1].detach().cpu().numpy().reshape(-1),
            'true': Y.detach().cpu().numpy().reshape(-1),
        })
        df_out.to_csv(
            path_pred_test,
            index=False,
            header=header,
            mode=mode
        )
        header = False
        mode = 'a'

print('Done')

## Stage: Test specific epoch

In [15]:
# Choose the epoch to evaluate and the path for its test predictions.
epoch = 7
path_pred_test = f"{save_path}epoch{epoch}_test.csv"


# Load the checkpoint of the chosen epoch. map_location lets a checkpoint
# saved on a GPU be restored when only the CPU is available.
net.load_state_dict(torch.load(
    f'{save_path}saved_checkpoints/ep{epoch}_model',
    map_location=device,
))


# Test: write the predictions to CSV batch by batch.
net.eval()
with torch.no_grad():
    # The first batch creates the file and writes the header; later
    # batches are appended without one.
    mode = 'w'
    header=True
    for T, X in test_loader:
        X = X.to(device).float()
        T = T.to(device)
        # Class label: 1 when the sample temperature is above tc, else 0.
        Y = (T>tc)*1
        # Call the module itself (not .forward()) so registered hooks run;
        # softmax over dim 1 turns the two outputs into class probabilities.
        Y_hat = net(X).softmax(1)

        df_out = pd.DataFrame({
            'L': L,
            'T': T.detach().cpu().numpy().reshape(-1),
            # probability of class 0 (T <= tc)
            'f_proba': Y_hat[:,0].detach().cpu().numpy().reshape(-1),
            # probability of class 1 (T > tc)
            'p_proba': Y_hat[:,1].detach().cpu().numpy().reshape(-1),
            'true': Y.detach().cpu().numpy().reshape(-1),
        })
        df_out.to_csv(
            path_pred_test,
            index=False,
            header=header,
            mode=mode
        )
        header = False
        mode = 'a'

print('Done')

Done
