In [1]:
from bounce import *
from torch.utils.data import DataLoader
from pysr import PySRRegressor
import torch
import torch.nn.functional as F
import pytorch_lightning as pl
import pickle

# Batch the bouncing-ball dataset. The 0.5 is handed straight to the dataset
# constructor (presumably the bounce ratio -- TODO confirm against bounce.py).
loader = DataLoader(
    dataset=BouncyBallsDataBounceRatioLabels(0.5),
    batch_size=4096,
    drop_last=True,    # the final, smaller batch is discarded
    pin_memory=False,
)

# Pull the first batch once so later cells can display it and smoke-test models on `x`.
x, y, l = next(iter(loader))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Display the first batch: inputs `x`, four-column targets `y`, and the int32
# 0/1 tensor `l` (presumably the bounce labels -- TODO confirm).
x, y, l

(tensor([[ 37.0000, 332.5000,   0.0000,  ...,  11.0000,   0.8754,   0.5194],
         [ 37.0000, 394.2500,   0.0000,  ...,  11.0000,   0.8754,   0.5194],
         [ 42.4422, 392.2435, 160.5849,  ...,  11.0000,   0.8754,   0.5194],
         ...,
         [ 67.7732, 479.5254,  38.8028,  ...,  22.0000,   0.8761,   0.9750],
         [ 68.4199, 480.5545,  38.8028,  ...,  22.0000,   0.8761,   0.9750],
         [ 69.0666, 481.8336,  38.8028,  ...,  22.0000,   0.8761,   0.9750]]),
 tensor([[  37.0000,  333.5000,    0.0000,   75.0000],
         [  37.0000,  400.0000,  160.5849, -232.3085],
         [  45.1186,  388.8717,  160.5849, -187.3085],
         ...,
         [  68.4199,  480.5545,   38.8028,   76.7482],
         [  69.0666,  481.8336,   38.8028,   91.7482],
         [  69.7133,  483.3628,   38.8028,  106.7482]]),
 tensor([[0],
         [1],
         [0],
         ...,
         [0],
         [0],
         [0]], dtype=torch.int32))

In [3]:
# Load the previously fitted model that the training steps below call via
# `sr_model.predict` (presumably a PySRRegressor -- verify against the notebook
# that wrote the file).
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load a 'base_model.pk' that comes from a trusted source.
with open('base_model.pk', 'rb') as sr_model_file:
    sr_model = pickle.load(sr_model_file)

In [5]:
# model for predicting category (bounce or not)

class BaseModule(pl.LightningModule):
    """Training logic for a classifier that flags samples the SR model predicts poorly.

    For every batch the module-level ``sr_model`` predicts the targets. Samples
    whose mean absolute error is above the batch average get category 1, all
    others 0. The sigmoid of the network output is fitted to that category
    with an MSE loss.

    Subclasses must implement ``forward(x)``; ``output_size`` is 1, so it is
    expected to return one logit per sample.
    """

    def __init__(self, only_position=False):
        # Zero-argument super() keeps working when a later cell rebinds the
        # global name `BaseModule` to a different class object.
        super().__init__()
        self.only_position = only_position  # stored; not read by this class
        self.input_size = 8
        self.output_size = 1

    def training_step(self, batch, batch_idx):
        """Compute and log the MSE between predicted and SR-derived error category.

        Args:
            batch: sequence whose first two entries are the inputs ``x`` and the
                targets ``y``. Further entries (such as the label tensor that
                this notebook's loader yields) are ignored.
            batch_idx: index of the batch; unused.

        Returns:
            Scalar loss tensor.
        """
        x, y, *_ = batch

        # sr_model.predict is fed a CPU array; its result is turned back into a
        # tensor on the targets' device, so no GPU is hard-coded here.
        y_sr = torch.as_tensor(
            sr_model.predict(x.detach().cpu().numpy()),
            dtype=y.dtype,
            device=y.device,
        )

        # Per-sample mean absolute error of the SR model, kept as a column.
        error = (y - y_sr).abs().mean(dim=1, keepdim=True)
        # 1.0 where the SR error is above the batch mean, else 0.0.
        category = (error > error.mean(0, keepdim=True)).type(torch.float)

        y_hat = torch.sigmoid(self.forward(x))
        # Open question kept from the original: scaling, or MAE/RMSE instead of MSE.
        loss = F.mse_loss(y_hat, category)
        self.log('train_loss', loss.item(), on_epoch=True, on_step=False)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate 1e-5."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-5)
        return optimizer

In [27]:
# model: soft_min of free fall and MLP

class BaseModule(pl.LightningModule):
    """Training logic for a soft-min mixture of the SR model and a trainable network.

    Both the module-level ``sr_model`` and the subclass's ``forward`` predict
    the targets. Each sample's training loss is a weighted sum of the two L1
    errors, where the weights favour whichever predictor is better on that
    sample (relative to its own batch-mean error).

    Subclasses must implement ``forward(x)`` returning ``output_size`` (4)
    values per sample.
    """

    def __init__(self, only_position=False):
        # Zero-argument super() keeps working when a later cell rebinds the
        # global name `BaseModule` to a different class object.
        super().__init__()
        self.only_position = only_position  # stored; not read by this class
        self.input_size = 8
        self.output_size = 4

    def training_step(self, batch, batch_idx):
        """Compute the soft-min weighted L1 loss and log its components.

        Args:
            batch: sequence whose first two entries are the inputs ``x`` and the
                targets ``y``. Further entries (such as the label tensor that
                this notebook's loader yields) are ignored.
            batch_idx: index of the batch; unused.

        Returns:
            Scalar loss tensor.
        """
        x, y, *_ = batch

        # sr_model.predict is fed a CPU array; its result is turned back into a
        # tensor on the targets' device, so no GPU is hard-coded here.
        y_sr = torch.as_tensor(
            sr_model.predict(x.detach().cpu().numpy()),
            dtype=y.dtype,
            device=y.device,
        )
        y_mlp = self.forward(x)

        # Averaging over dim 1 gives one loss per sample; without the mean the
        # soft-min below would act dimension-wise.
        loss_sr = F.l1_loss(y_sr, y, reduction='none').mean(dim=1)
        loss_mlp = F.l1_loss(y_mlp, y, reduction='none').mean(dim=1)

        # Express each loss relative to its own batch mean; with raw losses the
        # soft-min gave 0 + eps to most of the larger values.
        # Idea kept from the original: compare to the mean of the two instead.
        loss_sr_ = loss_sr / (loss_sr.mean() + 1e-6)
        loss_mlp_ = loss_mlp / (loss_mlp.mean() + 1e-6)

        # The epsilon keeps both weights non-zero: otherwise, when both losses
        # are very large, neither branch would try to learn the sample.
        raw_sr = torch.exp(-loss_sr_) + 1e-6
        raw_mlp = torch.exp(-loss_mlp_) + 1e-6
        raw_sum = raw_sr + raw_mlp
        c_sr = raw_sr / raw_sum
        c_mlp = raw_mlp / raw_sum

        weighted_sr = c_sr * loss_sr
        weighted_mlp = c_mlp * loss_mlp
        loss = (weighted_sr + weighted_mlp).mean()

        # Correlation between the two per-sample error vectors.
        error_corr = torch.corrcoef(torch.stack([loss_sr, loss_mlp]))[0, 1]

        self.log('sr_loss', loss_sr.mean().item(), on_epoch=True, on_step=False)
        self.log('mlp_loss', loss_mlp.mean().item(), on_epoch=True, on_step=False)
        self.log('c_sr_loss', weighted_sr.mean().item(), on_epoch=True, on_step=False)
        self.log('c_mlp_loss', weighted_mlp.mean().item(), on_epoch=True, on_step=False)
        self.log('train_loss', loss.item(), on_epoch=True, on_step=False)
        self.log('error_corr', error_corr.item(), on_epoch=True, on_step=False)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate 1e-5."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-5)
        return optimizer

class MLP_model(BaseModule):
    """Single MLP (stored as ``self.model``) trained with the base module's training step."""

    def __init__(self, hidden_sizes, only_position=False):
        super().__init__(only_position)
        # Input and output widths are fixed by the base class; only the hidden
        # layer widths are chosen by the caller.
        self.model = MLP(
            input_size=self.input_size,
            hidden_sizes=hidden_sizes,
            output_size=self.output_size,
        )

    def forward(self, x):
        """Return the MLP's prediction for ``x``."""
        prediction = self.model(x)
        return prediction

In [117]:
class BaseModule(pl.LightningModule):
    """Training logic for two networks mixed by a per-sample soft-min of their L1 losses.

    Subclasses must provide the sub-networks ``self.mlp1`` and ``self.mlp2``,
    each mapping the inputs to ``output_size`` (4) values per sample.
    """

    def __init__(self, only_position=False):
        # Zero-argument super() keeps working when a later cell rebinds the
        # global name `BaseModule` to a different class object.
        super().__init__()
        self.only_position = only_position  # stored; not read by this class
        self.input_size = 8
        self.output_size = 4

    def training_step(self, batch, batch_idx):
        """Compute the soft-min weighted L1 loss of both networks and log its parts.

        Args:
            batch: sequence whose first two entries are the inputs ``x`` and the
                targets ``y``. Further entries (such as the label tensor that
                this notebook's loader yields) are ignored.
            batch_idx: index of the batch; unused.

        Returns:
            Scalar loss tensor.
        """
        x, y, *_ = batch

        y_mlp1 = self.mlp1(x)
        y_mlp2 = self.mlp2(x)

        # Open question kept from the original: L1 or L2 loss?
        # Averaging over dim 1 gives one loss per sample; without the mean the
        # soft-min below would act dimension-wise.
        loss_mlp1 = F.l1_loss(y_mlp1, y, reduction='none').mean(dim=1)
        loss_mlp2 = F.l1_loss(y_mlp2, y, reduction='none').mean(dim=1)

        # The epsilon keeps both weights non-zero: otherwise, when both losses
        # are very large, neither network would try to learn the sample.
        raw_mlp1 = torch.exp(-loss_mlp1) + 1e-6
        raw_mlp2 = torch.exp(-loss_mlp2) + 1e-6
        raw_sum = raw_mlp1 + raw_mlp2
        c_mlp1 = raw_mlp1 / raw_sum
        c_mlp2 = raw_mlp2 / raw_sum

        weighted_mlp1 = c_mlp1 * loss_mlp1
        weighted_mlp2 = c_mlp2 * loss_mlp2
        loss = (weighted_mlp1 + weighted_mlp2).mean()

        # Correlation between the two per-sample error vectors.
        error_corr = torch.corrcoef(torch.stack([loss_mlp1, loss_mlp2]))[0, 1]

        self.log('mlp1_loss', loss_mlp1.mean().item(), on_epoch=True, on_step=False)
        self.log('mlp2_loss', loss_mlp2.mean().item(), on_epoch=True, on_step=False)
        self.log('c_mlp1_loss', weighted_mlp1.mean().item(), on_epoch=True, on_step=False)
        self.log('c_mlp2_loss', weighted_mlp2.mean().item(), on_epoch=True, on_step=False)
        self.log('train_loss', loss.item(), on_epoch=True, on_step=False)
        self.log('error_corr', error_corr.item(), on_epoch=True, on_step=False)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate 1e-5."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-5)
        return optimizer

class MLP_model(BaseModule):
    """Two independently sized MLPs (``mlp1``, ``mlp2``) trained with the base module's step."""

    def __init__(self, hidden_sizes1, hidden_sizes2, only_position=False):
        super().__init__(only_position)
        # Both networks share the input/output widths fixed by the base class.
        n_in, n_out = self.input_size, self.output_size
        self.mlp1 = MLP(n_in, hidden_sizes1, n_out)
        self.mlp2 = MLP(n_in, hidden_sizes2, n_out)

    def forward(self, x):
        """Return the predictions of both networks as a ``(mlp1, mlp2)`` tuple."""
        first = self.mlp1(x)
        second = self.mlp2(x)
        return first, second

In [3]:
# model: no softmin, just MLP


class BaseModule(pl.LightningModule):
    """Training logic for two networks mixed by a learned per-sample gate.

    Subclasses must provide ``self.mlp1`` and ``self.mlp2`` (each producing
    ``output_size`` = 4 values per sample) and ``self.mlpc``, whose sigmoid is
    used as the weight of ``mlp1``; ``mlp2`` gets one minus that weight.
    """

    def __init__(self, only_position=False):
        # Zero-argument super() keeps working when a later cell rebinds the
        # global name `BaseModule` to a different class object.
        super().__init__()
        self.only_position = only_position  # stored; not read by this class
        self.input_size = 8
        self.output_size = 4

    def training_step(self, batch, batch_idx):
        """Compute the gate-weighted L1 loss of both networks and log its parts.

        Args:
            batch: sequence whose first two entries are the inputs ``x`` and the
                targets ``y``. Further entries (such as the label tensor that
                this notebook's loader yields) are ignored.
            batch_idx: index of the batch; unused.

        Returns:
            Scalar loss tensor.
        """
        x, y, *_ = batch

        y_mlp1 = self.mlp1(x)
        y_mlp2 = self.mlp2(x)
        # Squeeze only the trailing axis so a batch holding a single sample
        # keeps its batch dimension.
        # assumes self.mlpc returns one column per sample -- TODO confirm in the subclass
        c_mlp = torch.sigmoid(self.mlpc(x)).squeeze(-1)

        # No padding of the gate and no soft-min weighting in this variant.

        # Open question kept from the original: L1 or L2 loss?
        # Averaging over dim 1 gives one loss per sample.
        loss_mlp1 = F.l1_loss(y_mlp1, y, reduction='none').mean(dim=1)
        loss_mlp2 = F.l1_loss(y_mlp2, y, reduction='none').mean(dim=1)

        weighted_mlp1 = c_mlp * loss_mlp1
        weighted_mlp2 = (1 - c_mlp) * loss_mlp2
        loss = (weighted_mlp1 + weighted_mlp2).mean()

        # Correlation between the two error vectors, and between the first
        # network's error and the gate.
        error_corr = torch.corrcoef(torch.stack([loss_mlp1, loss_mlp2]))[0, 1]
        coeff_corr = torch.corrcoef(torch.stack([loss_mlp1, c_mlp]))[0, 1]

        self.log('mlp1_loss', loss_mlp1.mean().item(), on_epoch=True, on_step=False)
        self.log('mlp2_loss', loss_mlp2.mean().item(), on_epoch=True, on_step=False)
        self.log('c_mlp1_loss', weighted_mlp1.mean().item(), on_epoch=True, on_step=False)
        self.log('c_mlp2_loss', weighted_mlp2.mean().item(), on_epoch=True, on_step=False)
        self.log('train_loss', loss.item(), on_epoch=True, on_step=False)
        self.log('error_corr', error_corr.item(), on_epoch=True, on_step=False)
        self.log('coeff_corr', coeff_corr.item(), on_epoch=True, on_step=False)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate 1e-5."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-5)
        return optimizer

class MLP_model(BaseModule):
    """Two prediction MLPs plus a one-output gate MLP (``mlp1``, ``mlp2``, ``mlpc``)."""

    def __init__(self, hidden_sizes1, hidden_sizes2, hidden_sizes3, only_position=False):
        super().__init__(only_position)
        self.mlp1 = MLP(self.input_size, hidden_sizes1, self.output_size)
        self.mlp2 = MLP(self.input_size, hidden_sizes2, self.output_size)
        # The gate network emits a single value per sample.
        self.mlpc = MLP(self.input_size, hidden_sizes3, 1)

    def forward(self, x):
        """Return ``(mlp1(x), mlp2(x), gate)`` where ``gate`` is the sigmoid of ``mlpc(x)``.

        Only the trailing size-1 axis of the gate is removed, so an input
        holding a single sample keeps its batch dimension.
        """
        gate = torch.sigmoid(self.mlpc(x)).squeeze(-1)
        return self.mlp1(x), self.mlp2(x), gate

In [6]:
# model: softmin helps out


class BaseModule(pl.LightningModule):
    """Training logic combining a learned gate with a soft-min over two networks.

    Subclasses must provide ``self.mlp1`` and ``self.mlp2`` (each producing
    ``output_size`` = 4 values per sample) and ``self.mlpc``, whose sigmoid is
    the learned weight of ``mlp1``. The loss averages a gate-weighted and a
    soft-min-weighted mixture of the two L1 errors and is then scaled by two
    correlation factors (see ``training_step``).
    """

    def __init__(self, only_position=False):
        # Zero-argument super() keeps working when a later cell rebinds the
        # global name `BaseModule` to a different class object.
        super().__init__()
        self.only_position = only_position  # stored; not read by this class
        self.input_size = 8
        self.output_size = 4

    def training_step(self, batch, batch_idx):
        """Compute the combined gate/soft-min loss and log its components.

        Args:
            batch: sequence whose first two entries are the inputs ``x`` and the
                targets ``y``. Further entries (such as the label tensor that
                this notebook's loader yields) are ignored.
            batch_idx: index of the batch; unused.

        Returns:
            Scalar loss tensor.
        """
        x, y, *_ = batch

        y_mlp1 = self.mlp1(x)
        y_mlp2 = self.mlp2(x)
        # Squeeze only the trailing axis so a batch holding a single sample
        # keeps its batch dimension.
        # assumes self.mlpc returns one column per sample -- TODO confirm in the subclass
        c_mlp = torch.sigmoid(self.mlpc(x)).squeeze(-1)

        # Pull the gate slightly away from exactly 0 and 1 so that neither
        # network's weight can collapse to zero.
        padding_weight = 1e-5
        c_mlp = (c_mlp + 0.5 * padding_weight) / (1 + padding_weight)

        # Open question kept from the original: L1 or L2 loss?
        # Averaging over dim 1 gives one loss per sample; without the mean the
        # soft-min below would act dimension-wise.
        loss_mlp1 = F.l1_loss(y_mlp1, y, reduction='none').mean(dim=1)
        loss_mlp2 = F.l1_loss(y_mlp2, y, reduction='none').mean(dim=1)

        # Express each loss relative to its own batch mean; with raw losses the
        # soft-min gave 0 + eps to most of the larger values.
        loss_mlp1_ = loss_mlp1 / (loss_mlp1.mean() + 1e-6)
        # Fixed: this previously divided loss_mlp1 again, so the second
        # network's own per-sample error never entered its soft-min weight.
        loss_mlp2_ = loss_mlp2 / (loss_mlp2.mean() + 1e-6)

        # The epsilon keeps both weights non-zero: otherwise, when both losses
        # are very large, neither network would try to learn the sample.
        raw_mlp1 = torch.exp(-loss_mlp1_) + 1e-6
        raw_mlp2 = torch.exp(-loss_mlp2_) + 1e-6
        raw_sum = raw_mlp1 + raw_mlp2
        c_mlp1 = raw_mlp1 / raw_sum
        c_mlp2 = raw_mlp2 / raw_sum

        # Equal weight for the gate-based and the soft-min-based mixture.
        c_loss = c_mlp * loss_mlp1 + (1 - c_mlp) * loss_mlp2
        m_loss = c_mlp1 * loss_mlp1 + c_mlp2 * loss_mlp2
        loss = (0.5 * (c_loss + m_loss)).mean()

        error_corr = torch.corrcoef(torch.stack([loss_mlp1, loss_mlp2]))[0, 1]
        coeff_corr = torch.corrcoef(torch.stack([c_mlp1, c_mlp]))[0, 1]

        # Both factors lie in [1, 3]. Minimising the product pushes the error
        # correlation of the two networks down and pushes the correlation
        # between the soft-min weight and the learned gate up.
        loss = loss * (2 + error_corr) * (2 - coeff_corr)

        self.log('mlp1_loss', loss_mlp1.mean().item(), on_epoch=True, on_step=False)
        self.log('mlp2_loss', loss_mlp2.mean().item(), on_epoch=True, on_step=False)
        self.log('c_mlp1_loss', (c_mlp * loss_mlp1).mean().item(), on_epoch=True, on_step=False)
        self.log('c_mlp2_loss', ((1 - c_mlp) * loss_mlp2).mean().item(), on_epoch=True, on_step=False)
        self.log('train_loss', loss.item(), on_epoch=True, on_step=False)
        self.log('train_c_loss', c_loss.mean().item(), on_epoch=True, on_step=False)
        self.log('train_m_loss', m_loss.mean().item(), on_epoch=True, on_step=False)
        self.log('error_corr', error_corr.item(), on_epoch=True, on_step=False)
        self.log('coeff_corr', coeff_corr.item(), on_epoch=True, on_step=False)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate 1e-5."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-5)
        return optimizer

class MLP_model(BaseModule):
    """Two prediction MLPs plus a one-output gate MLP (``mlp1``, ``mlp2``, ``mlpc``)."""

    def __init__(self, hidden_sizes1, hidden_sizes2, hidden_sizes3, only_position=False):
        super().__init__(only_position)
        self.mlp1 = MLP(self.input_size, hidden_sizes1, self.output_size)
        self.mlp2 = MLP(self.input_size, hidden_sizes2, self.output_size)
        # The gate network emits a single value per sample.
        self.mlpc = MLP(self.input_size, hidden_sizes3, 1)

    def forward(self, x):
        """Return ``(mlp1(x), mlp2(x), gate)`` where ``gate`` is the sigmoid of ``mlpc(x)``.

        Only the trailing size-1 axis of the gate is removed, so an input
        holding a single sample keeps its batch dimension.
        """
        gate = torch.sigmoid(self.mlpc(x)).squeeze(-1)
        return self.mlp1(x), self.mlp2(x), gate

In [10]:
class MLP(pl.LightningModule):
    """Stack of Linear layers with a ReLU after every layer except the last."""

    def __init__(self, input_size, hidden_sizes, output_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size

        # Layer widths from input to output; each neighbouring pair defines one
        # Linear layer, created in input-to-output order.
        widths = [self.input_size, *self.hidden_sizes, self.output_size]
        layers = [
            torch.nn.Linear(n_in, n_out)
            for n_in, n_out in zip(widths[:-1], widths[1:])
        ]

        self.linears = torch.nn.ModuleList(layers)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Apply every hidden layer followed by ReLU, then the output layer without activation."""
        *hidden_layers, output_layer = self.linears
        for layer in hidden_layers:
            x = self.relu(layer(x))
        return output_layer(x)

In [28]:
# Registry of the models to train, keyed by the name used for the TensorBoard
# run and the saved weights file.
# NOTE(review): `MLP_model([256, 256])` only matches the MLP_model variant whose
# constructor takes a single `hidden_sizes` argument; the other variants in this
# notebook require two or three hidden-size lists. Make sure that variant's cell
# is the one executed most recently before running this cell.
models = {
    'sm_sr_mlp': MLP_model([256, 256]),
    #'double_model_l1': MLP_model([256, 256]),
    #'double_mlp_l1': MLP_model([256, 256], [256, 256]),
}

In [29]:
# Smoke test: run the untrained model once on the sample batch `x` and show its output.
models['sm_sr_mlp'](x)

tensor([[  2.9901,  -5.7961,  -5.5810,  -5.1119],
        [  3.1072,  -6.0536,  -5.1749,  -4.6451],
        [  3.0261,  -6.2914,  -4.9262,  -4.1240],
        ...,
        [-28.1328, -24.8551, -26.2985,  35.6199],
        [-28.6839, -25.3942, -26.7826,  35.6591],
        [-29.1914, -25.9910, -27.2442,  35.7803]], grad_fn=<AddmmBackward0>)

In [30]:
# Train every registered model with early stopping, then save its weights.
import os

from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

MODEL_DIR = 'sr_learn_models'
# torch.save does not create missing directories.
os.makedirs(MODEL_DIR, exist_ok=True)

for model_name, model in models.items():
    # Use a fresh callback per model: an EarlyStopping instance keeps its best
    # score and wait counter, so sharing one across trainers would let one
    # model's history decide when the next model stops.
    early_stop_callback = EarlyStopping(monitor="train_loss", patience=3000, verbose=False, mode="min")

    # The try/except is deliberate best-effort: models without parameters do
    # not need training, and their weights are still saved below.
    try:
        logger = TensorBoardLogger("lightning_logs", name=model_name)
        trainer = pl.Trainer(gpus=1, max_epochs=10000,
                             gradient_clip_val=0.5,
                             callbacks=[early_stop_callback],
                             logger=logger)
                             #accumulate_grad_batches=4)

        trainer.fit(model, loader)
    except Exception as e:
        # Say which model failed and with what kind of error, so a real
        # training failure is not mistaken for a "nothing to train" case.
        print(f'Training of {model_name!r} did not complete: {type(e).__name__}: {e}')
    torch.save(model.state_dict(), f'{MODEL_DIR}/{model_name}.mod')

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True, used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type | Params
-------------------------------
0 | model | MLP  | 69.1 K
-------------------------------
69.1 K    Trainable params
0         Non-trainable params
69.1 K    Total params
0.276     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 66: : 23it [11:51, 30.92s/it, loss=0.809, v_num=0]
Epoch 110: : 3it [01:08, 22.67s/it, loss=51.8, v_num=1]

In [83]:
# Manually save one model's weights to '<name>.mod' in the working directory.
# this was the dimensionwise min, but we want to average over the features
# NOTE(review): 'c_m_mlp_corr' is not a key of the `models` dict as defined in
# this notebook (it only holds 'sm_sr_mlp'), so this cell depends on an earlier
# kernel state and raises KeyError on a fresh run.
m = 'c_m_mlp_corr'
torch.save(models[m].state_dict(), f'{m}.mod')

In [None]:
# To watch training progress, run TensorBoard from a terminal (not as Python in this cell):
#   tensorboard --logdir lightning_logs