In [179]:
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger

import wandb
wandb_logger = WandbLogger(project="MethPrediction")

import os
os.environ['WANDB_API_KEY'] = '2a1829519497eaab2f05c336830a1d4b0a3a8238'

In [180]:
run = wandb.init(
    # Set the wandb entity where your project will be logged (generally your team name).
    entity="andygglez-meth",
    # Set the wandb project where this run will be logged.
    project="my-awesome-project",
    # Track hyperparameters and run metadata.
    config={
        "learning_rate": 1e-3,
        "architecture": "CNN+ATT",
        "dataset": "test.for.torch.npz",
        "epochs": 100,
    },
)

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
epoch,‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà
train_loss_epoch,‚ñà‚ñà‚ñà‚ñÉ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
train_loss_relative,‚ñà‚ñà‚ñà‚ñà‚ñá‚ñÜ‚ñÖ‚ñÖ‚ñÑ‚ñÉ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
train_loss_step,‚ñà‚ñÉ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
trainer/global_step,‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà

0,1
epoch,299.0
train_loss_epoch,0.00096
train_loss_relative,0.89027
train_loss_step,0.00096
trainer/global_step,299.0


In [181]:
#### Dataset Class
## Notice that the __init__ method contains an argument `apply_log10`, if you set it to True
## you will apply a log10 to the raw counts. We can experiment with this
class MethDataset(Dataset):
    def __init__(self, sequence, histone, methylation, coords, apply_log10=False):
        self.sequence = sequence
        self.histone = histone
        self.methylation = methylation
        self.transform = apply_log10
        self.coords = coords
        self.histone_names = ['H3K4me3', 'H3K36me2', 'H3K27me3', 'H3K9me3']

    def __len__(self):
        return self.methylation.shape[0]

    def __getitem__(self, idx):
        
        sequence = torch.from_numpy(self.sequence[idx])
        histone = self.histone.astype(np.float32)

        H3K4me3 = torch.from_numpy(histone[:, :, 0][idx].astype(np.float32)) if not self.transform else torch.from_numpy(np.log10(histone[:, :, 0]+1e-4)[idx])
        H3K36me2 = torch.from_numpy(histone[:, :, 1][idx].astype(np.float32)) if not self.transform else torch.from_numpy(np.log10(histone[:, :, 1]+1e-4)[idx])
        H3K27me3 = torch.from_numpy(histone[:, :, 2][idx].astype(np.float32)) if not self.transform else torch.from_numpy(np.log10(histone[:, :, 2]+1e-4)[idx])
        H3K9me3 = torch.from_numpy(histone[:, :, 3][idx].astype(np.float32)) if not self.transform else torch.from_numpy(np.log10(histone[:, :, 3]+1e-4)[idx])

        methylation = self.methylation[idx]
        coordinates = self.coords[idx]

        return sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3, methylation, coordinates

class MethDataModule(pl.LightningDataModule):
    def __init__(self, npz_path, train_split=0.8, batch_size=32, apply_log10=True):
        super().__init__()
        self.npz_path = npz_path
        self.batch_size = batch_size
        self.train_split = train_split
        self.transform = apply_log10
        self.histone_names = ['H3K4me3', 'H3K36me2', 'H3K27me3', 'H3K9me3']
    
    def prepare_data(self):
        self.data = np.load(self.npz_path, allow_pickle=True)
    
    def setup(self, stage=None):
        split_index = int(self.train_split * self.data['dna'].shape[0]) ### 80% of the data will be for training

        self.train_dataset = MethDataset(sequence = data['dna'][:split_index],
                                histone = data['histone'][:split_index], 
                                methylation = data['methyl'][:split_index],
                                coords = data['coords'][:split_index],
                                apply_log10=True)

        self.test_dataset = MethDataset(sequence = data['dna'][split_index:],
                                histone = data['histone'][split_index:], 
                                methylation = data['methyl'][split_index:],
                                coords = data['coords'][split_index:],
                                apply_log10=True)
        
    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)



In [182]:
data_module = MethDataModule(npz_path='chr19.npz', train_split=0.8, batch_size=32)

In [183]:
class Model(pl.LightningModule):
    def __init__(self, DNA_kernel_sizes, DNA_strides, DNA_conv_channels, loss_fn=nn.MSELoss, optimizer=torch.optim.Adam, learning_rate=1e-3):
        super().__init__()
        # Module parameters
        self.DNA_layer1_kernel_size, self.DNA_layer2_kernel_size, self.DNA_layer3_kernel_size, self.DNA_layer4_kernel_size = DNA_kernel_sizes
        self.DNA_conv_channels = DNA_conv_channels
        self.DNA_layer1_stride, self.DNA_layer2_stride, self.DNA_layer3_stride, self.DNA_layer4_stride = DNA_strides

        self.loss_fn = loss_fn()
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.first_epoch_loss = None
        self.first_test_loss = None

        
        ############## Modules and architecture
        self.dna_module = nn.Sequential(
            nn.Conv1d(in_channels=4, out_channels=DNA_conv_channels, kernel_size=(self.DNA_layer1_kernel_size), 
                        stride=self.DNA_layer1_stride, padding=0),
            nn.ReLU(),
            nn.Conv1d(in_channels=DNA_conv_channels, out_channels=1, kernel_size=(self.DNA_layer3_kernel_size), 
                        stride=self.DNA_layer3_stride, padding=0),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(self.DNA_layer4_kernel_size), 
                        stride=self.DNA_layer4_stride, padding=0)
        )

        ### 
        self.H3K4me3_module = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=DNA_conv_channels, kernel_size=(self.DNA_layer1_kernel_size), 
                        stride=self.DNA_layer1_stride, padding=0),
            nn.ReLU(),
            nn.Conv1d(in_channels=DNA_conv_channels, out_channels=1, kernel_size=(self.DNA_layer3_kernel_size), 
                        stride=self.DNA_layer3_stride, padding=0),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(self.DNA_layer4_kernel_size), 
                        stride=self.DNA_layer4_stride, padding=0)
        )
        self.H3K36me2_module = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=DNA_conv_channels, kernel_size=(self.DNA_layer1_kernel_size), 
                        stride=self.DNA_layer1_stride, padding=0),
            nn.ReLU(),
            nn.Conv1d(in_channels=DNA_conv_channels, out_channels=1, kernel_size=(self.DNA_layer3_kernel_size), 
                        stride=self.DNA_layer3_stride, padding=0),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(self.DNA_layer4_kernel_size), 
                        stride=self.DNA_layer4_stride, padding=0)
        )
        self.H3K27me3_module = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=DNA_conv_channels, kernel_size=(self.DNA_layer1_kernel_size), 
                        stride=self.DNA_layer1_stride, padding=0),
            nn.ReLU(),
            nn.Conv1d(in_channels=DNA_conv_channels, out_channels=1, kernel_size=(self.DNA_layer3_kernel_size), 
                        stride=self.DNA_layer3_stride, padding=0),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(self.DNA_layer4_kernel_size), 
                        stride=self.DNA_layer4_stride, padding=0)
        )
        self.H3K9me3_module = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=DNA_conv_channels, kernel_size=(self.DNA_layer1_kernel_size), 
                        stride=self.DNA_layer1_stride, padding=0),
            nn.ReLU(),
            nn.Conv1d(in_channels=DNA_conv_channels, out_channels=1, kernel_size=(self.DNA_layer3_kernel_size), 
                        stride=self.DNA_layer3_stride, padding=0),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(self.DNA_layer4_kernel_size), 
                        stride=self.DNA_layer4_stride, padding=0)
        )
        
        #### Cross-Attention
        self.attn = nn.MultiheadAttention(embed_dim=25, num_heads=5, batch_first=True)

        self.fc = nn.Sequential(
            nn.Linear(125, 250),
            nn.ReLU(),
            nn.Linear(250, 100),
            nn.ReLU(),
            nn.Linear(100, 10),
            nn.ReLU(),
            nn.Linear(10, 1),
            nn.Softplus()
        )

    def forward(self, sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3):
        sequence = sequence.to(torch.float32).permute(0, 2, 1) ### Changed to (B,C=4,L=500) to use Conv1D
        dna_module_output = self.dna_module(sequence)

        H3K4me3_module_output = self.H3K4me3_module(H3K4me3.unsqueeze(1))
        H3K36me2_module_output = self.H3K36me2_module(H3K36me2.unsqueeze(1))
        H3K27me3_module_output = self.H3K27me3_module(H3K27me3.unsqueeze(1))
        H3K9me3_module_output = self.H3K9me3_module(H3K9me3.unsqueeze(1))
        
        stack = torch.cat([dna_module_output, H3K4me3_module_output, H3K36me2_module_output, H3K27me3_module_output, H3K9me3_module_output], dim=1)#.permute(1,0,2) # Not sure if this is ok

        ### Attention
        attention_output, attention_weights = self.attn(stack, stack, stack)
        attention_reshaped = attention_output.reshape(attention_output.size(0), -1)
        ###

        methylation_prediction = self.fc(attention_reshaped)

        return methylation_prediction
    
    def training_step(self, batch, batch_idx):
        sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3, methylation, coordinates = batch
        prediction = self.forward(sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3)
        loss = self.loss_fn(prediction, methylation.unsqueeze(-1).float())
        self.log('train_loss', loss, on_epoch=True)
        return loss

    def on_train_epoch_end(self):
        epoch_loss = self.trainer.callback_metrics["train_loss"].item()

        if self.current_epoch == 0:
            self.first_epoch_loss = epoch_loss

        if self.first_epoch_loss is not None:

            relative = epoch_loss / self.first_epoch_loss * 100
            print("train_loss_relative", relative)
            self.log("train_loss_relative", relative)
    

    ############################# NOT USING THIS ######################################
    def validation_step(self, batch, batch_idx):
        sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3, methylation, coordinates = batch
        prediction = self.forward(sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3)
        loss = loss_fn(prediction, methylation.unsqueeze(-1).float())
        self.log('val_loss', loss, on_epoch=True)
        return loss
    ############################# NOT USING THIS ######################################
    
    def test_step(self, batch, batch_idx):
        sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3, methylation, coordinates = batch
        prediction = self.forward(sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3)
        loss = loss_fn(prediction, methylation.unsqueeze(-1).float())
        self.log('test_loss', loss, on_epoch=True)
        return loss
    
    def on_test_epoch_end(self):
        epoch_loss = self.trainer.callback_metrics["test_loss"].item()
        if not hasattr(self, "first_test_loss") or self.first_test_loss is None:
            self.first_test_loss = epoch_loss
        relative = epoch_loss / self.first_test_loss * 100
        print("test_loss_relative:", relative)
        self.log("test_loss_relative", relative)

    def configure_optimizers(self):
        return self.optimizer(self.parameters(), lr=self.learning_rate)

In [184]:
model = Model(DNA_kernel_sizes=(10,0,10,5), DNA_strides=(2,5,3,3), DNA_conv_channels = 2)

In [185]:
trainer = pl.Trainer(max_epochs=300, logger=wandb_logger)
trainer.fit(model=model, train_dataloaders=data_module)

üí° Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


/home/andy-bio/.anaconda/envs/lightning/lib/python3.13/site-packages/pytorch_lightning/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
/home/andy-bio/.anaconda/envs/lightning/lib/python3.13/site-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name            | Type               | Params | Mode 
---------------------------------------------------------------
0 | loss_fn         | MSELoss            | 0      | train
1 | dna_module      | Sequential         | 103    | train
2 | H3K4me3_module  | Sequential         | 43     | train
3 | H3K36me2_module | Sequential         | 43     | train
4 | H3K27me3_module | Sequential         | 43     | train
5 | H3K9me3_module  | Sequential         | 43     | train
6 | attn            | Mu

Epoch 0: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 13.17it/s, v_num=29m7]train_loss_relative 100.0
Epoch 1: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 13.50it/s, v_num=29m7]train_loss_relative 99.63172879191204
Epoch 2: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 12.55it/s, v_num=29m7]train_loss_relative 99.3053757566455
Epoch 3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 13.00it/s, v_num=29m7]train_loss_relative 98.92740664784868
Epoch 4: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.94it/s, v_num=29m7]train_loss_relative 98.41140500792122
Epoch 5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 10.53it/s, v_num=29m7]train_loss_relative 97.74906329139141
Epoch 6: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.09it/s, v_num=29m7]train_loss_relative 96.9017835767504
Epoch 7: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 11.67it/s, v_num=29m7]train_loss_relative 95.79795219349982
Epoch 8: 100%|‚ñà‚ñà‚ñ

`Trainer.fit` stopped: `max_epochs=300` reached.


Epoch 299: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  9.61it/s, v_num=29m7]


In [186]:
trainer.test(model, dataloaders=data_module)

/home/andy-bio/.anaconda/envs/lightning/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.
/home/andy-bio/.anaconda/envs/lightning/lib/python3.13/site-packages/pytorch_lightning/utilities/data.py:106: Total length of `DataLoader` across ranks is zero. Please make sure this was your intention.


[]



In [697]:
### Model class
## 1- My idea is to be able to control de architecture of the model, and training parameters since the model is created.
## This should make it easier to debug and to try different architectures, and the architecture of the model can be 
## specified through the arguments.
## 2- The `forward` method unsqueezes the input so the model understands the structure in batches.
## 3- There is a method called `training_loop`. Please, complete it, after you specify the architecture, add the loss function, and backward
## propagation step
## 4- I think we can add an `eval_loop` method, in which we iterate over the `test_dataloader` and evaluate the accuracy of the model (R^2)
## 5- Try some architectures, and some way to pass arguments to the model, such that we can try different numbers without having problems
## with tensor shapes and things like that. The idea is to be able to test certain combinations of numbers, so we can use Optuna to make
## a bayesian search for "optimal" parameters. Look at papers where people use CNNs for DNA and histone marks, try to have a similar architecture
## and let's start with that
class Model(nn.Module):
    def __init__(self, DNA_kernel_sizes, DNA_strides, DNA_conv_channels):
        super().__init__()
        # Module parameters
        self.DNA_layer1_kernel_size, self.DNA_layer2_kernel_size, self.DNA_layer3_kernel_size, self.DNA_layer4_kernel_size = DNA_kernel_sizes
        self.DNA_conv_channels = DNA_conv_channels
        self.DNA_layer1_stride, self.DNA_layer2_stride, self.DNA_layer3_stride, self.DNA_layer4_stride = DNA_strides

        
        ############## Modules and architecture
        self.dna_module = nn.Sequential(
            nn.Conv1d(in_channels=4, out_channels=DNA_conv_channels, kernel_size=(self.DNA_layer1_kernel_size), 
                        stride=self.DNA_layer1_stride, padding=0),
            nn.ReLU(),
            nn.Conv1d(in_channels=DNA_conv_channels, out_channels=1, kernel_size=(self.DNA_layer3_kernel_size), 
                        stride=self.DNA_layer3_stride, padding=0),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(self.DNA_layer4_kernel_size), 
                        stride=self.DNA_layer4_stride, padding=0)
        )

        ### 
        self.H3K4me3_module = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=DNA_conv_channels, kernel_size=(self.DNA_layer1_kernel_size), 
                        stride=self.DNA_layer1_stride, padding=0),
            nn.ReLU(),
            nn.Conv1d(in_channels=DNA_conv_channels, out_channels=1, kernel_size=(self.DNA_layer3_kernel_size), 
                        stride=self.DNA_layer3_stride, padding=0),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(self.DNA_layer4_kernel_size), 
                        stride=self.DNA_layer4_stride, padding=0)
        )
        self.H3K36me2_module = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=DNA_conv_channels, kernel_size=(self.DNA_layer1_kernel_size), 
                        stride=self.DNA_layer1_stride, padding=0),
            nn.ReLU(),
            nn.Conv1d(in_channels=DNA_conv_channels, out_channels=1, kernel_size=(self.DNA_layer3_kernel_size), 
                        stride=self.DNA_layer3_stride, padding=0),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(self.DNA_layer4_kernel_size), 
                        stride=self.DNA_layer4_stride, padding=0)
        )
        self.H3K27me3_module = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=DNA_conv_channels, kernel_size=(self.DNA_layer1_kernel_size), 
                        stride=self.DNA_layer1_stride, padding=0),
            nn.ReLU(),
            nn.Conv1d(in_channels=DNA_conv_channels, out_channels=1, kernel_size=(self.DNA_layer3_kernel_size), 
                        stride=self.DNA_layer3_stride, padding=0),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(self.DNA_layer4_kernel_size), 
                        stride=self.DNA_layer4_stride, padding=0)
        )
        self.H3K9me3_module = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=DNA_conv_channels, kernel_size=(self.DNA_layer1_kernel_size), 
                        stride=self.DNA_layer1_stride, padding=0),
            nn.ReLU(),
            nn.Conv1d(in_channels=DNA_conv_channels, out_channels=1, kernel_size=(self.DNA_layer3_kernel_size), 
                        stride=self.DNA_layer3_stride, padding=0),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=(self.DNA_layer4_kernel_size), 
                        stride=self.DNA_layer4_stride, padding=0)
        )
        
        #### Cross-Attention
        self.attn = nn.MultiheadAttention(embed_dim=25, num_heads=5, batch_first=True)

        self.fc = nn.Sequential(
            nn.Linear(125, 250),
            nn.ReLU(),
            nn.Linear(250, 100),
            nn.ReLU(),
            nn.Linear(100, 10),
            nn.ReLU(),
            nn.Linear(10, 1),
            nn.Softplus()
        )

    def forward(self, sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3):
        sequence = sequence.to(torch.float32).permute(0, 2, 1) ### Changed to (B,C=4,L=500) to use Conv1D
        dna_module_output = self.dna_module(sequence)

        H3K4me3_module_output = self.H3K4me3_module(H3K4me3.unsqueeze(1))
        H3K36me2_module_output = self.H3K36me2_module(H3K36me2.unsqueeze(1))
        H3K27me3_module_output = self.H3K27me3_module(H3K27me3.unsqueeze(1))
        H3K9me3_module_output = self.H3K9me3_module(H3K9me3.unsqueeze(1))
        
        stack = torch.cat([dna_module_output, H3K4me3_module_output, H3K36me2_module_output, H3K27me3_module_output, H3K9me3_module_output], dim=1)#.permute(1,0,2) # Not sure if this is ok

        ### Attention
        attention_output, attention_weights = self.attn(stack, stack, stack)
        attention_reshaped = attention_output.reshape(attention_output.size(0), -1)
        ###

        methylation_prediction = self.fc(attention_reshaped)

        return methylation_prediction


    def training_loop(self, loss_fn, train_dataset, batch_size=10, epochs=100, learning_rate=1e-3, optimizer=torch.optim.SGD):

        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        optimizer = optimizer(self.parameters(), lr=learning_rate)
        loss_fn = loss_fn()

        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Model will be trained in {device}")
        self.to(device)
        
        self.train()
        loss_dict = {}
        for e in range(epochs):
            loss_accum = 0
            for i, (sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3, methylation, coordinates) in enumerate(train_dataloader):
                
                sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3, methylation = sequence.to(device), H3K4me3.to(device), H3K36me2.to(device), H3K27me3.to(device), H3K9me3.to(device), methylation.to(device)
                prediction = self.forward(sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3)

                loss = loss_fn(prediction, methylation.unsqueeze(-1).float())
                
                loss_accum += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                print("---")

            if (e+1) % 2 == 0:
                # print(f"Iter: {e+1}, Loss: {loss_accum}")
                loss_dict[e+1] = loss_accum
        
        with open("loss_dict.pkl", "wb") as file:
            pickle.dump(loss_dict, file)
    
    def eval_loop(args, kwargs):
        pass


In [698]:
model = Model(DNA_kernel_sizes=(10,0,10,5), DNA_strides=(2,5,3,3), DNA_conv_channels = 2)

In [699]:
torch.save(model, "model.pth")

with open("train_dataset.pkl", "wb") as file:
    pickle.dump(train_dataset, file)

In [694]:
model.training_loop(loss_fn=nn.MSELoss, train_dataset=train_dataset, batch_size=10, epochs=150, learning_rate=1e-3, optimizer=torch.optim.Adam)

---
---
---
---
---
---
---
---


KeyboardInterrupt: 

In [671]:
torch.save(model, "model.pth")

tensor([[-2.3323e-01,  1.1601e+00,  1.4217e+00, -2.4813e-01,  4.2929e-01,
         -5.4772e-01,  5.7561e-01,  1.2113e+00, -2.6437e-01,  1.3577e+00,
          5.2429e-01, -5.2623e-01, -1.7426e+00, -8.3357e-01, -1.3284e+00,
         -8.0573e-02, -6.7825e-01,  5.6580e-01, -1.4961e+00,  3.0421e-01,
         -6.0418e-01,  1.2223e+00,  2.6899e-01,  5.1948e-01, -3.0307e-01,
         -3.5551e-01,  2.7880e-01, -9.3031e-01,  8.4154e-01,  3.5884e-01,
          2.7014e-01,  2.2733e+00,  1.1548e-01, -2.4733e+00,  2.1760e+00,
         -1.2356e+00,  6.0599e-01,  8.1846e-01,  7.7446e-01, -6.3601e-01,
         -1.7114e-01, -5.5536e-01, -6.5090e-01, -1.1438e-01,  1.3795e-01,
         -1.0505e+00,  9.5566e-01, -6.4408e-01,  7.0710e-01,  1.8673e+00,
          6.0603e-01, -1.0644e-01, -6.3872e-01, -9.0523e-01, -2.3540e-01,
         -7.6514e-01, -1.2568e+00,  1.4438e+00,  1.0096e+00, -1.1652e+00,
         -1.1298e+00, -1.1263e+00, -8.0377e-01, -2.7690e-01, -9.2203e-01,
          1.9362e+00,  1.2261e+00,  7.

In [587]:
sequence, H3K4me3, H3K36me2, H3K27me3, H3K9me3, methylation, coordinates = next(iter(train_dataloader))

In [190]:
print(H3K4me3.shape)
print(H3K4me3.unsqueeze(1).shape)

torch.Size([8, 500])
torch.Size([8, 1, 500])


In [104]:
seq[0].shape    # [8, 500, 4] I need to reshape this to (B,C=4,L=500)
seq[0].permute(0,2, 1)    # [8, 500, 4]

tensor([[[1, 0, 0,  ..., 0, 0, 1],
         [0, 0, 0,  ..., 1, 1, 0],
         [0, 0, 1,  ..., 0, 0, 0],
         [0, 1, 0,  ..., 0, 0, 0]],

        [[0, 0, 1,  ..., 1, 0, 1],
         [0, 1, 0,  ..., 0, 1, 0],
         [1, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        [[1, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 1, 1],
         [0, 1, 1,  ..., 1, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        ...,

        [[1, 0, 0,  ..., 0, 0, 0],
         [0, 1, 1,  ..., 0, 1, 0],
         [0, 0, 0,  ..., 0, 0, 1],
         [0, 0, 0,  ..., 1, 0, 0]],

        [[1, 0, 1,  ..., 1, 0, 1],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 1, 0,  ..., 0, 1, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        [[0, 0, 1,  ..., 0, 0, 0],
         [0, 1, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 1],
         [1, 0, 0,  ..., 1, 1, 0]]])

In [98]:
seq[0][0].transpose(-2,1)

tensor([[1, 0, 0,  ..., 0, 0, 1],
        [0, 0, 0,  ..., 1, 1, 0],
        [0, 0, 1,  ..., 0, 0, 0],
        [0, 1, 0,  ..., 0, 0, 0]])

In [86]:
seq[0].permute(0, 2, 1)[0]#.shape

tensor([[1, 0, 0,  ..., 0, 0, 1],
        [0, 0, 0,  ..., 1, 1, 0],
        [0, 0, 1,  ..., 0, 0, 0],
        [0, 1, 0,  ..., 0, 0, 0]])

In [53]:
[
    [[1,
      0,
      0,
      1]
     ],
     [[0,0,1,0],
      [0,0,0,0],
      [0,0,0,0],
      [0,0,1,0]],
     [],
     [],CHANNEL4], BATCH1
    [],
    [],
    [],
    [],
 ]
 BATCH=5, CHANNEL=4, WIDTH=, HEIGHT=

NameError: name 'self' is not defined

In [152]:
def get_out_Conv1D(length, kernel_size,
                  padding, stride, dilation=1):
    return np.floor((length+2*padding-dilation*(kernel_size-1)-1)/stride+1).astype(int)

def get_out_MaxPool1D(length, kernel_size,
                  padding, stride, dilation=1):
    return np.floor((length+2*padding-dilation*(kernel_size-1)-1)/stride+1).astype(int)


100

In [493]:
k1=10
s1=2

k2=10
s2=5

k3=5
s3=3

k4=5
s4=3

size1 = get_out_Conv1D(length=500, kernel_size=k1, padding=0, stride=s1)
# size2 = get_out_MaxPool1D(length=size1, kernel_size=k2 ,padding=0, stride=s2, dilation=1)
size3 = get_out_Conv1D(length=size1, kernel_size=k3, padding=0, stride=s3)
size4 = get_out_MaxPool1D(length=size3, kernel_size=k4, padding=0, stride=s4, dilation=1)


print(size4)

26


In [47]:
w2, h2

(0, 18)

In [159]:
lr = 1e-3
optim = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
## GPyOpt check this library (== Optuna)

### Model from 

# def d_cnn_model(input_length):
#     model = Sequential()

#     model.add(Dropout(0.2, input_shape=(input_length,1)))
#     model.add(Conv1D(32, 3, activation='relu'))
#     # model.add(Conv1D(32, 3, activation='relu'))
#     # model.add(Dropout(0.5))
#     model.add(MaxPooling1D(2))

#     model.add(Conv1D(64, 3, activation='relu'))
#     # # model.add(Dropout(0.5))
#     model.add(MaxPooling1D(2))

#     model.add(Conv1D(128, 3, activation='relu'))
#     # # model.add(Dropout(0.5))
#     model.add(MaxPooling1D(2))