# TFBS and Nucleosome checkpoints

1. Take the original checkpoints from the PRINT paper and save them in PyTorch format.
2. Convert the linear model into a convolution-based form, to avoid manually creating scanning windows.
3. Everything below is based on Ruochi's code.

In [1]:
import h5py
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
from datetime import datetime

# Two-digit year, month, day (YYMMDD) of the moment this cell is run.
today = datetime.now().strftime("%y%m%d")

## All-in-one model class

In [3]:
class TFModelConv(nn.Module):
    """Three-layer dense footprint model expressed as 1-D convolutions.

    The dense model scores one flattened window of ``len(scales) * window``
    standardised footprint values at a time. Here the first dense layer
    becomes a ``conv1d`` with ``len(scales)`` input channels and a kernel
    ``window`` positions wide, and the two following dense layers become
    width-1 convolutions, so a whole track is scored in one pass without
    building the sliding windows by hand. The standardisation
    ``(x - footprintMean) / footprintSd`` is folded into the first layer's
    weights and bias.

    Args:
        scales: one entry per footprint scale; its length is the number of
            input channels. Stored on the module as ``self.scales``.
        footprintMean: per-input mean, one value per row of ``weights_0``
            (after the motif row has been removed when ``motif`` is true).
        footprintSd: per-input standard deviation, same layout as
            ``footprintMean``.
        weights_0: first-layer kernel, shape ``(inputs, hidden_1)``, rows
            ordered scale-major (all positions of scale 0, then scale 1, ...).
        weights_1: first-layer bias.
        weights_2: second-layer kernel, shape ``(hidden_1, hidden_2)``.
        weights_3: second-layer bias.
        weights_4: output-layer kernel, shape ``(hidden_2, out)``; only output
            channel 0 is returned by ``forward``.
        weights_5: output-layer bias.
        sigmoid: apply a sigmoid to the final layer's output.
        motif: if true, the last row of ``weights_0`` is an extra non-footprint
            input; it is removed from the kernel and added to the bias.

    Raises:
        ValueError: if the number of footprint rows in ``weights_0`` is not a
            multiple of ``len(scales)``.
    """

    def __init__(
        self,
        scales,
        footprintMean: np.ndarray,
        footprintSd: np.ndarray,
        weights_0: np.ndarray,
        weights_1: np.ndarray,
        weights_2: np.ndarray,
        weights_3: np.ndarray,
        weights_4: np.ndarray,
        weights_5: np.ndarray,
        sigmoid: bool = True,
        motif: bool = True,
    ):
        super().__init__()
        self.scales = torch.tensor(scales)
        self.sigmoid = sigmoid

        # Layers 2 and 3 act on one position at a time: turn each
        # (in, out) dense kernel into an (out, in, 1) conv1d kernel.
        self.weights_2 = nn.Parameter(
            torch.from_numpy(weights_2.T[..., None]).double()
        )
        self.weights_3 = nn.Parameter(torch.from_numpy(weights_3).double())
        self.weights_4 = nn.Parameter(
            torch.from_numpy(weights_4.T[..., None]).double()
        )
        self.weights_5 = nn.Parameter(torch.from_numpy(weights_5).double())

        # The last row of weights_0 belongs to the motif input, not to the
        # footprint window: move it into the bias and drop it from the kernel.
        if motif:
            weights_1 = weights_1 + weights_0[-1]
            weights_0 = weights_0[:-1]

        # Fold the standardisation into the first layer:
        #   ((x - mean) / sd) @ W + b == x @ (W / sd) + (b - (mean / sd) @ W)
        new_weights_0 = 1 / footprintSd[:, None] * weights_0
        new_weights_1 = weights_1 - (footprintMean / footprintSd) @ weights_0

        # Derive the window length from the checkpoint instead of assuming a
        # fixed width, so kernels trained on other window sizes load too.
        l1_channel = len(scales)
        n_inputs, l1_out = weights_0.shape
        if l1_channel == 0 or n_inputs % l1_channel != 0:
            raise ValueError(
                f"weights_0 has {n_inputs} footprint rows, which is not a "
                f"multiple of the number of scales ({l1_channel})"
            )
        window = n_inputs // l1_channel

        # (scale * window, out) -> (scale, window, out) -> (out, scale, window)
        new_weights_0 = new_weights_0.reshape(
            (l1_channel, window, l1_out)
        ).transpose(2, 0, 1)

        self.weights_0 = nn.Parameter(torch.from_numpy(new_weights_0).double())
        self.weights_1 = nn.Parameter(torch.from_numpy(new_weights_1).double())

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        """Score every window position of ``data``.

        Args:
            data: tensor of shape ``(..., len(scales), length)``; any number
                of leading axes is accepted.

        Returns:
            Tensor of shape ``(..., length - window + 1)`` holding output
            channel 0 of the last layer (after the sigmoid when enabled).
        """
        data = data.type(self.weights_0.dtype)

        shapes = data.shape
        # conv1d wants (batch, channels, length): collapse all leading axes.
        data = data.reshape((-1, data.shape[-2], data.shape[-1]))
        x = F.relu(F.conv1d(data, self.weights_0, self.weights_1))
        x = F.relu(F.conv1d(x, self.weights_2, self.weights_3))
        x = F.conv1d(x, self.weights_4, self.weights_5)
        if self.sigmoid:
            x = torch.sigmoid(x)
        x = x[:, 0, :]
        # Restore the leading axes; the last axis is whatever length the
        # unpadded first convolution produced.
        new_shape = shapes[:-2] + (x.shape[-1],)
        x = x.reshape(new_shape)
        return x

## TFBS_model.h5
- Input: footprint
- Model: All TF model
- Output: TFBS score
- Notes: produces scores that look like broad peaks

In [4]:
# Read the footprint normalisation statistics, then kernel and bias of the
# three dense layers (dense_15 -> dense_16 -> dense_17), in that order.
with h5py.File("TFBS_model.h5", "r") as TFBSmodel:
    (
        footprintMean,
        footprintSd,
        weights_0,
        weights_1,
        weights_2,
        weights_3,
        weights_4,
        weights_5,
    ) = [TFBSmodel[name][:] for name in ("footprint_mean", "footprint_sd")] + [
        TFBSmodel["model_weights"][layer][layer][param][:]
        for layer in ("dense_15", "dense_16", "dense_17")
        for param in ("kernel:0", "bias:0")
    ]

In [5]:
# Build the conv model from the arrays loaded above, tag it with the export
# date, compile it with TorchScript and write the checkpoint.
eager_model = TFModelConv(
    [10, 20, 30, 50, 80, 100],
    footprintMean,
    footprintSd,
    weights_0,
    weights_1,
    weights_2,
    weights_3,
    weights_4,
    weights_5,
)
eager_model.version = today
model_conv = torch.jit.script(eager_model)
model_conv.save("footprint_to_TFBS_conv_model.pt")

In [6]:
# Smoke test: random input with two leading axes, 6 scale channels and
# 1000 positions; the echoed shape is the cell's output.
data = torch.from_numpy(np.random.standard_normal(size=(2, 2, 6, 1000)))
x = model_conv(data)
x.shape

torch.Size([2, 2, 800])

## TFBS_model_cluster_I.h5
- Input: footprint
- Model: TF with strong footprint model
- Output: TFBS score
- Notes: produces scores that look like narrow peaks

In [7]:
# Read the footprint normalisation statistics, then kernel and bias of the
# three dense layers (dense -> dense_1 -> dense_2), in that order.
with h5py.File("TFBS_model_cluster_I.h5", "r") as TFBSmodel:
    (
        footprintMean,
        footprintSd,
        weights_0,
        weights_1,
        weights_2,
        weights_3,
        weights_4,
        weights_5,
    ) = [TFBSmodel[name][:] for name in ("footprint_mean", "footprint_sd")] + [
        TFBSmodel["model_weights"][layer][layer][param][:]
        for layer in ("dense", "dense_1", "dense_2")
        for param in ("kernel:0", "bias:0")
    ]

In [8]:
# Build the conv model from the cluster-I arrays loaded above, tag it with
# the export date, compile it with TorchScript and write the checkpoint.
eager_model = TFModelConv(
    [10, 20, 30, 50, 80, 100],
    footprintMean,
    footprintSd,
    weights_0,
    weights_1,
    weights_2,
    weights_3,
    weights_4,
    weights_5,
)
eager_model.version = today
model_conv = torch.jit.script(eager_model)
model_conv.save("footprint_to_TFBS_class1_conv_model.pt")

In [9]:
# Smoke test: random input with two leading axes, 6 scale channels and
# 1000 positions; the echoed shape is the cell's output.
data = torch.from_numpy(np.random.standard_normal(size=(2, 2, 6, 1000)))
x = model_conv(data)
x.shape

torch.Size([2, 2, 800])

## nucleosome_model.h5
- Input: footprint
- Model: nucleosome footprint model
- Output: nucleosome score
- Notes: produces a score that shows large nucleosome footprints

In [10]:
# Read the footprint normalisation statistics, kernel and bias of the three
# dense layers (dense -> dense_1 -> dense_2), and finally the scales stored
# in the checkpoint.
with h5py.File("nucleosome_model.h5", "r") as TFBSmodel:
    (
        footprintMean,
        footprintSd,
        weights_0,
        weights_1,
        weights_2,
        weights_3,
        weights_4,
        weights_5,
        scales,
    ) = (
        [TFBSmodel[name][:] for name in ("footprint_mean", "footprint_sd")]
        + [
            TFBSmodel["model_weights"][layer][layer][param][:]
            for layer in ("dense", "dense_1", "dense_2")
            for param in ("kernel:0", "bias:0")
        ]
        + [TFBSmodel["scales"][:]]
    )
scales

array([10., 20., 30., 50., 80.])

In [11]:
# Build the nucleosome conv model with the scales read from the checkpoint.
# No motif row is folded into the bias and the raw (non-sigmoid) output is
# kept. Then tag, compile with TorchScript and save.
eager_model = TFModelConv(
    scales,
    footprintMean,
    footprintSd,
    weights_0,
    weights_1,
    weights_2,
    weights_3,
    weights_4,
    weights_5,
    sigmoid=False,
    motif=False,
)
eager_model.version = today
model_conv = torch.jit.script(eager_model)
model_conv.save("footprint_to_nucleosome_conv_model.pt")
scales

array([10., 20., 30., 50., 80.])

In [12]:
# Smoke test: random input with one leading axis, 5 scale channels and
# 1000 positions; the echoed shape is the cell's output.
data = torch.from_numpy(np.random.standard_normal(size=(4, 5, 1000)))
x = model_conv(data)
x.shape

torch.Size([4, 800])