In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [2]:
from collections import OrderedDict
from fastprogress import progress_bar
from pathlib import Path
from sklearn.model_selection import train_test_split, ShuffleSplit
from torch import nn
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter

import functools
import os
import pandas as pd
import random
import shutil
import torch
import torch.nn.functional as F


# Per-position regression targets read from the training frame.
target_cols = [
    'reactivity',
    'deg_Mg_pH10',
    'deg_pH10',
    'deg_Mg_50C',
    'deg_50C',
]
# String-valued columns that are one-hot encoded into the model input.
input_cols = [
    'sequence',
    'structure',
    'predicted_loop_type',
]
# Error columns that accompany the targets (not referenced by the code in this notebook section).
error_cols = [
    'reactivity_error',
    'deg_error_Mg_pH10',
    'deg_error_Mg_50C',
    'deg_error_pH10',
    'deg_error_50C',
]

# Alphabet of every input column; a symbol's position in the string is its one-hot index.
# The three alphabets have 4 + 3 + 7 = 14 symbols in total.
_alphabets = {
    "sequence": "ACGU",
    "structure": '().',
    "predicted_loop_type": "BEHIMSX",
}
token_dicts = {
    column: {symbol: position for position, symbol in enumerate(alphabet)}
    for column, alphabet in _alphabets.items()
}


def set_seed(seed: int = 42):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also exports ``PYTHONHASHSEED`` into the process environment.

    Args:
        seed: Value handed to every seeding function.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for seeder in seeders:
        seeder(seed)

In [3]:
from sklearn.model_selection import train_test_split, ShuffleSplit
from torch import nn
from torch.utils.data import Dataset

import functools


# Directory holding train.json, test.json and the bpps/<id>.npy matrices read by this notebook.
BASE_PATH = "../../input/"
# Directory where the training loops write model-<epoch>.pt and optimizer.pt.
MODEL_SAVE_PATH = "./model"


def preprocess_inputs(df, cols):
    """One-hot encode every column in ``cols`` and join the encodings feature-wise.

    Args:
        df: Frame containing the columns named in ``cols``.
        cols: Column names, each of which must be a key of ``token_dicts``.

    Returns:
        Array of shape ``(len(df), seq_length, sum of alphabet sizes)``.
    """
    encoded_columns = []
    for column in cols:
        encoded_columns.append(preprocess_feature_col(df, column))
    return np.concatenate(encoded_columns, axis=2)


def preprocess_feature_col(df, col):
    """One-hot encode one string column of ``df``.

    Args:
        df: Frame whose column ``col`` holds equal-length strings.
        col: Column name; must be a key of ``token_dicts``.

    Returns:
        Float array of shape ``(len(df), seq_length, alphabet_size)``.

    Raises:
        KeyError: If a string contains a symbol missing from the column's alphabet.
        AssertionError: If the encoded array does not have the expected shape.
    """
    dic = token_dicts[col]
    dic_len = len(dic)
    sequences = df[col]
    # Positional access: the frame's index labels need not start at 0.
    seq_length = len(sequences.iloc[0])
    ident = np.identity(dic_len)
    # Row i of the identity matrix is the one-hot vector of token i, so indexing
    # with a list of token ids encodes a whole sequence at once.
    arr = np.array([ident[[dic[x] for x in seq]] for seq in sequences])
    # shape: data_size x seq_length x dic_length
    assert arr.shape == (len(df), seq_length, dic_len)
    return arr


def preprocess(base_data, is_test=False):
    """Turn a raw frame into model inputs and, for labelled data, targets.

    Args:
        base_data: Frame with the ``input_cols`` columns and, unless
            ``is_test`` is true, the ``target_cols`` columns.
        is_test: When true no labels are built.

    Returns:
        ``(inputs, labels)``. ``inputs`` has 14 features on its last axis;
        ``labels`` is ``None`` for test data, otherwise an array with one
        channel per target column on its last axis.
    """
    inputs = preprocess_inputs(base_data, input_cols)
    labels = None
    if not is_test:
        # Stored as (sample, target, position); the model wants (sample, position, target).
        per_target = np.array(base_data[target_cols].values.tolist())
        labels = per_target.transpose((0, 2, 1))
        assert labels.shape[2] == len(target_cols)
    assert inputs.shape[2] == 14
    return inputs, labels


def get_bpp_feature(bpp):
    """Summarise a batch of pairing matrices into three per-position features.

    Args:
        bpp: Tensor indexed as ``(batch, position, position)``.

    Returns:
        List of three tensors, each with a trailing singleton axis added at
        dim 2: the row maximum, the row sum, and the standardised fraction of
        non-zero entries per position.
    """
    nb_mean = 0.077522  # mean of bpps_nb across all training data
    nb_std = 0.08914  # std of bpps_nb across all training data
    strongest = bpp.max(-1)[0]
    total = bpp.sum(-1)
    nonzero_fraction = torch.true_divide((bpp > 0).sum(dim=1), bpp.shape[1])
    nonzero_standardised = torch.true_divide(nonzero_fraction - nb_mean, nb_std)
    return [feature.unsqueeze(2) for feature in (strongest, total, nonzero_standardised)]


@functools.lru_cache(5000)
def load_from_id(id_):
    """Load the ``bpps/<id_>.npy`` array under ``BASE_PATH``.

    Results are memoised (up to 5000 ids), so the returned array object is
    shared between calls.
    """
    return np.load(str(Path(BASE_PATH) / f"bpps/{id_}.npy"))


def get_distance_matrix(leng):
    """Build inverse-distance features for every pair of sequence positions.

    For positions ``i`` and ``j`` the base value is ``1 / (|i - j| + 1)``;
    the three output channels hold that value raised to the powers 1, 2 and 4.

    Args:
        leng: Sequence length. ``0`` yields an empty ``(1, 0, 0, 3)`` array.

    Returns:
        Float array of shape ``(1, leng, leng, 3)``.
    """
    positions = np.arange(leng)
    # Broadcasting gives the full |i - j| table in one step; +1 keeps the
    # diagonal finite after inversion.
    offsets = np.abs(positions[:, None] - positions[None, :]) + 1
    inverse = (1 / offsets)[None, :, :]
    Ds = np.stack([inverse ** power for power in (1, 2, 4)], axis=3)
    # The shape is echoed on every call (one line per dataset built).
    print(Ds.shape)
    return Ds


def get_structure_adj(df):
    """Build a symmetric pairing adjacency matrix from each dot-bracket string.

    Every ``(`` is matched with the next unmatched ``)``; both ``[start, end]``
    and ``[end, start]`` are set to 1. The base types of the paired positions
    do not influence the result, so any pair of bases is accepted.

    Args:
        df: Frame with ``seq_length`` and ``structure`` columns.

    Returns:
        Array of shape ``(len(df), seq_length, seq_length, 1)`` when all rows
        share one length.

    Raises:
        IndexError: If a ``)`` appears without a preceding unmatched ``(``.
    """
    Ss = []
    for i in range(len(df)):
        seq_length = df["seq_length"].iloc[i]
        structure = df["structure"].iloc[i]

        # One matrix is enough: pair types were only ever summed together.
        adjacency = np.zeros([seq_length, seq_length, 1])
        open_positions = []
        for j in range(seq_length):
            symbol = structure[j]
            if symbol == "(":
                open_positions.append(j)
            elif symbol == ")":
                start = open_positions.pop()
                adjacency[start, j, 0] = 1
                adjacency[j, start, 0] = 1
        Ss.append(adjacency)

    return np.array(Ss)


def create_loader(df, batch_size=1, is_test=False):
    """Preprocess ``df`` and wrap it in a ``DataLoader`` over a ``VacDataset``.

    Labelled data is shuffled; test data (no labels) keeps its order. The
    last, possibly smaller, batch is always kept.

    Args:
        df: Raw frame passed to ``preprocess`` and ``VacDataset``.
        batch_size: Number of samples per batch.
        is_test: Forwarded to ``preprocess``; when true no labels are built.

    Returns:
        The configured ``torch.utils.data.DataLoader``.
    """
    features, labels = preprocess(df, is_test)
    features_tensor = torch.from_numpy(features)
    has_labels = labels is not None
    labels_tensor = torch.from_numpy(labels) if has_labels else None
    dataset = VacDataset(features_tensor, df, labels_tensor)
    return torch.utils.data.DataLoader(dataset, batch_size, shuffle=has_labels, drop_last=False)


class VacDataset(Dataset):
    """Dataset yielding one sample's sequence features and pairwise features.

    Each item is a dict with ``sequence`` (float tensor), ``bpp`` (array whose
    last axis stacks the loaded pairing matrix, the structure adjacency and the
    three distance channels) and ``ids``. Labelled datasets additionally yield
    ``label``, ``signal_to_noise`` and ``score``.

    Rows are addressed by position, so ``df`` may carry any index.
    """

    def __init__(self, features, df, labels=None):
        """
        Args:
            features: Tensor of per-sample input features, first axis = sample.
            df: Source frame with ``id``, ``seq_length``, ``structure`` and,
                for labelled data, ``signal_to_noise`` columns.
            labels: Tensor of targets, or ``None`` for test data.
        """
        self.features = features
        self.labels = labels
        self.test = labels is None
        self.ids = df["id"]
        self.structure_adj = get_structure_adj(df)
        self.distance_matrix = get_distance_matrix(self.structure_adj.shape[1])
        if "score" not in df.columns:
            # NOTE: this writes a default "score" column into the caller's
            # frame; kept because later cells can observe the extra column.
            df["score"] = 1.0
        self.score = df["score"]
        self.signal_to_noise = None
        if not self.test:
            self.signal_to_noise = df["signal_to_noise"]
            assert self.features.shape[0] == self.labels.shape[0]

    def __len__(self):
        return len(self.features)

    def __getitem__(self, index):
        # .iloc everywhere: ``index`` is a position handed out by the sampler,
        # not an index label of the source frame.
        sample_id = self.ids.iloc[index]
        # Cast to float32 first (as before); concatenating with the float64
        # adjacency/distance arrays then yields a fresh float64 array, so the
        # cached array returned by load_from_id is never modified.
        pairing = np.asarray(load_from_id(sample_id), dtype=np.float32)
        adj = self.structure_adj[index]
        distance = self.distance_matrix[0]
        bpp = np.concatenate([pairing[:, :, None], adj, distance], axis=2)
        if self.test:
            return dict(sequence=self.features[index].float(), bpp=bpp, ids=sample_id)
        return dict(sequence=self.features[index].float(), bpp=bpp,
                    label=self.labels[index], ids=sample_id,
                    signal_to_noise=self.signal_to_noise.iloc[index],
                    score=self.score.iloc[index])

In [4]:
from torch.nn import TransformerEncoder, TransformerEncoderLayer

import math


class Conv1dStack(nn.Module):
    """Two Conv1d -> BatchNorm -> Dropout -> LeakyReLU units with a skip connection.

    The first unit maps ``in_dim`` to ``out_dim`` channels; the second keeps
    ``out_dim`` channels and its output is added back onto its input.
    """

    def __init__(self, in_dim, out_dim, kernel_size=3, padding=1, dilation=1):
        super().__init__()

        def conv_unit(channels_in):
            # Both units share every hyper-parameter except the input width.
            return nn.Sequential(
                nn.Conv1d(channels_in, out_dim, kernel_size=kernel_size, padding=padding,
                          dilation=dilation, bias=False),
                nn.BatchNorm1d(out_dim),
                nn.Dropout(0.1),
                nn.LeakyReLU(),
            )

        self.conv = conv_unit(in_dim)
        self.res = conv_unit(out_dim)

    def forward(self, x):
        projected = self.conv(x)
        return projected + self.res(projected)


class Conv2dStack(nn.Module):
    """2-D convolution block: two conv stages, channel gating, ReLU, then a skip branch.

    ``conv1`` (conv, batch norm, dropout, LeakyReLU) and ``conv2`` (conv, batch
    norm) are applied in sequence, the result is rescaled by ``SELayer2D`` and
    passed through ReLU; the output of ``res`` is then added to that value.
    """

    def __init__(self, in_dim, out_dim, kernel_size=3, padding=1, dilation=1):
        super().__init__()
        conv_kwargs = dict(kernel_size=kernel_size, padding=padding, dilation=dilation, bias=False)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_dim, out_dim, **conv_kwargs),
            nn.BatchNorm2d(out_dim),
            nn.Dropout(0.1),
            nn.LeakyReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_dim, out_dim, **conv_kwargs),
            nn.BatchNorm2d(out_dim),
        )
        self.res = nn.Sequential(
            nn.Conv2d(out_dim, out_dim, **conv_kwargs),
            nn.BatchNorm2d(out_dim),
            nn.Dropout(0.1),
            nn.LeakyReLU(),
        )
        self.se = SELayer2D(out_dim, 4)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        features = self.conv2(self.conv1(x))
        gated = self.relu(self.se(features))
        return gated + self.res(gated)

class SELayer1D(nn.Module):
    """Squeeze-and-excitation style channel gate for ``(batch, channel, length)`` input.

    Each channel is averaged over the length axis, passed through a two-layer
    bottleneck ending in a sigmoid, and the resulting per-channel factor
    rescales the input.

    Args:
        channel: Number of input channels.
        reduction: Bottleneck ratio; the hidden width is ``channel // reduction``.
    """

    def __init__(self, channel, reduction=16):
        super(SELayer1D, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        # 1-D input has three axes; unpacking four (as the 2-D layer does)
        # raised ValueError on every call.
        b, c, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1)
        return x * y.expand_as(x)
    
class SELayer2D(nn.Module):
    """Squeeze-and-excitation style channel gate for ``(batch, channel, H, W)`` input.

    Each channel is averaged over both spatial axes, passed through a two-layer
    bottleneck ending in a sigmoid, and the resulting per-channel factor
    rescales the input.

    Args:
        channel: Number of input channels.
        reduction: Bottleneck ratio; the hidden width is ``channel // reduction``.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        hidden = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        batch, channels, _height, _width = x.size()
        pooled = self.avg_pool(x).view(batch, channels)
        gate = self.fc(pooled).view(batch, channels, 1, 1)
        return x * gate.expand_as(x)
    
    
class SeqEncoder(nn.Module):
    """Chain of four ``Conv1dStack`` stages whose outputs are all concatenated.

    The stages emit 128, 64, 32 and 32 channels, so the result has 256
    channels. Kernel size, padding and dilation are chosen per stage so that
    every stage keeps the sequence length.
    """

    def __init__(self, in_dim: int):
        super().__init__()
        self.conv0 = Conv1dStack(in_dim, 128, 3, padding=1)
        self.conv1 = Conv1dStack(128, 64, 6, padding=5, dilation=2)
        self.conv2 = Conv1dStack(64, 32, 15, padding=7, dilation=1)
        self.conv3 = Conv1dStack(32, 32, 30, padding=29, dilation=2)

    def forward(self, x):
        stage_outputs = []
        for stage in (self.conv0, self.conv1, self.conv2, self.conv3):
            x = stage(x)
            stage_outputs.append(x)
        # BATCH x 256 x seq_length
        return torch.cat(stage_outputs, dim=1)


class BppAttn(nn.Module):
    """Mix per-position features across positions using learned pairwise weights.

    The sequence features are projected to ``out_channel`` channels, the
    5-channel pairwise input is convolved to the same channel count, and for
    every channel the pairwise matrix is multiplied with the feature vector.
    """

    def __init__(self, in_channel: int, out_channel: int):
        super().__init__()
        self.conv0 = Conv1dStack(in_channel, out_channel, 3, padding=1)
        self.bpp_conv = Conv2dStack(5, out_channel)

    def forward(self, x, bpp):
        seq_features = self.conv0(x)        # BATCH x C x SEQ
        pair_weights = self.bpp_conv(bpp)   # BATCH x C x SEQ x SEQ
        mixed = torch.matmul(pair_weights, seq_features.unsqueeze(-1))
        return mixed.squeeze(-1)


class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position codes to a sequence-first tensor, then apply dropout.

    Even feature indices carry sines and odd indices carry cosines of the
    position scaled by a geometric series of frequencies. The table is stored
    as the buffer ``pe`` with shape ``(max_len, 1, d_model)``.

    Args:
        d_model: Feature width of the input.
        dropout: Dropout probability applied after the addition.
        max_len: Number of positions precomputed.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term

        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(angles)
        table[:, 1::2] = torch.cos(angles)
        # Singleton middle axis lets the table broadcast over the batch.
        self.register_buffer('pe', table.unsqueeze(1))

    def forward(self, x):
        # x is indexed (position, batch, feature); only the first x.size(0)
        # rows of the table are needed.
        return self.dropout(x + self.pe[:x.size(0), :])


class TransformerWrapper(nn.Module):
    """Transformer encoder exposing the call convention of an ``nn.LSTM``/``nn.GRU``.

    Takes and returns batch-first tensors, returns ``(output, None)`` and
    offers a no-op ``flatten_parameters`` so it can sit next to recurrent
    layers in ``RnnLayers``.
    """

    def __init__(self, dmodel=256, nhead=8, num_layers=2):
        super().__init__()
        # Fixed-width (256) encoding that forward() never reads; it stays
        # registered so the module's state-dict keys remain unchanged.
        self.pos_encoder = PositionalEncoding(256)
        self.transformer_encoder = TransformerEncoder(
            TransformerEncoderLayer(d_model=dmodel, nhead=nhead), num_layers
        )
        self.pos_emb = PositionalEncoding(dmodel)

    def flatten_parameters(self):
        """Do nothing; present only for interface parity with recurrent layers."""

    def forward(self, x):
        # Switch to (sequence, batch, feature) for the encoder and back again.
        seq_first = x.permute((1, 0, 2)).contiguous()
        encoded = self.transformer_encoder(self.pos_emb(seq_first))
        return encoded.permute((1, 0, 2)).contiguous(), None


class RnnLayers(nn.Module):
    """Transformer encoder followed by a bidirectional LSTM and a bidirectional GRU.

    Dropout is applied before each recurrent layer. Both recurrent layers use
    ``dmodel // 2`` hidden units per direction, so the feature width stays
    ``dmodel`` throughout.
    """

    def __init__(self, dmodel, dropout=0.3, transformer_layers: int = 2):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.rnn0 = TransformerWrapper(dmodel, nhead=8, num_layers=transformer_layers)
        recurrent_kwargs = dict(batch_first=True, num_layers=1, bidirectional=True)
        self.rnn1 = nn.LSTM(dmodel, dmodel // 2, **recurrent_kwargs)
        self.rnn2 = nn.GRU(dmodel, dmodel // 2, **recurrent_kwargs)

    def forward(self, x):
        self.rnn0.flatten_parameters()
        x, _ = self.rnn0(x)
        # A recurrent stage is skipped when its attribute has been set to None.
        for recurrent in (self.rnn1, self.rnn2):
            if recurrent is None:
                continue
            recurrent.flatten_parameters()
            x, _ = recurrent(self.dropout(x))
        return x

    
class BaseAttnModel(nn.Module):
    """Shared backbone producing a 512-wide feature vector per sequence position.

    The 14 one-hot features are extended with three statistics of the first
    pairwise channel and one learned linear feature (18 in total), encoded by
    a convolutional stack, mixed across positions through ``BppAttn``, encoded
    again, and the two 256-channel encodings are concatenated and passed
    through ``RnnLayers``.
    """

    def __init__(self, transformer_layers: int = 2):
        super().__init__()
        self.linear0 = nn.Linear(14 + 3, 1)
        self.seq_encoder_x = SeqEncoder(18)
        self.attn = BppAttn(256, 128)
        self.seq_encoder_bpp = SeqEncoder(128)
        self.seq = RnnLayers(256 * 2, dropout=0.3,
                             transformer_layers=transformer_layers)

    def forward(self, x, bpp):
        # Statistics come from channel 0 of the pairwise input only.
        pair_stats = get_bpp_feature(bpp[:, :, :, 0].float())
        tokens = torch.cat([x, *pair_stats], dim=-1)
        tokens = torch.cat([tokens, self.linear0(tokens)], dim=-1)
        # BATCH x 18 x seq_len
        tokens = tokens.permute(0, 2, 1).contiguous().float()
        # BATCH x 5 x seq_len x seq_len
        pairwise = bpp.permute([0, 3, 1, 2]).contiguous().float()
        # BATCH x 256 x seq_len
        seq_encoding = self.seq_encoder_x(tokens)
        # BATCH x 256 x seq_len
        pair_encoding = self.seq_encoder_bpp(self.attn(seq_encoding, pairwise))
        # Back to BATCH x seq_len x 256 for both branches.
        seq_encoding = seq_encoding.permute(0, 2, 1).contiguous()
        pair_encoding = pair_encoding.permute(0, 2, 1).contiguous()
        # BATCH x seq_len x 512
        combined = torch.cat([seq_encoding, pair_encoding], dim=2)
        return self.seq(combined)


class AEModel(nn.Module):
    """Auto-encoder head: backbone features mapped back to the 14 one-hot inputs.

    The backbone output (512 features per position) goes through dropout and
    a linear layer followed by a sigmoid, giving one value in (0, 1) for each
    of the 14 input features.

    Args:
        transformer_layers: Number of transformer layers in the backbone.
    """

    def __init__(self, transformer_layers: int = 2):
        super(AEModel, self).__init__()
        self.seq = BaseAttnModel(transformer_layers=transformer_layers)
        self.linear = nn.Sequential(
            nn.Linear(256 * 2, 14),
            nn.Sigmoid(),
        )

    def forward(self, x, bpp):
        x = self.seq(x, bpp)
        # The functional form defaults to training=True; tie it to the module
        # mode so model.eval() really disables dropout.
        x = F.dropout(x, p=0.3, training=self.training)
        x = self.linear(x)
        return x


class FromAeModel(nn.Module):
    """Regression head placed on top of an existing backbone.

    Args:
        seq: Backbone module called as ``seq(x, bpp)``.
        pred_len: Number of leading positions kept in the output.
        dmodel: Half of the backbone's output width; the linear layer reads
            ``dmodel * 2`` features and emits one value per target column.
    """

    def __init__(self, seq, pred_len=68, dmodel: int = 256):
        super().__init__()
        self.seq = seq
        self.pred_len = pred_len
        self.linear = nn.Sequential(
            nn.Linear(dmodel * 2, len(target_cols)),
        )

    def forward(self, x, bpp):
        per_position = self.linear(self.seq(x, bpp))
        # Only the first pred_len positions are returned.
        return per_position[:, :self.pred_len]

In [5]:
# Data for auto-encoder pre-training: the training set plus both test subsets,
# all loaded without labels and in their original order.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 32
base_train_data = pd.read_json(str(Path(BASE_PATH) / 'train.json'), lines=True)
base_test_data = pd.read_json(str(Path(BASE_PATH) / 'test.json'), lines=True)
# The test file mixes two sequence lengths; each gets its own loader because a
# batch must contain sequences of one length.
public_df = base_test_data.query("seq_length == 107").copy()
private_df = base_test_data.query("seq_length == 130").copy()
print(f"public_df: {public_df.shape}")
print(f"private_df: {private_df.shape}")
public_df = public_df.reset_index()
private_df = private_df.reset_index()

# create_loader(..., is_test=True) preprocesses without labels and builds an
# unshuffled DataLoader that keeps the last partial batch.
loader0 = create_loader(base_train_data, BATCH_SIZE, is_test=True)
loader1 = create_loader(public_df, BATCH_SIZE, is_test=True)
loader2 = create_loader(private_df, BATCH_SIZE, is_test=True)
# The dataset names stay bound for any later cell that refers to them.
dataset0, dataset1, dataset2 = loader0.dataset, loader1.dataset, loader2.dataset

public_df: (629, 7)
private_df: (3005, 7)
(1, 107, 107, 3)
(1, 107, 107, 3)
(1, 130, 130, 3)


In [6]:
def learn_from_batch_ae(model, data, device):
    """Compute the denoising auto-encoder loss for one batch.

    A copy of the input has ``F.dropout2d`` (p=0.3) applied to its first 14
    features; the model must reconstruct the untouched features from it. The
    batch in ``data`` is not modified.

    Args:
        model: Callable taking ``(sequence, bpp)`` and returning values in [0, 1].
        data: Batch dict with ``"sequence"`` and ``"bpp"`` tensors.
        device: Device the tensors are moved to.

    Returns:
        Scalar binary cross-entropy between reconstruction and clean input.
    """
    clean = data["sequence"]
    corrupted = clean.clone()
    corrupted[:, :, :14] = F.dropout2d(corrupted[:, :, :14], p=0.3)
    reconstruction = model(corrupted.to(device), data["bpp"].to(device))
    target = clean[:, :, :14]
    return F.binary_cross_entropy(reconstruction, target.to(device))


def train_ae(model, train_data, optimizer, lr_scheduler, epochs=10, device="cpu",
             start_epoch: int = 0, start_it: int = 0, log_path: str = "./logs"):
    """Run auto-encoder training for ``epochs`` epochs and checkpoint every epoch.

    After each epoch the optimizer state is written to ``optimizer.pt`` and the
    model weights to ``model-<epoch>.pt`` inside ``MODEL_SAVE_PATH``.

    Args:
        model: Model trained in place.
        train_data: Iterable of batches accepted by ``learn_from_batch_ae``.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Optional scheduler stepped once per batch; may be ``None``.
        epochs: Number of epochs to run in this call.
        device: Device batches are moved to.
        start_epoch: Number given to the first epoch of this call.
        start_it: Iteration counter value to continue from.
        log_path: Accepted for signature compatibility; not used.

    Returns:
        Dict with ``end_epoch`` (first epoch number not run), ``it`` (updated
        iteration counter), ``min_loss_epoch`` (epoch of this call with the
        lowest mean loss; ``start_epoch`` if no epoch produced a comparable
        loss) and ``min_loss`` (that lowest mean loss).
    """
    print(f"device: {device}")
    it = start_it
    model_save_path = Path(MODEL_SAVE_PATH)
    end_epoch = start_epoch + epochs
    # Start from +inf so the first finite epoch loss is always recorded, and
    # default the best epoch to one that belongs to this call.
    min_loss = float("inf")
    min_loss_epoch = start_epoch
    model_save_path.mkdir(parents=True, exist_ok=True)
    for epoch in progress_bar(range(start_epoch, end_epoch)):
        print(f"epoch: {epoch}")
        model.train()
        losses = []
        for data in train_data:
            optimizer.zero_grad()
            loss = learn_from_batch_ae(model, data, device)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()
            if lr_scheduler:
                lr_scheduler.step()
            losses.append(loss.item())
            it += 1
        loss_m = np.mean(losses)
        if loss_m < min_loss:
            min_loss_epoch = epoch
            min_loss = loss_m
        print(f'epoch: {epoch} loss: {loss_m}')
        torch.save(optimizer.state_dict(), str(model_save_path / "optimizer.pt"))
        torch.save(model.state_dict(), str(model_save_path / f"model-{epoch}.pt"))
    return dict(end_epoch=end_epoch, it=it, min_loss_epoch=min_loss_epoch, min_loss=min_loss)

In [7]:
import shutil


# Auto-encoder pre-training: four rounds, each training 5 epochs on every
# unlabeled loader in turn (train set, then the two test subsets).
set_seed(123)
for stale_dir in ("./model", "./logs"):
    shutil.rmtree(stale_dir, True)
save_path = Path("./model_prediction")
if not save_path.exists():
    save_path.mkdir(parents=True)

lr_scheduler = None
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AEModel().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
res = dict(end_epoch=0, it=0, min_loss_epoch=0)
epochs = [5, 5, 5, 5]
for e in epochs:
    for loader in (loader0, loader1, loader2):
        # Epoch and iteration counters continue from the previous call.
        res = train_ae(model, loader, optimizer, lr_scheduler, e, device=device,
                       start_epoch=res["end_epoch"], start_it=res["it"])

# NOTE(review): `res` comes from the final train_ae call only, so the copied
# checkpoint is the best epoch of that last call, not of the whole run. The
# recorded output shows the loss jumping at epoch 51 and staying high, so this
# selection deserves a second look.
epoch = res["min_loss_epoch"]
shutil.copyfile(str(Path(MODEL_SAVE_PATH) / f"model-{epoch}.pt"), "ae-model.pt")

device: cuda


epoch: 0
epoch: 0 loss: 0.25412579397360485
epoch: 1
epoch: 1 loss: 0.13216191202402114
epoch: 2
epoch: 2 loss: 0.06935024549563726
epoch: 3
epoch: 3 loss: 0.028938308432698248
epoch: 4
epoch: 4 loss: 0.020188981965184212
device: cuda


epoch: 5
epoch: 5 loss: 0.015516863716766239
epoch: 6
epoch: 6 loss: 0.16228479091078044
epoch: 7
epoch: 7 loss: 0.06675708461552858
epoch: 8
epoch: 8 loss: 0.026076144073158503
epoch: 9
epoch: 9 loss: 0.017415161617100237
device: cuda


epoch: 10
epoch: 10 loss: 0.020721865639566107
epoch: 11
epoch: 11 loss: 0.016409699260158107
epoch: 12
epoch: 12 loss: 0.01466008892996197
epoch: 13
epoch: 13 loss: 0.01360777104669746
epoch: 14
epoch: 14 loss: 0.0130978908508699
device: cuda


epoch: 15
epoch: 15 loss: 0.024169061488161486
epoch: 16
epoch: 16 loss: 0.02279738210141659
epoch: 17
epoch: 17 loss: 0.010225677366058032
epoch: 18
epoch: 18 loss: 0.008707359122733275
epoch: 19
epoch: 19 loss: 0.008218180400629839
device: cuda


epoch: 20
epoch: 20 loss: 0.006329368962906301
epoch: 21
epoch: 21 loss: 0.0059927744325250385
epoch: 22
epoch: 22 loss: 0.005773757956922055
epoch: 23
epoch: 23 loss: 0.005522173235658556
epoch: 24
epoch: 24 loss: 0.005406275019049644
device: cuda


epoch: 25
epoch: 25 loss: 0.012985392404284249
epoch: 26
epoch: 26 loss: 0.012053749434887729
epoch: 27
epoch: 27 loss: 0.011598565912944205
epoch: 28
epoch: 28 loss: 0.011280736123072975
epoch: 29
epoch: 29 loss: 0.011070258746993668
device: cuda


epoch: 30
epoch: 30 loss: 0.006801698009173076
epoch: 31
epoch: 31 loss: 0.006209328363959988
epoch: 32
epoch: 32 loss: 0.006049357218046983
epoch: 33
epoch: 33 loss: 0.0059371042251586915
epoch: 34
epoch: 34 loss: 0.005784233702967564
device: cuda


epoch: 35
epoch: 35 loss: 0.004913143662270159
epoch: 36
epoch: 36 loss: 0.004844974970910698
epoch: 37
epoch: 37 loss: 0.004375648417044431
epoch: 38
epoch: 38 loss: 0.00417260117828846
epoch: 39
epoch: 39 loss: 0.0040570184239186345
device: cuda


epoch: 40
epoch: 40 loss: 0.010656323531602925
epoch: 41
epoch: 41 loss: 0.009965676913394573
epoch: 42
epoch: 42 loss: 0.00953505505768067
epoch: 43
epoch: 43 loss: 0.00984791328417177
epoch: 44
epoch: 44 loss: 0.009858391153566699
device: cuda


epoch: 45
epoch: 45 loss: 0.0058253320182363195
epoch: 46
epoch: 46 loss: 0.005865139256541928
epoch: 47
epoch: 47 loss: 0.005555450444420179
epoch: 48
epoch: 48 loss: 0.0054214531493683655
epoch: 49
epoch: 49 loss: 0.005005289750794569
device: cuda


epoch: 50
epoch: 50 loss: 0.004363679292146117
epoch: 51
epoch: 51 loss: 0.5766543057747185
epoch: 52
epoch: 52 loss: 0.4566668406128883
epoch: 53
epoch: 53 loss: 0.40210413485765456
epoch: 54
epoch: 54 loss: 0.3841605201363564
device: cuda


epoch: 55
epoch: 55 loss: 0.38130888279448166
epoch: 56
epoch: 56 loss: 0.36800752865507247
epoch: 57
epoch: 57 loss: 0.353798822836673
epoch: 58
epoch: 58 loss: 0.3440731884317195
epoch: 59
epoch: 59 loss: 0.3405891951728374


'ae-model.pt'

In [9]:
def MCRMSE(y_true, y_pred):
    """Mean column-wise root-mean-squared error, one value per sample.

    The squared error is averaged over dim 1, the square root is taken per
    remaining column, and those roots are averaged.

    Args:
        y_true: Tensor indexed ``(sample, position, column)``.
        y_pred: Tensor of the same shape.

    Returns:
        Tensor with one score per sample.
    """
    squared_error = torch.square(y_true - y_pred)
    rmse_per_column = torch.sqrt(torch.mean(squared_error, dim=1))
    return torch.mean(rmse_per_column, dim=1)


def sn_mcrmse_loss(predict, target, signal_to_noise):
    """Batch loss: per-sample MCRMSE weighted by ``0.5 * log(signal_to_noise + 1.01)``.

    Args:
        predict: Model output.
        target: Ground-truth values, same shape as ``predict``.
        signal_to_noise: One value per sample.

    Returns:
        Scalar tensor, the mean of the weighted per-sample scores.
    """
    per_sample = MCRMSE(target, predict)
    weights = 0.5 * torch.log(signal_to_noise + 1.01)
    return (per_sample * weights).mean()


def learn_from_batch(model, data, optimizer, lr_scheduler, device):
    """Run one optimisation step on a labelled batch.

    The per-sample weight passed to the loss is ``signal_to_noise * score``.
    Gradients are clipped to a total norm of 0.5 before the optimizer step.

    Args:
        model: Callable taking ``(sequence, bpp)``.
        data: Batch dict with ``sequence``, ``bpp``, ``label``,
            ``signal_to_noise`` and ``score``.
        optimizer: Optimizer to step.
        lr_scheduler: Optional scheduler stepped after the optimizer.
        device: Device the tensors are moved to.

    Returns:
        ``(prediction, loss)`` for the batch.
    """
    optimizer.zero_grad()
    prediction = model(data["sequence"].to(device), data["bpp"].to(device))
    sample_weight = data["signal_to_noise"] * data["score"]
    loss = sn_mcrmse_loss(prediction, data["label"].to(device), sample_weight.to(device))
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optimizer.step()
    if lr_scheduler:
        lr_scheduler.step()
    return prediction, loss


def evaluate(model, valid_data, device):
    """Score ``model`` on ``valid_data`` without updating it.

    Two numbers are averaged over batches: the weighted training loss, and the
    plain MCRMSE restricted to samples whose ``signal_to_noise`` exceeds 1.
    Batches without any such sample are left out of the second average instead
    of contributing NaN.

    The model is switched to eval mode for scoring and to train mode before
    returning.

    Args:
        model: Callable taking ``(sequence, bpp)``.
        valid_data: Iterable of labelled batch dicts.
        device: Device the tensors are moved to.

    Returns:
        Dict with ``loss`` and ``mcmse`` (key spelled as callers expect);
        ``mcmse`` is NaN when no sample at all passes the filter.
    """
    model.eval()
    loss_list = []
    mcrmse = []
    with torch.no_grad():
        for data in valid_data:
            label = data["label"].to(device)
            signal_to_noise = data["signal_to_noise"].to(device)
            y = model(data["sequence"].to(device), data["bpp"].to(device))
            reliable = MCRMSE(label, y)[signal_to_noise > 1]
            # The mean of an empty selection is NaN and would poison the average.
            if reliable.numel() > 0:
                mcrmse.append(reliable.mean().item())
            loss = sn_mcrmse_loss(y, label, signal_to_noise)
            loss_list.append(loss.item())
    model.train()
    mcrmse_mean = np.mean(mcrmse) if mcrmse else float("nan")
    return dict(loss=np.mean(loss_list), mcmse=mcrmse_mean)


def train(model, train_data, valid_data, optimizer, lr_scheduler, epochs=10, device="cpu",
          start_epoch: int = 0, log_path: str = "./logs"):
    """Train ``model`` with per-epoch validation, logging and checkpointing.

    Every batch loss and every epoch's validation metrics are written to a
    TensorBoard ``SummaryWriter`` at ``log_path``. After each epoch the
    optimizer state (``optimizer.pt``) and model weights (``model-<epoch>.pt``)
    are saved under ``MODEL_SAVE_PATH``.

    Args:
        model: Model trained in place.
        train_data: Iterable of labelled batches.
        valid_data: Iterable of labelled batches used by ``evaluate``.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Optional scheduler; may be ``None``.
        epochs: Number of epochs to run.
        device: Device batches are moved to.
        start_epoch: Number given to the first epoch.
        log_path: Directory for the TensorBoard event files.

    Returns:
        Epoch number with the lowest validation loss (the latest one on ties),
        or ``None`` if no epoch produced a comparable loss.
    """
    print(f"device: {device}")
    model_save_path = Path(MODEL_SAVE_PATH)
    model_save_path.mkdir(parents=True, exist_ok=True)
    end_epoch = start_epoch + epochs
    # +inf guarantees the first finite validation loss is recorded.
    min_eval_loss = float("inf")
    min_eval_epoch = None
    it = 0
    writer = SummaryWriter(log_path)
    try:
        for epoch in progress_bar(range(start_epoch, end_epoch)):
            print(f"epoch: {epoch}")
            model.train()
            losses = []
            for data in train_data:
                _, loss = learn_from_batch(model, data, optimizer, lr_scheduler, device)
                loss_v = loss.item()
                writer.add_scalar('loss', loss_v, it)
                losses.append(loss_v)
                it += 1
            print(f'epoch: {epoch} loss: {np.mean(losses)}')

            eval_result = evaluate(model, valid_data, device)
            eval_loss = eval_result["loss"]
            if eval_loss <= min_eval_loss:
                min_eval_epoch = epoch
                min_eval_loss = eval_loss

            print(f"eval loss: {eval_loss} {eval_result['mcmse']}")
            writer.add_scalar("evaluate/loss", eval_loss, epoch)
            writer.add_scalar("evaluate/mcmse", eval_result["mcmse"], epoch)
            model.train()
            torch.save(optimizer.state_dict(), str(model_save_path / "optimizer.pt"))
            torch.save(model.state_dict(), str(model_save_path / f"model-{epoch}.pt"))
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()
    print(f'min eval loss: {min_eval_loss} epoch {min_eval_epoch}')
    return min_eval_epoch

In [10]:
# Fine-tuning: five random 90/10 splits, each starting from the pre-trained
# auto-encoder backbone; the best epoch of every fold is copied to
# model_prediction/model-<fold>.pt.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 64
SPLIT_SEED = 124
base_train_data = pd.read_json(str(Path(BASE_PATH) / 'train.json'), lines=True)
samples = base_train_data
save_path = Path("./model_prediction")
if not save_path.exists():
    save_path.mkdir(parents=True)
shutil.rmtree("./model", True)
shutil.rmtree("./logs", True)
# The splitter draws lazily, one fold at a time, between training runs. Giving
# it its own seed keeps the folds independent of whatever consumes the global
# NumPy generator in the meantime.
split = ShuffleSplit(n_splits=5, test_size=.1, random_state=SPLIT_SEED)
ids = samples.reset_index()["id"]
set_seed(SPLIT_SEED)
for fold, (train_index, test_index) in enumerate(split.split(samples)):
    print(f"fold: {fold}")
    # The splitter yields positions, hence .iloc rather than .loc.
    train_df = samples.iloc[train_index].reset_index()
    val_df = samples.iloc[test_index].reset_index()
    train_loader = create_loader(train_df, BATCH_SIZE)
    valid_loader = create_loader(val_df, BATCH_SIZE)
    print(train_df.shape, val_df.shape)
    ae_model = AEModel()
    # map_location lets a checkpoint saved on GPU load on the current device.
    state_dict = torch.load("./ae-model.pt", map_location=device)
    ae_model.load_state_dict(state_dict)
    del state_dict
    model = FromAeModel(ae_model.seq)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    lr_scheduler = None
    epoch = train(model, train_loader, valid_loader, optimizer, lr_scheduler, 200, device=device,
                  log_path=f"logs/{fold}")
    shutil.copyfile(str(Path(MODEL_SAVE_PATH) / f"./model-{epoch}.pt"), f"model_prediction/model-{fold}.pt")
    del model

fold: 0
(1, 107, 107, 3)
(1, 107, 107, 3)
(2160, 21) (240, 21)
device: cuda


epoch: 0
epoch: 0 loss: 0.35893201413014175
eval loss: 0.32571133703687594 0.42200799856151605
epoch: 1
epoch: 1 loss: 0.32377848282525445
eval loss: 0.3159935092790472 0.4099649967466611
epoch: 2
epoch: 2 loss: 0.320223978817654
eval loss: 0.3152107171678853 0.4110719909266878
epoch: 3
epoch: 3 loss: 0.3212728114396724
eval loss: 0.3154512321095718 0.41004085730569784
epoch: 4
epoch: 4 loss: 0.32117550254504007
eval loss: 0.313507628402655 0.4091822635711965
epoch: 5
epoch: 5 loss: 0.3204418301763755
eval loss: 0.3168630116036648 0.4107762732618996
epoch: 6
epoch: 6 loss: 0.3207079624261503
eval loss: 0.31523044712609694 0.41029571536629333
epoch: 7
epoch: 7 loss: 0.32086160736872926
eval loss: 0.31556930137734307 0.4107864735242974
epoch: 8
epoch: 8 loss: 0.320299039225034
eval loss: 0.3150132145471407 0.41051437274194746
epoch: 9
epoch: 9 loss: 0.32002861818797634
eval loss: 0.31657246028396957 0.4096875953026655
epoch: 10
epoch: 10 loss: 0.3199227827465274
eval loss: 0.316550072154

eval loss: 0.3035050611851159 0.3950646537515273
epoch: 87
epoch: 87 loss: 0.3085537409296057
eval loss: 0.3019460057440915 0.3912063824715222
epoch: 88
epoch: 88 loss: 0.30741362409350165
eval loss: 0.30075640594643716 0.3904603366525723
epoch: 89
epoch: 89 loss: 0.30854253078719246
eval loss: 0.30445276459885584 0.3921741378418848
epoch: 90
epoch: 90 loss: 0.30771273984141545
eval loss: 0.3016285347133114 0.3924424328265846
epoch: 91
epoch: 91 loss: 0.30709673042840613
eval loss: 0.3030954298595632 0.3954841125009456
epoch: 92
epoch: 92 loss: 0.307574002095627
eval loss: 0.3065661429816176 0.39690611244613905
epoch: 93
epoch: 93 loss: 0.30733173848617557
eval loss: 0.30531940837547566 0.3946554225195
epoch: 94
epoch: 94 loss: 0.30630655975487997
eval loss: 0.3021161384300707 0.39172967080301363
epoch: 95
epoch: 95 loss: 0.3073638188375676
eval loss: 0.303235442198711 0.39292315708132874
epoch: 96
epoch: 96 loss: 0.30667767209289565
eval loss: 0.3050269664122171 0.39382570583984466
ep

epoch: 171 loss: 0.30256787970437304
eval loss: 0.2966598930118262 0.3848768562590066
epoch: 172
epoch: 172 loss: 0.30212269302459527
eval loss: 0.2980501724341208 0.3870595562651022
epoch: 173
epoch: 173 loss: 0.30191469798016624
eval loss: 0.2971405796156257 0.3841263456956243
epoch: 174
epoch: 174 loss: 0.30692438196435795
eval loss: 0.3121683068645721 0.4062905121379845
epoch: 175
epoch: 175 loss: 0.31048234464641183
eval loss: 0.30314706690400467 0.395852315893677
epoch: 176
epoch: 176 loss: 0.30630863578829487
eval loss: 0.3000290887428738 0.3902634356936934
epoch: 177
epoch: 177 loss: 0.30598390564195493
eval loss: 0.30052479020378176 0.3888731242730509
epoch: 178
epoch: 178 loss: 0.3056511542123049
eval loss: 0.3031183748613747 0.3919834791409864
epoch: 179
epoch: 179 loss: 0.3059345692111122
eval loss: 0.3018507667348325 0.39222546381042056
epoch: 180
epoch: 180 loss: 0.30549650746414475
eval loss: 0.30380335892779575 0.3926245323496738
epoch: 181
epoch: 181 loss: 0.3054453352

epoch: 0
epoch: 0 loss: 0.3844470835099143
eval loss: 0.33202091487027713 0.4235491876871253
epoch: 1
epoch: 1 loss: 0.3229697166164548
eval loss: 0.32318378910769385 0.413047705922972
epoch: 2
epoch: 2 loss: 0.3192985091344393
eval loss: 0.3234063966229204 0.41305827032941367
epoch: 3
epoch: 3 loss: 0.31868978202620574
eval loss: 0.3242491889730413 0.41379789724669813
epoch: 4
epoch: 4 loss: 0.320753958722659
eval loss: 0.3229115317365371 0.4129795371532284
epoch: 5
epoch: 5 loss: 0.31864876599206227
eval loss: 0.3224206873047216 0.41159034540368394
epoch: 6
epoch: 6 loss: 0.31867296624680497
eval loss: 0.32499016218073074 0.4125301926460784
epoch: 7
epoch: 7 loss: 0.3185606647828574
eval loss: 0.3219736470359364 0.41273511312710637
epoch: 8
epoch: 8 loss: 0.31907298428890835
eval loss: 0.3227612897792978 0.4121580660147873
epoch: 9
epoch: 9 loss: 0.3189043742336824
eval loss: 0.3260500138731212 0.41482326867677305
epoch: 10
epoch: 10 loss: 0.3196179614584133
eval loss: 0.325635668371

KeyboardInterrupt: 