In [1]:
import warnings, random, os, sys, tqdm, time
sys.path.append("../")
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler, MinMaxScaler, QuantileTransformer, RobustScaler

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.model_selection import KFold

import torch
from torch import nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.modules.loss import _WeightedLoss
from torch.optim.lr_scheduler import ReduceLROnPlateau

from pytorch_tabnet.tab_model import TabNetRegressor
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

pd.set_option("display.max_columns", 1200)
pd.set_option("display.max_rows", 1200)
%matplotlib inline



In [2]:
def metric(y_true, y_pred):
    res = []
    for i in range(0, y_true.shape[1]):
        y = y_true[:,i]
        pred = y_pred[:,i]
        print(i)
        res.append(log_loss(y, pred))
    return np.mean(res)

In [3]:
def metric(y_true, y_pred):
    res = []
    for i in range(0, y_true.shape[1]):
        y = y_true[:,i]
        pred = y_pred[:,i]
        if np.sum(pred) <= 0.0:
            pre += 1e-15
        res.append(log_loss(y, pred))
    return np.mean(res)

def seed_everything(seed_value):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    
    if torch.cuda.is_available(): 
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True
seed_everything(42)
        
    
def make_scaler(flag, seed):
    if flag == "quantile":
        return QuantileTransformer(n_quantiles=100,random_state=seed, output_distribution="normal")
    elif flag == "gauss":
        return GaussRankScaler()
    elif flag == "standard":
        return StandardScaler()
    elif flag == "minmax":
        return MinMaxScaler()
    elif flag == "robust":
        return RobustScaler()
    
seeds = [7, 8, 9, 10, 110, 12, 13]
SCALE = "quantile"

    

In [4]:
# g772, c100, 206クラス、402クラスの分類

train_df = pd.read_csv("../../../Data/Raw/train_features.csv")
test_df = pd.read_csv("../../../Data/Raw/test_features.csv")
#pub_test_df = pd.read_csv("../input/moapublictest/test_features.csv")
pub_test_df = pd.read_csv("../../../Data/Raw/test_features.csv")
drug_df = pd.read_csv("../../../Data/Raw/train_drug.csv")#

y = pd.read_csv("../../../Data/Raw/train_targets_scored.csv")
y_non = pd.read_csv("../../../Data/Raw/train_targets_nonscored.csv")
y_all = pd.concat([y, y_non.drop("sig_id", axis=1)], axis=1)
y = y.merge(drug_df, on='sig_id', how='left') #

GENES = [col for col in train_df.columns if col.startswith("g-")]
CELLS = [col for col in train_df.columns if col.startswith("c-")]
BIOS = GENES + CELLS


SCORED_MOAS = [col for col in y.columns if col != "sig_id" and col != "drug_id"]#
NONSCORED_MOAS = [col for col in y_non.columns if col != "sig_id"]
ALL_MOAS = SCORED_MOAS + NONSCORED_MOAS


TR_SIZE = train_df.shape[0]
TE_SIZE = test_df.shape[0]

train_nonvehicle_index = train_df[train_df["cp_type"] != "ctl_vehicle"].index
test_nonvehicle_index = test_df[test_df["cp_type"] != "ctl_vehicle"].index

train_df["time_dose"] = train_df["cp_time"].astype(str) + " * " + train_df["cp_dose"]
test_df["time_dose"] = test_df["cp_time"].astype(str) + " * " + test_df["cp_dose"]
pub_test_df["time_dose"] = pub_test_df["cp_time"].astype(str) + " * " + pub_test_df["cp_dose"]

# remove cp_type = ctl_vehicle
mask = train_df["cp_type"] != "ctl_vehicle"
train_df = train_df[mask].drop("cp_type", axis=1).reset_index(drop=True)
test_df = test_df[test_df["cp_type"] != "ctl_vehicle"].drop("cp_type", axis=1).reset_index(drop=True)
pub_test_df = pub_test_df[pub_test_df["cp_type"] != "ctl_vehicle"].drop("cp_type", axis=1).reset_index(drop=True)
y_nonv = y[mask].reset_index(drop=True)#
y_all_nonv = y_all[mask].reset_index(drop=True)

scored = y_nonv.copy()#
y_nonv.drop("drug_id", axis=1, inplace=True)#
y.drop("drug_id", axis=1, inplace=True)#

TR_NONV_SIZE = train_df.shape[0]
TE_NONV_SHAPE = test_df.shape[0]

In [5]:
"""# prod
# 上位500こ
prod_cols = [['g-145', 'g-201', 'g-208'], ['g-370', 'g-508', 'g-37'], ['g-38', 'g-392', 'g-707'], ['g-328', 'g-28', 'g-392'], ['g-441', 'g-157', 'g-392'], ['g-181', 'g-100', 'g-392'], ['g-67', 'g-760', 'g-50'], ['g-731', 'g-100', 'g-707'], ['g-478', 'g-468', 'g-310'], ['g-91', 'g-145', 'g-208'], ['g-106', 'g-744', 'g-91'], ['g-131', 'g-208', 'g-392'], ['g-144', 'g-123', 'g-86'], ['g-228', 'g-72', 'g-67'], ['g-31', 'g-328', 'g-460'], ['g-392', 'g-731', 'g-100'], ['g-732', 'g-744', 'g-707'], ['g-705', 'g-375', 'g-704'], ['g-508', 'g-50', 'g-411'], ['g-234', 'g-58', 'g-520'], ['g-503', 'g-761', 'g-50'], ['g-113', 'g-75', 'g-178'], ['g-50', 'g-508', 'g-113'], ['g-113', 'g-375', 'g-75'], ['g-576', 'g-452', 'g-392'], ['g-50', 'g-37', 'g-36'], ['g-707', 'g-133', 'g-392'], ['g-484', 'g-392', 'g-544'], ['g-508', 'g-67', 'g-370'], ['g-123', 'g-731', 'g-100'], ['g-298', 'g-477', 'g-644'], ['g-72', 'g-370', 'g-50'], ['g-67', 'g-178', 'g-113'], ['g-744', 'g-608', 'g-100'], ['g-91', 'g-100', 'g-707'], ['g-37', 'g-228', 'g-202'], ['g-37', 'g-300', 'g-370'], ['g-234', 'g-508', 'g-595'], ['g-596', 'g-744', 'g-707'], ['g-300', 'g-227', 'g-591'], ['g-135', 'g-392', 'g-512'], ['g-731', 'g-744', 'g-158'], ['g-69', 'g-707', 'g-100'], ['g-276', 'g-653', 'g-291'], ['g-624', 'g-615', 'g-189'], ['g-181', 'g-707', 'g-38'], ['g-72', 'g-75', 'g-508'], ['g-231', 'g-707', 'g-392'], ['g-508', 'g-37', 'g-72'], ['g-725', 'g-712', 'g-640'], ['g-67', 'g-644', 'g-113'], ['g-508', 'g-228', 'g-656'], ['g-185', 'g-37', 'g-672'], ['g-370', 'g-50', 'g-503'], ['g-201', 'g-745', 'g-599'], ['g-332', 'g-50', 'g-571'], ['g-50', 'g-37', 'g-59'], ['g-508', 'g-113', 'g-231'], ['g-707', 'g-158', 'g-100'], ['g-257', 'g-50', 'g-72']]

for cols in prod_cols:
    name = "prod-" + " * ".join(cols)
    train_df[name] = train_df[cols].mean(axis=1)
    test_df[name] = test_df[cols].mean(axis=1)
    pub_test_df[name] = pub_test_df[cols].mean(axis=1)"""

PRODS = [col for col in train_df.columns if col.startswith("prod-")]

In [6]:
#out fold preprocessing

#variance threshold

VAR_THRESHOLD = 0.8
drop_cols = []
temp = pd.concat([train_df, pub_test_df])
for col in BIOS+PRODS:
    if temp[col].var() <= VAR_THRESHOLD:
        drop_cols.append(col)

print("drop cols num : {}".format(len(drop_cols)))
train_df.drop(columns=drop_cols, inplace=True)
test_df.drop(columns=drop_cols, inplace=True)
pub_test_df.drop(columns=drop_cols, inplace=True)

GENES_ = [col for col in train_df.columns if col.startswith("g-")]
CELLS_ = [col for col in train_df.columns if col.startswith("c-")]
BIOS_ = GENES_ + CELLS_
        
del temp

# onehot encode of categorical feature and drop
drop_cols = ["cp_time", "cp_dose", "time_dose"]
train_df = pd.concat([pd.get_dummies(train_df["time_dose"], prefix="onehot", drop_first=True), train_df.drop(drop_cols, axis=1) ], axis=1)
test_df = pd.concat([pd.get_dummies(test_df["time_dose"], prefix="onehot", drop_first=True), test_df.drop(drop_cols, axis=1) ], axis=1)
pub_test_df = pd.concat([pd.get_dummies(pub_test_df["time_dose"], prefix="onehot", drop_first=True), pub_test_df.drop(drop_cols, axis=1) ], axis=1)
"""
# aggregation feature
print("agg")
for df in [train_df, pub_test_df, test_df]:
    df["sum-g"] = df[GENES_].sum(axis=1)
    df["mean-g"] = df[GENES_].mean(axis=1)
    df["std-g"] = df[GENES_].std(axis=1)
    df["kurt-g"] = df[GENES_].kurt(axis=1)
    df["skew-g"] = df[GENES_].skew(axis=1)
    df["sum-c"] = df[CELLS_].sum(axis=1)
    df["mean-c"] = df[CELLS_].mean(axis=1)
    df["std-c"] = df[CELLS_].std(axis=1)
    df["kurt-c"] = df[CELLS_].kurt(axis=1)
    df["skew-c"] = df[CELLS_].skew(axis=1)
    df["sum-gc"] = df[BIOS_].sum(axis=1)
    df["mean-gc"] = df[BIOS_].mean(axis=1)
    df["std-gc"] = df[BIOS_].std(axis=1)
    df["kurt-gc"] = df[BIOS_].kurt(axis=1)
    df["skew-gc"] = df[BIOS_].skew(axis=1)
"""

drop cols num : 67


'\n# aggregation feature\nprint("agg")\nfor df in [train_df, pub_test_df, test_df]:\n    df["sum-g"] = df[GENES_].sum(axis=1)\n    df["mean-g"] = df[GENES_].mean(axis=1)\n    df["std-g"] = df[GENES_].std(axis=1)\n    df["kurt-g"] = df[GENES_].kurt(axis=1)\n    df["skew-g"] = df[GENES_].skew(axis=1)\n    df["sum-c"] = df[CELLS_].sum(axis=1)\n    df["mean-c"] = df[CELLS_].mean(axis=1)\n    df["std-c"] = df[CELLS_].std(axis=1)\n    df["kurt-c"] = df[CELLS_].kurt(axis=1)\n    df["skew-c"] = df[CELLS_].skew(axis=1)\n    df["sum-gc"] = df[BIOS_].sum(axis=1)\n    df["mean-gc"] = df[BIOS_].mean(axis=1)\n    df["std-gc"] = df[BIOS_].std(axis=1)\n    df["kurt-gc"] = df[BIOS_].kurt(axis=1)\n    df["skew-gc"] = df[BIOS_].skew(axis=1)\n'

In [7]:
X = train_df.drop("sig_id", axis=1)
y_nonv = y_nonv.drop("sig_id", axis=1).values
y = y.drop("sig_id", axis=1).values
y_all_nonv = y_all_nonv.drop("sig_id", axis=1).values

## Dateset Class

In [8]:
class MoAResNetDataset:
    def __init__(self, features1, features2, targets):
        self.features1 = features1
        self.features2 = features2
        self.targets = targets
        
    def __len__(self):
        return (self.features1.shape[0])
    
    def __getitem__(self, idx):
        dct = {
            'x1' : torch.tensor(self.features1[idx, :], dtype=torch.float),
            'x2' : torch.tensor(self.features2[idx, :], dtype=torch.float),
            'y' : torch.tensor(self.targets[idx, :], dtype=torch.float)            
        }
        return dct
    
class TestResNetDataset:
    def __init__(self, features1, features2):
        self.features1 = features1
        self.features2 = features2
        
    def __len__(self):
        return (self.features1.shape[0])
    
    def __getitem__(self, idx):
        dct = {
            'x1' : torch.tensor(self.features1[idx, :], dtype=torch.float),
            'x2' : torch.tensor(self.features2[idx, :], dtype=torch.float)
        }
        return dct

## func 

In [9]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    model.train()
    final_loss = 0
    for data in dataloader:
        optimizer.zero_grad()
        inputs1, inputs2, targets = data['x1'].to(device), data['x2'].to(device), data['y'].to(device)
        outputs = model(inputs1, inputs2)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        # if cycle
        scheduler.step()
        
        final_loss += loss.item()
        
    final_loss /= len(dataloader)
    
    return final_loss


def valid_fn(model, loss_fn, dataloader, device):
    model.eval()
    final_loss = 0
    
    for data in dataloader:
        inputs1, inputs2, targets = data['x1'].to(device), data['x2'].to(device), data['y'].to(device)
        outputs = model(inputs1, inputs2)
        loss = loss_fn(outputs, targets)
        final_loss += loss.item()
        
    final_loss /= len(dataloader)
    
    return final_loss


def inference_fn(model, dataloader, device):
    model.eval()
    preds = []
    for data in dataloader:
        inputs1, inputs2 = data['x1'].to(device), data['x2'].to(device)

        with torch.no_grad():
            outputs = model(inputs1, inputs2)
        
        preds.append(outputs.sigmoid().detach().cpu().numpy())
        
    preds = np.concatenate(preds)
    
    return preds

## Model

In [10]:
class Model(nn.Module):
    def __init__(self, num_features1, num_features2, num_targets):
        super(Model, self).__init__()
        self.h1_1 = 1500
        self.h1_2 = 750
        
        self.h2_1 = num_features2+self.h1_2
        self.h2_2 = 1500
        self.h2_3 = 750
        
        self.h3_1 = 750
        
        self.head1 = nn.Sequential(
            nn.BatchNorm1d(num_features1),
            nn.Dropout(0.3),
            nn.Linear(num_features1, self.h1_1),
            nn.LeakyReLU(),
            
            nn.BatchNorm1d(self.h1_1),
            nn.Linear(self.h1_1, self.h1_2),
            nn.LeakyReLU(),
        )
        
        self.head2 = nn.Sequential(
            nn.BatchNorm1d(self.h2_1),
            nn.Dropout(0.30),
            nn.Linear(self.h2_1, self.h2_2),
            nn.ReLU(),
            
            nn.BatchNorm1d(self.h2_2),
            nn.Linear(self.h2_2, self.h2_2),
            nn.ELU(),            
            
            nn.BatchNorm1d(self.h2_2),
            nn.Linear(self.h2_2, self.h2_3),
            nn.ReLU(),  
            
            nn.BatchNorm1d(self.h2_3),
            nn.Linear(self.h2_3, self.h2_3),
            nn.ELU(),            
        )
        self.head3 = nn.Sequential(
            nn.BatchNorm1d(self.h3_1),
            nn.Linear(self.h3_1, self.h3_1),
            nn.LeakyReLU(),
            
            nn.BatchNorm1d(self.h3_1),
            nn.Linear(self.h3_1, self.h3_1),
            nn.LeakyReLU(),
            
            nn.BatchNorm1d(self.h3_1),
            nn.Linear(self.h3_1, num_targets),
        )

    
    def forward(self, input1, input2):
        input3 = self.head1(input1)
        concat = torch.cat((input3, input2), dim=1)
        input4 = self.head2(concat)
        avg = torch.add(input3, input4)
        #avg = torch.div(torch.add(input3, input4), 2)
        
        out = self.head3(avg)
        
        return out

## run train

In [12]:
def run_training(model, trainloader, validloader, tag, epochs, optimizer, scheduler, fine_tune_scheduler, loss_fn, loss_tr, early_stopping_steps, verbose, device, fold, seed):
    
    early_step = 0
    best_loss = np.inf
    best_epoch = 0
    weight_path = 'resnet_weights2/{}_{}_{}.pt'.format(tag, seed, fold)
    
    start = time.time()
    t = time.time() - start
    for epoch in range(epochs):
        # fine tune 
        if fine_tune_scheduler is not None:
            fine_tune_scheduler.step(epoch, model)
            
        train_loss = train_fn(model, optimizer, scheduler, loss_tr, trainloader, device)
        valid_loss = valid_fn(model, loss_fn, validloader, device)

        if epoch % verbose==0:
            t = time.time() - start
            print(f"EPOCH: {epoch}, train_loss: {train_loss}, valid_loss: {valid_loss}, time: {t}")
        
        if valid_loss < best_loss:
            best_loss = valid_loss
            torch.save(model.state_dict(),weight_path)
            early_step = 0
            best_epoch = epoch
        
        elif early_stopping_steps != 0:
            
            early_step += 1
            if (early_step >= early_stopping_steps):
                t = time.time() - start
                print(f"early stopping in iteration {epoch},  : best itaration is {best_epoch}, valid loss is {best_loss}, time: {t}")
                return model
            
    print(f"training until max epoch {epochs},  : best itaration is {best_epoch}, valid loss is {best_loss}, time: {t}")
    return model
            
    
def predict(model, testloader, device):
    model.to(device)
    predictions = inference_fn(model, testloader, device)
    
    return predictions

In [13]:
class SmoothBCEwLogits(_WeightedLoss):
    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        loss = F.binary_cross_entropy_with_logits(inputs, targets,self.weight)

        if  self.reduction == 'sum':
            loss = loss.sum()
        elif  self.reduction == 'mean':
            loss = loss.mean()

        return loss

In [14]:
class FineTuneScheduler:
    def __init__(self, epochs):
        self.epochs = epochs
        self.epochs_per_step = 0
        self.frozen_layers = []
        self.cnt=0

    def copy_without_top(self, model, num_features, num_targets, num_targets_new):
        model_new = Model(num_features, num_features, num_targets)
        model_new.load_state_dict(model.state_dict())

        # Freeze all weights
        for name, param in model_new.named_parameters():
            param.requires_grad = False
            
        #self.epochs_per_step = self.epochs // len(self.frozen_layers)+1  # 24 // 4 = 6
        #self.epochs_per_step = self.epochs // (len(self.frozen_layers)+1)  # 24 // 4 = 6
        self.epochs_per_step = 4
        
        # Replace the top layers with another ones, 最後に追加されてく
        model_new.head3[-2] == nn.BatchNorm1d(model.h3_1)
        model_new.head3[-1] = nn.Linear(model.h3_1, 206)
        model_new.to(DEVICE)
        return model_new

    def step(self, epoch, model):
        if epoch == 0:
            return

        if epoch % self.epochs_per_step == 0:
            self.cnt+=1
            
            if self.cnt == 1:
                for name, param in model.head3.named_parameters():
                    param.requires_grad = True
                
            elif self.cnt == 2:
                for name, param in model.head2.named_parameters():
                    l_num = int(name[0:2].replace('.', ""))
                    if l_num in [7, 8, 10, 11]:
                        param.requires_grad = True
                for name, param in model.head1.named_parameters():
                    l_num = int(name[0:2].replace('.', ""))
                    if l_num in [4, 5]:
                        param.requires_grad = True

            elif self.cnt == 3:
                for name, param in model.head2.named_parameters():
                    l_num = int(name[0:2].replace('.', ""))
                    if l_num in [0, 2, 4, 5]:
                        param.requires_grad = True
                for name, param in model.head1.named_parameters():
                    l_num = int(name[0:2].replace('.', ""))
                    if l_num in [0, 2]:
                        param.requires_grad = True

## Training by Fold

In [15]:
BATCH_SIZE = 128
DEVICE = ('cuda:3' if torch.cuda.is_available() else 'cpu')
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5
EPOCHS = 24
EARLY_STOPPING_STEPS = 10

train_preds = np.zeros((X.shape[0], y_nonv.shape[1]))
preds = np.zeros((test_df.shape[0], y_nonv.shape[1]))
imps = []
imp_cols = []
folds = []
test_cv_preds = []

for seed in seeds:
    seed_everything(seed)
    K = 5
    kf = MultilabelStratifiedKFold(n_splits=K, random_state=seed, shuffle=True)
    train_pred = np.zeros(train_preds.shape)
    
    
    ###############################################################################################
    # LOAD LIBRARIES
    targets = SCORED_MOAS.copy()

    # LOCATE DRUGS
    vc = scored["drug_id"].value_counts()
    vc1 = vc.loc[vc<=18].index.sort_values()
    vc2 = vc.loc[vc>18].index.sort_values()

    # STRATIFY DRUGS 18X OR LESS
    dct1 = {}; dct2 = {}
    skf = MultilabelStratifiedKFold(n_splits=K, shuffle=True, random_state=seed)
    tmp = scored.groupby('drug_id')[targets].mean().loc[vc1]
    for fold,(idxT,idxV) in enumerate( skf.split(tmp,tmp[targets])):
        dd = {k:fold for k in tmp.index[idxV].values} # drug id がどのフォールドに属すか格納
        dct1.update(dd)

    # STRATIFY DRUGS MORE THAN 18X
    skf = MultilabelStratifiedKFold(n_splits=K, shuffle=True, random_state=seed)
    tmp = scored.loc[scored["drug_id"].isin(vc2)].reset_index(drop=True)
    for fold,(idxT,idxV) in enumerate( skf.split(tmp,tmp[targets])):
        dd = {k:fold for k in tmp["sig_id"][idxV].values}
        dct2.update(dd)

    # ASSIGN K
    scored['fold'] = scored.drug_id.map(dct1)
    scored.loc[scored["fold"].isna(),'fold'] = scored.loc[scored["fold"].isna(),'sig_id'].map(dct2)
    scored["fold"] = scored["fold"].astype('int8')
    ###############################################################################################

    #for fold, (train_index, valid_index) in enumerate(kf.split(X, y_nonv)):    
    for fold in range(K):
        train_index = scored[scored["fold"] != fold].index.to_list()
        valid_index = scored[scored["fold"] == fold].index.to_list()
        print("======================== fold {} ========================".format(fold+1))
        folds.append(train_index)
                
        # split data
        train_X = X.iloc[train_index]
        train_y = y_nonv[train_index]
        train_y_all = y_all_nonv[train_index]
        valid_X = X.iloc[valid_index]
        valid_y = y_nonv[valid_index]
        valid_y_all = y_all_nonv[valid_index]
        test_X = (test_df.drop("sig_id", axis=1))
        pub_test_X = (pub_test_df.drop("sig_id", axis=1))

        
        ### scaler ##########
        # validの分布を知らせてはいけない
        print(SCALE)
        scale_cols = BIOS_+PRODS
        scaler = make_scaler(SCALE, seed).fit(train_X.append(pub_test_X)[scale_cols])
        for df in [train_X, valid_X, test_X, pub_test_X]:
            df[scale_cols] = scaler.transform(df[scale_cols])

    
        ### PCA ##########
        # validの分布を知らせてはいけない
        print("PCA")
        n_decom_g = 80 # 80
        n_decom_c = 10 # 10
        decom_g_cols = [f"pca_g-{i}" for i in range(n_decom_g)]
        decom_c_cols = [f"pca_c-{i}" for i in range(n_decom_c)]
        
        pca_g = PCA(n_components = n_decom_g, random_state = seed).fit(train_X.append(pub_test_X)[GENES_])
        pca_c = PCA(n_components = n_decom_c, random_state = seed).fit(train_X.append(pub_test_X)[CELLS_])
        for df in [train_X, valid_X, test_X, pub_test_X]:
            df[decom_g_cols] = pca_g.transform(df[GENES_])
            df[decom_c_cols] = pca_c.transform(df[CELLS_])


        # prepare data for training
        train_X1 = train_X.values
        train_X2 = train_X.values
        valid_X1 = valid_X.values
        valid_X2 = valid_X.values
        test_X1 = test_X.values
        test_X2 = test_X.values
        print(train_X1.shape[1])
        print(train_X2.shape[1])
        
        
        
        # ================================model training===========================
        train_dataset = MoAResNetDataset(train_X1, train_X2, train_y_all)
        valid_dataset = MoAResNetDataset(valid_X1, valid_X2, valid_y_all)
        test_dataset = TestResNetDataset(test_X1, test_X2)
        trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
        validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
        testloader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

        model = Model(
            num_features1=train_X1.shape[1],
            num_features2=train_X2.shape[1],
            num_targets=train_y_all.shape[1],
        )

        model.to(DEVICE)
        
        optimizer = torch.optim.Adam( model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY,)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer,pct_start=0.1, div_factor=1e3, max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader) )
        fine_tune_scheduler = FineTuneScheduler(EPOCHS)
    
        loss_fn = nn.BCEWithLogitsLoss()
        loss_tr = SmoothBCEwLogits(smoothing=1e-3)
        
        # train
        model = run_training(
            model=model,
            trainloader=trainloader,
            validloader=validloader,
            tag="ALL",
            epochs=EPOCHS,
            optimizer=optimizer,
            scheduler=scheduler,
            fine_tune_scheduler=None,
            loss_fn=loss_fn,
            loss_tr=loss_tr,
            early_stopping_steps=EARLY_STOPPING_STEPS,
            device=DEVICE,
            verbose=5,
            fold=fold,
            seed=seed,)
        model.load_state_dict(torch.load('resnet_weights2/ALL_{}_{}.pt'.format(seed, fold)))
        
        model = fine_tune_scheduler.copy_without_top(model, train_X.shape[1], train_y_all.shape[1], train_y.shape[1])
        
        # train
        optimizer = torch.optim.Adam( model.parameters(), lr=LEARNING_RATE, weight_decay=3e-6, eps=1e-6)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer,pct_start=0.1, div_factor=1e2, max_lr=3e-3, epochs=EPOCHS, steps_per_epoch=len(trainloader) )
        train_dataset = MoAResNetDataset(train_X1, train_X2, train_y)
        valid_dataset = MoAResNetDataset(valid_X1, valid_X2, valid_y)
        trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
        validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
        
        model = run_training(
            model=model,
            trainloader=trainloader,
            validloader=validloader,
            tag="SCORED",
            epochs=EPOCHS,
            optimizer=optimizer,
            scheduler=scheduler,
            fine_tune_scheduler=fine_tune_scheduler,
            loss_fn=loss_fn,
            loss_tr=loss_tr,
            early_stopping_steps=EARLY_STOPPING_STEPS,
            device=DEVICE,
            verbose=5,
            fold=fold,
            seed=seed,)
        
        model.load_state_dict(torch.load('resnet_weights2/SCORED_{}_{}.pt'.format(seed, fold)))
        #valid predict
        val_preds = predict(
            model=model,
            testloader=validloader,
            device=DEVICE,)
        
        #test predict
        test_preds = predict(
            model=model,
            testloader=testloader,
            device=DEVICE)
        
        # ================================model training===========================

        train_pred[valid_index] +=  val_preds
        preds += test_preds / (K*len(seeds))


    print("seed {} , cv score : {}".format(seed, metric(y_nonv, train_pred)))
    train_preds += train_pred/len(seeds)

print("cv score : {}".format(metric(y_nonv, train_preds)))

quantile
PCA
900
900
EPOCH: 0, train_loss: 0.4340013805979296, valid_loss: 0.01069300046988896, time: 6.35381817817688
EPOCH: 5, train_loss: 0.013122130698267963, valid_loss: 0.010098876005836895, time: 25.15514063835144
EPOCH: 10, train_loss: 0.013083131639691799, valid_loss: 0.010149232617446354, time: 43.585336446762085
EPOCH: 15, train_loss: 0.012732728195471176, valid_loss: 0.009554882267756122, time: 61.797035217285156
EPOCH: 20, train_loss: 0.012245868548642899, valid_loss: 0.00935284129476973, time: 80.76892590522766
training until max epoch 24,  : best itaration is 23, valid loss is 0.009284629273627486, time: 80.76892590522766
EPOCH: 0, train_loss: 0.24736583952292585, valid_loss: 0.026441891012447222, time: 2.1323156356811523
EPOCH: 5, train_loss: 0.02009248544556507, valid_loss: 0.017633314510541304, time: 13.868730068206787
EPOCH: 10, train_loss: 0.019658282561146694, valid_loss: 0.017586714667933327, time: 26.472619771957397
EPOCH: 15, train_loss: 0.01924966051634671, val

EPOCH: 20, train_loss: 0.012301127752629074, valid_loss: 0.009160971308925321, time: 78.94689297676086
training until max epoch 24,  : best itaration is 23, valid loss is 0.009096625912934541, time: 78.94689297676086
EPOCH: 0, train_loss: 0.24629050294739485, valid_loss: 0.02626347744039127, time: 2.1840856075286865
EPOCH: 5, train_loss: 0.0201434152214414, valid_loss: 0.01768411985997643, time: 13.177754402160645
EPOCH: 10, train_loss: 0.01980576465708496, valid_loss: 0.017634844141347066, time: 26.84462308883667
EPOCH: 15, train_loss: 0.019403204145113918, valid_loss: 0.017425556374447686, time: 44.22876977920532
EPOCH: 20, train_loss: 0.018346348063625992, valid_loss: 0.017433884127863817, time: 62.96764779090881
training until max epoch 24,  : best itaration is 18, valid loss is 0.01741960399917194, time: 62.96764779090881
quantile
PCA
900
900
EPOCH: 0, train_loss: 0.43383523700353893, valid_loss: 0.01073170020518934, time: 3.695509433746338
EPOCH: 5, train_loss: 0.0130768064342007

EPOCH: 5, train_loss: 0.020191427536200787, valid_loss: 0.01769583033663886, time: 13.104014873504639
EPOCH: 10, train_loss: 0.019877927020138155, valid_loss: 0.017509432536150726, time: 26.966150522232056
EPOCH: 15, train_loss: 0.019390680543754413, valid_loss: 0.017259053273924758, time: 45.55992293357849
EPOCH: 20, train_loss: 0.018346232928983543, valid_loss: 0.017001296087567296, time: 64.56409406661987
training until max epoch 24,  : best itaration is 21, valid loss is 0.016988489484148366, time: 64.56409406661987
quantile
PCA
900
900
EPOCH: 0, train_loss: 0.43534644128904293, valid_loss: 0.010540581095431533, time: 3.4921422004699707
EPOCH: 5, train_loss: 0.013088061331506194, valid_loss: 0.010682850810033935, time: 21.832183837890625
EPOCH: 10, train_loss: 0.013061343746626899, valid_loss: 0.010152138317269938, time: 39.12446689605713
EPOCH: 15, train_loss: 0.01270381441462214, valid_loss: 0.009728668736560003, time: 58.44748663902283
EPOCH: 20, train_loss: 0.012178563254538678

EPOCH: 15, train_loss: 0.01894451940999083, valid_loss: 0.01739955392799207, time: 44.95643591880798
EPOCH: 20, train_loss: 0.017536941679545504, valid_loss: 0.01728220387761082, time: 63.14128518104553
training until max epoch 24,  : best itaration is 17, valid loss is 0.017218319566122124, time: 63.14128518104553
quantile
PCA
900
900
EPOCH: 0, train_loss: 0.43437114170334046, valid_loss: 0.010941381406571185, time: 3.765695571899414
EPOCH: 5, train_loss: 0.013023793947059607, valid_loss: 0.010724165556686266, time: 22.956011056900024
EPOCH: 10, train_loss: 0.013008975376631472, valid_loss: 0.010287851999912943, time: 40.97979164123535
EPOCH: 15, train_loss: 0.012659749556062878, valid_loss: 0.009841969369777612, time: 60.20433187484741
EPOCH: 20, train_loss: 0.01218324058783659, valid_loss: 0.009573350234755447, time: 79.53020191192627
training until max epoch 24,  : best itaration is 23, valid loss is 0.009548833593726158, time: 79.53020191192627
EPOCH: 0, train_loss: 0.246079618381

EPOCH: 5, train_loss: 0.013098471993715435, valid_loss: 0.01016149896063975, time: 22.65220332145691
EPOCH: 10, train_loss: 0.013108203720733307, valid_loss: 0.010071503131517343, time: 41.31143546104431
EPOCH: 15, train_loss: 0.012778731893100168, valid_loss: 0.009718786472720758, time: 60.26016092300415
EPOCH: 20, train_loss: 0.012220086178917816, valid_loss: 0.00931091901979276, time: 79.62376952171326
training until max epoch 24,  : best itaration is 22, valid loss is 0.009231965469994715, time: 79.62376952171326
EPOCH: 0, train_loss: 0.2532251123608886, valid_loss: 0.02619465733213084, time: 2.043818473815918
EPOCH: 5, train_loss: 0.02000932898912309, valid_loss: 0.01752994850810085, time: 13.634782314300537
EPOCH: 10, train_loss: 0.019669862356090893, valid_loss: 0.017473053533051695, time: 27.354305028915405
EPOCH: 15, train_loss: 0.019188451445728973, valid_loss: 0.017406568569796426, time: 45.962541341781616
EPOCH: 20, train_loss: 0.01811024713991345, valid_loss: 0.01740812483

EPOCH: 15, train_loss: 0.012804606335534565, valid_loss: 0.009477746563360971, time: 59.99393391609192
EPOCH: 20, train_loss: 0.012296723228865776, valid_loss: 0.00911999324007946, time: 79.3207905292511
training until max epoch 24,  : best itaration is 22, valid loss is 0.009062352645046571, time: 79.3207905292511
EPOCH: 0, train_loss: 0.24389820436582618, valid_loss: 0.025776458191959298, time: 2.02762508392334
EPOCH: 5, train_loss: 0.020280963970699173, valid_loss: 0.017105550998274016, time: 13.356502056121826
EPOCH: 10, train_loss: 0.0199093631050293, valid_loss: 0.016913929132415968, time: 27.01883101463318
EPOCH: 15, train_loss: 0.019512814847563488, valid_loss: 0.0168936052633559, time: 44.735431432724
EPOCH: 20, train_loss: 0.01840503532709419, valid_loss: 0.016777705291614813, time: 63.56040978431702
training until max epoch 24,  : best itaration is 19, valid loss is 0.01676813217208666, time: 63.56040978431702
quantile
PCA
900
900
EPOCH: 0, train_loss: 0.43637850842554204, v

In [16]:
train_preds2 = np.zeros((TR_SIZE,  y.shape[1]))
train_preds2[train_nonvehicle_index] = train_preds


preds2 = np.zeros((TE_SIZE, y.shape[1]))
preds2[test_nonvehicle_index] = preds

print("cv score : {}".format(metric(y, train_preds2)))
#cv score : 0.015684400394041592

cv score : 0.015697378419419396


In [None]:
sub_df = pd.read_csv("../../../Data/Raw/sample_submission.csv")
#sub_df = pd.read_csv("../input/lish-moa/sample_submission.csv")
cols = [col for col in sub_df.columns if col != "sig_id"]
sub_df[cols] = preds2
sub_df.to_csv("submission.csv", index=False)

In [None]:
!zip resnet_weights2.zip resnet_weights2/SCORED*