まずは普通のモデル  
レイヤーの幅を少し大きくした

特徴セット
・prod
・stats
・pca(80, 10)

In [None]:
import warnings, random, os, sys, tqdm, time
sys.path.append("../")
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler, MinMaxScaler, QuantileTransformer, RobustScaler

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.model_selection import KFold
from scipy.spatial import distance
from scipy.stats import pearsonr

import torch
from torch import nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.modules.loss import _WeightedLoss
from torch.optim.lr_scheduler import ReduceLROnPlateau

from pytorch_tabnet.tab_model import TabNetRegressor
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

pd.set_option("display.max_columns", 1200)
pd.set_option("display.max_rows", 1200)
%matplotlib inline



In [None]:
def metric(y_true, y_pred):
    """Return the mean of the per-column binary log losses.

    Args:
        y_true: 2-D array of binary ground-truth labels; columns are scored
            independently.
        y_pred: 2-D array of predicted probabilities, indexed the same way
            as ``y_true``.

    Returns:
        The unweighted mean over columns of ``log_loss`` for that column.
    """
    # ``labels`` is given explicitly so that a column which happens to contain
    # only one class does not make ``log_loss`` raise.
    per_column = [
        log_loss(y_true[:, i], y_pred[:, i], labels=[0, 1])
        for i in range(y_true.shape[1])
    ]
    return np.mean(per_column)

def seed_everything(seed_value):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also exports ``PYTHONHASHSEED`` and, when CUDA is available, seeds the
    CUDA generators and puts cuDNN into deterministic mode.

    Args:
        seed_value: Integer seed applied to every generator.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        # Benchmark mode lets cuDNN pick kernels by timing them, which can
        # differ between runs; it has to be off for reproducible results.
        torch.backends.cudnn.benchmark = False


seed_everything(42)
        
    
def make_scaler(flag, seed):
    """Build the feature scaler selected by ``flag``.

    Args:
        flag: One of "quantile", "gauss", "standard", "minmax" or "robust".
        seed: Random state; only used by the quantile transformer.

    Returns:
        A new, unfitted scaler instance.

    Raises:
        ValueError: If ``flag`` is not one of the supported names.
    """
    if flag == "quantile":
        return QuantileTransformer(n_quantiles=100, random_state=seed, output_distribution="normal")
    if flag == "gauss":
        # NOTE(review): GaussRankScaler is not imported in the visible part of
        # this file; confirm it is defined before using this option.
        return GaussRankScaler()
    if flag == "standard":
        return StandardScaler()
    if flag == "minmax":
        return MinMaxScaler()
    if flag == "robust":
        return RobustScaler()
    # Previously an unknown flag fell through and returned None, which only
    # surfaced later as an AttributeError on ``.fit``.
    raise ValueError("unknown scaler flag: {!r}".format(flag))
    
def special_pearsonr(X, Y):
    """Turn the Pearson correlation of ``X`` and ``Y`` into a non-negative score.

    A negative correlation ``r`` is mapped to ``|r|``; a non-negative one is
    mapped to ``1 - r``.
    """
    r = pearsonr(X, Y)[0]
    return abs(r) if r < 0 else 1 - r

# Seeds to train with (one full K-fold run per seed) and the name of the
# scaler that make_scaler should build.
seeds = list(range(7))
SCALE = "quantile"

    

In [None]:
# Load the raw competition tables and derive the column groups used below.
# 772 "g-" columns, 100 "c-" columns; classification over 206 classes and
# 402 classes (presumably the scored and non-scored targets).

train_df = pd.read_csv("../../../Data/Raw/train_features.csv")
test_df = pd.read_csv("../../../Data/Raw/test_features.csv")
# NOTE(review): pub_test_df is read from the same file as test_df.
pub_test_df = pd.read_csv("../../../Data/Raw/test_features.csv")
drug_df = pd.read_csv("../../../Data/Raw/train_drug.csv")

# Attach drug_id to every training row (default inner join on sig_id).
train_df = train_df.merge(drug_df, on="sig_id")

y = pd.read_csv("../../../Data/Raw/train_targets_scored.csv")
y_non = pd.read_csv("../../../Data/Raw/train_targets_nonscored.csv")
# Scored and non-scored targets side by side (not used further in this file).
y_all = pd.concat([y, y_non.drop("sig_id", axis=1)], axis=1)
# drug_id is needed on the targets for the drug-aware fold assignment later.
y = y.merge(drug_df, on='sig_id', how='left')

# Feature column groups, selected by name prefix.
GENES = [col for col in train_df.columns if col.startswith("g-")]
CELLS = [col for col in train_df.columns if col.startswith("c-")]
BIOS = GENES + CELLS


# Target column names (identifier columns excluded).
SCORED_MOAS = [col for col in y.columns if col != "sig_id" and col != "drug_id"]
NONSCORED_MOAS = [col for col in y_non.columns if col != "sig_id"]
ALL_MOAS = SCORED_MOAS + NONSCORED_MOAS


# Row counts and row positions recorded BEFORE the control rows are removed,
# so predictions can later be written back into full-size arrays.
TR_SIZE = train_df.shape[0]
TE_SIZE = test_df.shape[0]

train_nonvehicle_index = train_df[train_df["cp_type"] != "ctl_vehicle"].index
test_nonvehicle_index = test_df[test_df["cp_type"] != "ctl_vehicle"].index

# Combined categorical "<cp_time> * <cp_dose>", one-hot encoded later.
train_df["time_dose"] = train_df["cp_time"].astype(str) + " * " + train_df["cp_dose"]
test_df["time_dose"] = test_df["cp_time"].astype(str) + " * " + test_df["cp_dose"]
pub_test_df["time_dose"] = pub_test_df["cp_time"].astype(str) + " * " + pub_test_df["cp_dose"]

# remove cp_type = ctl_vehicle
mask = train_df["cp_type"] != "ctl_vehicle"
train_df = train_df[mask].drop("cp_type", axis=1).reset_index(drop=True)
test_df = test_df[test_df["cp_type"] != "ctl_vehicle"].drop("cp_type", axis=1).reset_index(drop=True)
pub_test_df = pub_test_df[pub_test_df["cp_type"] != "ctl_vehicle"].drop("cp_type", axis=1).reset_index(drop=True)
# mask was built from train_df; applying it to y assumes both frames share the
# same row order and index -- TODO confirm.
y_nonv = y[mask].reset_index(drop=True)

# `scored` keeps drug_id (used for fold assignment); y_nonv and y drop it.
scored = y_nonv.copy()
y_nonv.drop("drug_id", axis=1, inplace=True)
y.drop("drug_id", axis=1, inplace=True)

In [None]:
#prod_cols = [    ['g-712',  'g-208',  'g-38',  'g-100',  'g-123',  'g-328',  'g-744',  'g-248',  'g-460',  'g-731',  'g-417',  'g-349',  'g-131'], ['g-228',  'g-75',  'g-67',  'g-760',  'g-37',  'g-406',  'g-50',  'g-672',  'g-63',  'g-72',  'g-195'], ['g-100', 'g-157', 'g-178'],['g-183', 'g-300', 'g-767'],['g-50', 'g-37', 'g-489', 'g-257', 'g-332'],['g-270', 'g-135', 'g-231', 'g-158', 'g-478', 'g-146', 'g-491', 'g-392'],['g-745', 'g-635', 'g-235'], ['g-300', 'g-414', 'g-62', 'g-34'], ['g-91', 'g-392'], ['g-75', 'g-113'], ['g-599', 'g-261', 'g-38', 'g-146', 'g-392', 'g-512', 'g-744'], ['g-50', 'g-332', 'g-37', 'g-58', 'g-705'], ['g-157', 'g-178'],['g-759', 'g-100', 'g-167', 'g-75', 'g-431', 'g-189', 'g-522', 'g-91'],['g-202', 'g-385', 'g-769'],           ]
# Groups of three gene columns; the loop below turns each group into a single
# "prod-<col> * <col> * <col>" feature.
prod_cols = [['g-145', 'g-201', 'g-208'], ['g-370', 'g-508', 'g-37'], ['g-38', 'g-392', 'g-707'], ['g-328', 'g-28', 'g-392'], ['g-441', 'g-157', 'g-392'], ['g-181', 'g-100', 'g-392'], ['g-67', 'g-760', 'g-50'], ['g-731', 'g-100', 'g-707'], ['g-478', 'g-468', 'g-310'], ['g-91', 'g-145', 'g-208'], ['g-106', 'g-744', 'g-91'], ['g-131', 'g-208', 'g-392'], ['g-144', 'g-123', 'g-86'], ['g-228', 'g-72', 'g-67'], ['g-31', 'g-328', 'g-460'], ['g-392', 'g-731', 'g-100'], ['g-732', 'g-744', 'g-707'], ['g-705', 'g-375', 'g-704'], ['g-508', 'g-50', 'g-411'], ['g-234', 'g-58', 'g-520'], ['g-503', 'g-761', 'g-50'], ['g-113', 'g-75', 'g-178'], ['g-50', 'g-508', 'g-113'], ['g-113', 'g-375', 'g-75'], ['g-576', 'g-452', 'g-392'], ['g-50', 'g-37', 'g-36'], ['g-707', 'g-133', 'g-392'], ['g-484', 'g-392', 'g-544'], ['g-508', 'g-67', 'g-370'], ['g-123', 'g-731', 'g-100'], ['g-298', 'g-477', 'g-644'], ['g-72', 'g-370', 'g-50'], ['g-67', 'g-178', 'g-113'], ['g-744', 'g-608', 'g-100'], ['g-91', 'g-100', 'g-707'], ['g-37', 'g-228', 'g-202'], ['g-37', 'g-300', 'g-370'], ['g-234', 'g-508', 'g-595'], ['g-596', 'g-744', 'g-707'], ['g-300', 'g-227', 'g-591'], ['g-135', 'g-392', 'g-512'], ['g-731', 'g-744', 'g-158'], ['g-69', 'g-707', 'g-100'], ['g-276', 'g-653', 'g-291'], ['g-624', 'g-615', 'g-189'], ['g-181', 'g-707', 'g-38'], ['g-72', 'g-75', 'g-508'], ['g-231', 'g-707', 'g-392'], ['g-508', 'g-37', 'g-72'], ['g-725', 'g-712', 'g-640'], ['g-67', 'g-644', 'g-113'], ['g-508', 'g-228', 'g-656'], ['g-185', 'g-37', 'g-672'], ['g-370', 'g-50', 'g-503'], ['g-201', 'g-745', 'g-599'], ['g-332', 'g-50', 'g-571'], ['g-50', 'g-37', 'g-59'], ['g-508', 'g-113', 'g-231'], ['g-707', 'g-158', 'g-100'], ['g-257', 'g-50', 'g-72']]

# Add one "prod-..." feature per column group to every frame.
# NOTE: despite the "prod-" prefix, the value is the row-wise MEAN of the
# group's columns.
for cols in prod_cols:
    name = "prod-{}".format(" * ".join(cols))
    for frame in (train_df, test_df, pub_test_df):
        frame[name] = frame[cols].mean(axis=1)

# Names of all generated features, in column order.
PRODS = [c for c in train_df.columns if c.startswith("prod-")]

#for df in [train_df, test_df, pub_test_df]:
#    df["prod-p"] = df[['g-712',  'g-208',  'g-38',  'g-100',  'g-123',  'g-328',  'g-744',  'g-248',  'g-460',  'g-731',  'g-417',  'g-349',  'g-131']].mean(axis=1)
#    df["prod-n"] = df[['g-228', 'g-75', 'g-67', 'g-760', 'g-37', 'g-406', 'g-50', 'g-672', 'g-63', 'g-72', 'g-195']].mean(axis=1)
#    df["prod"] = df.apply(lambda x : 1 if x["prod-p"] >= 4 and x["prod-n"] <= -5.8 else 0, axis=1)
#    df.drop("prod-p", inplace=True, axis=1)
#    df.drop("prod-n", inplace=True, axis=1)

In [None]:
# Preprocessing that is done once, outside the cross-validation folds.

# Variance threshold: drop every gene / cell / prod column whose variance over
# the stacked train + public-test rows does not exceed VAR_THRESHOLD.
VAR_THRESHOLD = 0.8
temp = pd.concat([train_df, pub_test_df])
drop_cols = [c for c in BIOS + PRODS if temp[c].var() <= VAR_THRESHOLD]
del temp

print("drop cols num : {}".format(len(drop_cols)))
print(drop_cols)
for frame in (train_df, test_df, pub_test_df):
    frame.drop(columns=drop_cols, inplace=True)

# Column groups that survived the filter (trailing underscore = filtered).
GENES_ = [c for c in train_df.columns if c.startswith("g-")]
CELLS_ = [c for c in train_df.columns if c.startswith("c-")]
BIOS_ = GENES_ + CELLS_
PRODS = [c for c in train_df.columns if c.startswith("prod-")]

# One-hot encode the combined time/dose category (first level dropped) and
# remove the raw categorical columns.
# The dummy dtype is pinned to uint8, the default before pandas 2.0. Newer
# pandas returns bool dummies; mixed with the float features those would turn
# the later ``.values`` call into an object array that cannot be converted to
# a float tensor.
drop_cols = ["cp_time", "cp_dose", "time_dose"]
train_df = pd.concat([pd.get_dummies(train_df["time_dose"], prefix="onehot", drop_first=True, dtype=np.uint8), train_df.drop(drop_cols, axis=1)], axis=1)
test_df = pd.concat([pd.get_dummies(test_df["time_dose"], prefix="onehot", drop_first=True, dtype=np.uint8), test_df.drop(drop_cols, axis=1)], axis=1)
pub_test_df = pd.concat([pd.get_dummies(pub_test_df["time_dose"], prefix="onehot", drop_first=True, dtype=np.uint8), pub_test_df.drop(drop_cols, axis=1)], axis=1)

# Row-wise aggregate statistics over the gene, cell and combined columns.
# (An earlier variant, previously kept here as an unused string literal,
# shifted each statistic so its minimum became 1 and then took np.log; the
# std features were logged directly. It is no longer applied.)
print("agg")

for frame in [train_df, pub_test_df, test_df]:
    # The insertion order (group outer, statistic inner) fixes the column
    # order of the feature matrix, so it is kept exactly as before.
    for suffix, group_cols in (("g", GENES_), ("c", CELLS_), ("gc", BIOS_)):
        values = frame[group_cols]
        frame["agg-sum-" + suffix] = values.sum(axis=1)
        frame["agg-mean-" + suffix] = values.mean(axis=1)
        frame["agg-std-" + suffix] = values.std(axis=1)
        frame["agg-kurt-" + suffix] = values.kurt(axis=1)
        frame["agg-skew-" + suffix] = values.skew(axis=1)


AGG = [c for c in train_df.columns if c.startswith("agg-")]

In [None]:
# Bare list literals with no assignment; they have no effect on later cells.
# NOTE(review): these look like pasted output of the variance-threshold
# `print(drop_cols)` from two runs (the second also lists one "prod-" column)
# -- confirm, and consider moving them into a markdown note.
['g-1', 'g-6', 'g-15', 'g-18', 'g-19', 'g-21', 'g-23', 'g-25', 'g-71', 'g-77', 'g-78', 'g-94', 'g-104', 'g-151', 'g-153', 'g-184', 'g-193', 'g-216', 'g-219', 'g-233', 'g-262', 'g-267', 'g-268', 'g-274', 'g-279', 'g-292', 'g-303', 'g-307', 'g-312', 'g-318', 'g-324', 'g-331', 'g-340', 'g-345', 'g-363', 'g-382', 'g-396', 'g-404', 'g-420', 'g-435', 'g-454', 'g-471', 'g-472', 'g-481', 'g-482', 'g-485', 'g-507', 'g-518', 'g-523', 'g-536', 'g-542', 'g-547', 'g-549', 'g-550', 'g-552', 'g-560', 'g-581', 'g-583', 'g-611', 'g-633', 'g-649', 'g-687', 'g-716', 'g-718', 'g-734', 'g-754', 'g-756']
['g-1', 'g-6', 'g-15', 'g-18', 'g-19', 'g-21', 'g-23', 'g-25', 'g-71', 'g-77', 'g-78', 'g-94', 'g-104', 'g-151', 'g-153', 'g-184', 'g-193', 'g-216', 'g-219', 'g-233', 'g-262', 'g-267', 'g-268', 'g-274', 'g-279', 'g-292', 'g-303', 'g-307', 'g-312', 'g-318', 'g-324', 'g-331', 'g-340', 'g-345', 'g-363', 'g-382', 'g-396', 'g-404', 'g-420', 'g-435', 'g-454', 'g-471', 'g-472', 'g-481', 'g-482', 'g-485', 'g-507', 'g-518', 'g-523', 'g-536', 'g-542', 'g-547', 'g-549', 'g-550', 'g-552', 'g-560', 'g-581', 'g-583', 'g-611', 'g-633', 'g-649', 'g-687', 'g-716', 'g-718', 'g-734', 'g-754', 'g-756', 'prod-g-300 * g-227 * g-591']

In [None]:
# Feature frame and target arrays without the sig_id identifier column.
# NOTE: y_nonv and y are rebound from DataFrames to NumPy arrays here, so this
# cell cannot be re-run without re-running the loading cell first.
X = train_df.drop(columns="sig_id")
y_nonv = y_nonv.drop(columns="sig_id").values
y = y.drop(columns="sig_id").values

## Dataset Class

In [None]:
class MoADataset:
    """Map-style dataset pairing feature rows with target rows.

    Both arrays are indexed as ``array[idx, :]``; every item is returned as a
    dict of float32 tensors under the keys ``'x'`` and ``'y'``.
    """

    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        # One sample per feature row.
        return self.features.shape[0]

    def __getitem__(self, idx):
        feature_row = self.features[idx, :]
        target_row = self.targets[idx, :]
        return {
            'x': torch.tensor(feature_row, dtype=torch.float32),
            'y': torch.tensor(target_row, dtype=torch.float32),
        }
    
class TestDataset:
    """Map-style dataset of feature rows only (no targets).

    Every item is a dict holding a single float32 tensor under ``'x'``.
    """

    def __init__(self, features):
        self.features = features

    def __len__(self):
        # One sample per feature row.
        return self.features.shape[0]

    def __getitem__(self, idx):
        feature_row = self.features[idx, :]
        return {'x': torch.tensor(feature_row, dtype=torch.float32)}

## func 

In [None]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: Network to train; switched to train mode.
        optimizer: Optimizer stepped once per batch.
        scheduler: LR scheduler, also stepped once per batch.
        loss_fn: Callable ``(outputs, targets) -> scalar loss tensor``.
        dataloader: Iterable of dicts with ``'x'`` and ``'y'`` tensors; must
            support ``len()``.
        device: Device the batches are moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []
    for batch in dataloader:
        optimizer.zero_grad()
        features = batch['x'].to(device)
        labels = batch['y'].to(device)
        loss = loss_fn(model(features), labels)
        loss.backward()
        optimizer.step()
        # Per-batch scheduler step, as needed by cyclic schedules.
        scheduler.step()
        batch_losses.append(loss.item())

    return sum(batch_losses) / len(dataloader)


def valid_fn(model, loss_fn, dataloader, device):
    """Evaluate the model on a validation loader and return the mean batch loss.

    Args:
        model: Network to evaluate; switched to eval mode.
        loss_fn: Callable ``(outputs, targets) -> scalar loss tensor``.
        dataloader: Iterable of dicts with ``'x'`` and ``'y'`` tensors; must
            support ``len()``.
        device: Device the batches are moved to.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.eval()
    final_loss = 0

    # Validation needs no gradients; without no_grad() every batch built an
    # autograd graph. The predictions that used to be collected here were
    # never returned, so they are no longer computed.
    with torch.no_grad():
        for data in dataloader:
            inputs, targets = data['x'].to(device), data['y'].to(device)
            outputs = model(inputs)
            final_loss += loss_fn(outputs, targets).item()

    final_loss /= len(dataloader)

    return final_loss


def inference_fn(model, dataloader, device):
    """Predict sigmoid probabilities for every batch of ``dataloader``.

    Args:
        model: Network producing logits; switched to eval mode.
        dataloader: Iterable of dicts with an ``'x'`` tensor.
        device: Device the batches are moved to.

    Returns:
        NumPy array with the per-batch probabilities concatenated along the
        first axis.
    """
    model.eval()
    chunks = []
    for batch in dataloader:
        features = batch['x'].to(device)
        with torch.no_grad():
            logits = model(features)
        probabilities = logits.sigmoid().detach().cpu().numpy()
        chunks.append(probabilities)

    return np.concatenate(chunks)

## Model

In [None]:
"""class Model(nn.Module):
    def __init__(self, num_features, num_targets):
        super(Model, self).__init__()
        self.hidden1 = 1500
        self.hidden2 =  1500
        self.hidden3 = 500
        
        self.nn_model = nn.Sequential(
            nn.BatchNorm1d(num_features),
            nn.utils.weight_norm(nn.Linear(num_features, self.hidden1)),
            nn.LeakyReLU(),
            
            nn.BatchNorm1d(self.hidden1),
            nn.Dropout(0.45),
            nn.utils.weight_norm(nn.Linear(self.hidden1, self.hidden2)),
            nn.LeakyReLU(),
            
            nn.BatchNorm1d(self.hidden2),
            nn.Dropout(0.35),
            nn.utils.weight_norm(nn.Linear(self.hidden2, self.hidden3)),
            nn.LeakyReLU(),
            
            nn.BatchNorm1d(self.hidden3),
            nn.Dropout(0.25),
            nn.utils.weight_norm(nn.Linear(self.hidden3, num_targets)),
        )

    
    def forward(self, x):
        return self.nn_model(x)
"""
class Model(nn.Module):
    """Fully connected network: num_features -> 2048 -> 1024 -> num_targets.

    Every linear layer is weight-normalised and preceded by batch
    normalisation; the two later stages also apply dropout (p=0.25). The
    output is raw logits (no final activation).
    """

    def __init__(self, num_features, num_targets):
        super().__init__()
        self.hidden1 = 2048
        self.hidden2 = 1024

        def dense(n_in, n_out):
            # Weight-normalised linear layer.
            return nn.utils.weight_norm(nn.Linear(n_in, n_out))

        # Layers are created in the same order as they are applied, so the
        # Sequential indices (and thus the state_dict keys) stay fixed.
        layers = [
            nn.BatchNorm1d(num_features),
            dense(num_features, self.hidden1),
            nn.LeakyReLU(),
            nn.BatchNorm1d(self.hidden1),
            nn.Dropout(0.25),
            dense(self.hidden1, self.hidden2),
            nn.LeakyReLU(),
            nn.BatchNorm1d(self.hidden2),
            nn.Dropout(0.25),
            dense(self.hidden2, num_targets),
        ]
        self.nn_model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits for a batch of feature rows."""
        return self.nn_model(x)

## run train

In [None]:
def run_training(model, trainloader, validloader, epoch_, optimizer, scheduler, loss_fn, loss_tr, early_stopping_steps, verbose, device, fold, seed):
    """Train ``model`` with best-checkpoint saving and early stopping.

    After every epoch the validation loss is compared with the best value so
    far; on improvement the weights are saved to
    ``dnn_weights/<seed>_<fold>.pt``.

    Args:
        model: Network to train.
        trainloader: Loader passed to ``train_fn``.
        validloader: Loader passed to ``valid_fn``.
        epoch_: Maximum number of epochs.
        optimizer: Optimizer passed to ``train_fn``.
        scheduler: LR scheduler passed to ``train_fn``.
        loss_fn: Loss used for validation.
        loss_tr: Loss used for training.
        early_stopping_steps: Stop after this many consecutive epochs without
            improvement; 0 disables early stopping.
        verbose: Log every ``verbose`` epochs (the last epoch is always
            logged); 0 disables the periodic log.
        device: Device passed to the train / validation functions.
        fold: Fold id, used in the checkpoint file name.
        seed: Seed id, used in the checkpoint file name.

    Returns:
        The same ``model`` object, holding the weights of the LAST epoch run
        (not the best one); load the checkpoint to get the best weights.
    """
    weight_path = 'dnn_weights/{}_{}.pt'.format(seed, fold)
    # torch.save does not create missing directories.
    os.makedirs(os.path.dirname(weight_path), exist_ok=True)

    early_step = 0
    best_loss = np.inf
    best_epoch = 0

    start = time.time()
    for epoch in range(epoch_):
        train_loss = train_fn(model, optimizer, scheduler, loss_tr, trainloader, device)
        valid_loss = valid_fn(model, loss_fn, validloader, device)
        # Guarding on ``verbose`` avoids a modulo-by-zero when it is 0.
        if (verbose and epoch % verbose == 0) or epoch == epoch_ - 1:
            t = time.time() - start
            print(f"EPOCH: {epoch}, train_loss: {train_loss}, valid_loss: {valid_loss}, time: {t}")

        if valid_loss < best_loss:
            best_loss = valid_loss
            torch.save(model.state_dict(), weight_path)
            early_step = 0
            best_epoch = epoch

        elif early_stopping_steps != 0:

            early_step += 1
            if (early_step >= early_stopping_steps):
                t = time.time() - start
                print(f"early stopping in iteration {epoch},  : best itaration is {best_epoch}, valid loss is {best_loss}, time: {t}")
                return model
    t = time.time() - start
    print(f"training until max epoch {epoch_},  : best itaration is {best_epoch}, valid loss is {best_loss}, time: {t}")
    return model
            
    
def predict(model, testloader, device):
    """Move ``model`` to ``device`` and return ``inference_fn``'s predictions."""
    model.to(device)
    return inference_fn(model, testloader, device)

In [None]:
class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are smoothed as ``t * (1 - smoothing) + 0.5 * smoothing`` before
    the loss is computed.

    Args:
        weight: Optional rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction: ``'mean'``, ``'sum'``, or anything else for the
            unreduced element-wise loss.
        smoothing: Smoothing factor in ``[0, 1)``.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The parent constructor stores ``weight`` and ``reduction``.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        """Return the smoothed targets; ``n_labels`` is accepted but unused."""
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE loss of ``inputs`` (logits) vs ``targets``."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        # Ask for the element-wise loss: the functional's default reduction is
        # 'mean', which made the 'sum' / unreduced modes below return the mean.
        loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight,
            reduction='none')

        if  self.reduction == 'sum':
            loss = loss.sum()
        elif  self.reduction == 'mean':
            loss = loss.mean()

        return loss

## Training by Fold

In [None]:
# ---- Training configuration ----
BATCH_SIZE = 128
# NOTE(review): the GPU index is hard-coded; on a machine with fewer than
# three GPUs 'cuda:2' does not exist.
DEVICE = ('cuda:2' if torch.cuda.is_available() else 'cpu')
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5
EPOCHS = 25
EARLY_STOPPING_STEPS = 10

# Out-of-fold predictions (averaged over seeds) and test predictions
# (averaged over seeds and folds).
train_preds = np.zeros((X.shape[0], y_nonv.shape[1]))
preds = np.zeros((test_df.shape[0], y_nonv.shape[1]))
imps = []
imp_cols = []
folds = []
test_cv_preds = []

for seed in seeds:
    seed_everything(seed)
    K = 5
    kf = MultilabelStratifiedKFold(n_splits=K, random_state=seed, shuffle=True)
    train_pred = np.zeros(train_preds.shape)


    ###############################################################################################
    # Drug-aware fold assignment: drugs seen at most 18 times are assigned to
    # a fold as a whole; rows of more frequent drugs are assigned one by one.
    targets = SCORED_MOAS.copy()

    # LOCATE DRUGS
    vc = scored["drug_id"].value_counts()
    vc1 = vc.loc[vc<=18].index.sort_values()
    vc2 = vc.loc[vc>18].index.sort_values()

    # STRATIFY DRUGS 18X OR LESS
    dct1 = {}; dct2 = {}
    skf = MultilabelStratifiedKFold(n_splits=K, shuffle=True, random_state=seed)
    tmp = scored.groupby('drug_id')[targets].mean().loc[vc1]
    for fold,(idxT,idxV) in enumerate( skf.split(tmp,tmp[targets])):
        dd = {k:fold for k in tmp.index[idxV].values} # record which fold each drug_id belongs to
        dct1.update(dd)

    # STRATIFY DRUGS MORE THAN 18X
    skf = MultilabelStratifiedKFold(n_splits=K, shuffle=True, random_state=seed)
    tmp = scored.loc[scored["drug_id"].isin(vc2)].reset_index(drop=True)
    for fold,(idxT,idxV) in enumerate( skf.split(tmp,tmp[targets])):
        dd = {k:fold for k in tmp["sig_id"][idxV].values}
        dct2.update(dd)

    # ASSIGN K
    scored['fold'] = scored.drug_id.map(dct1)
    scored.loc[scored["fold"].isna(),'fold'] = scored.loc[scored["fold"].isna(),'sig_id'].map(dct2)
    scored["fold"] = scored["fold"].astype('int8')
    ###############################################################################################

    #for fold, (train_index, valid_index) in enumerate(kf.split(X, y_nonv)):
    for fold in range(K):
        train_index = scored[scored["fold"] != fold].index.to_list()
        valid_index = scored[scored["fold"] == fold].index.to_list()
        print("======================== fold {} ========================".format(fold+1))
        folds.append(train_index)

        ### split data ##########
        # drop() returns fresh copies, so the per-fold scaling / PCA below
        # never writes back into X, test_df or pub_test_df.
        train_X = X.iloc[train_index].drop("drug_id", axis=1)
        train_y = y_nonv[train_index]
        valid_X = X.iloc[valid_index].drop("drug_id", axis=1)
        valid_y = y_nonv[valid_index]
        test_X = (test_df.drop("sig_id", axis=1))
        pub_test_X = (pub_test_df.drop("sig_id", axis=1))

        ### scaler ##########
        # Fitted on the training fold plus the public test rows.
        print(SCALE)
        scale_cols = BIOS_#+PRODS+AGG
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        scaler = make_scaler(SCALE, seed).fit(pd.concat([train_X, pub_test_X])[scale_cols])
        for df in [train_X, valid_X, test_X, pub_test_X]:
            df[scale_cols] = scaler.transform(df[scale_cols])

        ### PCA ##########
        # Fitted on the already scaled training fold plus public test rows.
        print("PCA")
        n_decom_g = 80
        n_decom_c = 10
        decom_g_cols = [f"pca_g-{i}" for i in range(n_decom_g)]
        decom_c_cols = [f"pca_c-{i}" for i in range(n_decom_c)]

        pca_fit_X = pd.concat([train_X, pub_test_X])
        pca_g = PCA(n_components = n_decom_g, random_state = seed).fit(pca_fit_X[GENES_])
        pca_c = PCA(n_components = n_decom_c, random_state = seed).fit(pca_fit_X[CELLS_])
        for df in [train_X, valid_X, test_X, pub_test_X]:
            df[decom_g_cols] = pca_g.transform(df[GENES_])
            df[decom_c_cols] = pca_c.transform(df[CELLS_])

        # prepare data for training
        train_X = train_X.values
        valid_X = valid_X.values
        test_X = test_X.values
        print(train_X.shape)


        # ================================model training===========================
        train_dataset = MoADataset(train_X, train_y)
        valid_dataset = MoADataset(valid_X, valid_y)
        test_dataset = TestDataset(test_X)
        trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
        validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
        testloader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

        model = Model(
            num_features=train_X.shape[1],
            num_targets=train_y.shape[1],
        )

        model.to(DEVICE)


        optimizer = torch.optim.Adam( model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY,)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader) )

        # Plain BCE for validation, label-smoothed BCE for training.
        loss_fn = nn.BCEWithLogitsLoss()
        loss_tr = SmoothBCEwLogits(smoothing=1e-3)

        # train
        model = run_training(
            model=model,
            trainloader=trainloader,
            validloader=validloader,
            epoch_=EPOCHS,
            optimizer=optimizer,
            scheduler=scheduler,
            loss_fn=loss_fn,
            loss_tr=loss_tr,
            early_stopping_steps=EARLY_STOPPING_STEPS,
            device=DEVICE,
            verbose=5,
            fold=fold,
            seed=seed,)
        # Reload the best checkpoint. The device belongs to torch.load as
        # map_location; it used to be passed as load_state_dict's second
        # positional argument, which is ``strict``.
        model.load_state_dict(torch.load('dnn_weights/{}_{}.pt'.format(seed, fold), map_location=DEVICE))

        #valid predict
        val_preds = predict(
            model=model,
            testloader=validloader,
            device=DEVICE,)

        #test predict
        test_preds = predict(
            model=model,
            testloader=testloader,
            device=DEVICE)

        # ================================model training===========================

        train_pred[valid_index] +=  val_preds

        preds += test_preds / (K*len(seeds))

    print("seed {} , cv score : {}".format(seed, metric(y_nonv, train_pred)))
    train_preds += train_pred/len(seeds)
print("cv score : {}".format(metric(y_nonv, train_preds)))

In [None]:
# Write the predictions back into full-size arrays; the control-vehicle rows
# that were removed before training keep an all-zero prediction.
n_targets = y.shape[1]

train_preds2 = np.zeros((TR_SIZE, n_targets))
train_preds2[train_nonvehicle_index] = train_preds

preds2 = np.zeros((TE_SIZE, n_targets))
preds2[test_nonvehicle_index] = preds

# CV score over all training rows, control rows included.
full_cv_score = metric(y, train_preds2)
print("cv score : {}".format(full_cv_score))

1500 : cv score : 0.01562684439261902
2048, 1024 : cv score : 0.015613774063774623
2048, 1024, no stats : cv score : 0.015621595486988864
2048, 1024, kmeans : cv score : 0.015624567785741424
2048, 1024, rankgauss stats : cv score : 0.015615502003812978
2048, 1024, onehot : cv score : 0.015617178433682525

In [None]:
# Fill the sample submission with the test predictions and save it.
# Alternative path: "../input/lish-moa/sample_submission.csv"
sub_df = pd.read_csv("../../../Data/Raw/sample_submission.csv")
cols = [name for name in sub_df.columns if name != "sig_id"]
sub_df[cols] = preds2
sub_df.to_csv("submission.csv", index=False)

In [None]:
# Save the out-of-fold training predictions in the layout of the scored
# targets file (same sig_id column and target columns).
train_sub = pd.read_csv("../../../Data/Raw/train_targets_scored.csv")
cols = [name for name in train_sub.columns if name != "sig_id"]
train_sub[cols] = train_preds2
train_sub.to_csv("train_preds.csv", index=False)

In [None]:
!zip dnn_weights.zip dnn_weights/*