- decrease to 1 model to check correlation
- return model to version 2

In [1]:
import os
import sys
import random
import warnings
import numpy as np
import pandas as pd 
from tqdm import tqdm_notebook as tqdm
from sklearn import preprocessing
from sklearn.metrics import log_loss,roc_auc_score
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.multioutput import MultiOutputClassifier
from sklearn.feature_selection import VarianceThreshold

sys.path.append('../input/multilabelstraifier/')
from ml_stratifiers import MultilabelStratifiedKFold
warnings.filterwarnings('ignore')

import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import tensorflow as tf
from torch.nn.modules.loss import _WeightedLoss

from sklearn.pipeline import make_pipeline,make_union
from sklearn.compose import make_column_transformer,ColumnTransformer
from sklearn.base import BaseEstimator,TransformerMixin

# functions

In [2]:
def train_short_form_loader(feature_file,target_file,extra_target_file=None):
    '''Read the feature and target CSVs and join them into one wide frame.

    The targets file (optionally inner-joined on ``sig_id`` with an extra
    targets file) is left-joined with the features on ``sig_id``. The result
    is indexed by ``sig_id`` and ``cp_type`` / ``cp_dose`` are cast to
    ``category``.

    Returns a tuple ``(frame, target_names)`` where ``target_names`` is the
    list of every column of the (merged) targets file except its first one.
    '''
    features_df = pd.read_csv(feature_file)
    targets_df = pd.read_csv(target_file)

    if extra_target_file is not None:
        targets_df = pd.merge(targets_df, pd.read_csv(extra_target_file), on='sig_id')

    # The first column of the targets file is skipped when listing target names.
    target_names = targets_df.columns[1:].to_list()

    merged = targets_df.merge(features_df, how="left", on="sig_id").set_index("sig_id")
    for cat_col in ("cp_type", "cp_dose"):
        merged[cat_col] = merged[cat_col].astype('category')

    return merged, target_names

def test_short_form_loader(feature_file):
    '''takes the original target and features and creates a train dataset 
    in col long format'''

    train_features = pd.read_csv(feature_file)

    train_melt =  train_features.copy()
    del train_features

    train_melt.set_index("sig_id",inplace=True)
    train_melt["cp_type"]= train_melt["cp_type"].astype('category')
    train_melt["cp_dose"]= train_melt["cp_dose"].astype('category')

    return train_melt 

In [3]:
# feature selection transformer
class CatIntMapper( BaseEstimator, TransformerMixin ):
    """Replace one column of a DataFrame with integer codes from a fixed dict.

    The mapping is supplied at construction time, so ``fit`` learns nothing.
    The encoded column is re-attached as the last column of the output frame
    under its original name.
    """

    def __init__(self ,col,dicti):
        # col: name of the column to encode; dicti: {raw value: integer code}
        self.col = col
        self.dicti = dicti

    def fit(self, X, y = None):
        # Stateless: nothing to learn, just return self.
        return self

    def fit_transform( self, X, y = None  ):
        # Fitting is a no-op, so this is exactly transform().
        return self.transform(X)

    def transform( self, X):
        raw_values = X[self.col]
        # Every value must have an entry in the mapping.
        assert  raw_values.isin(self.dicti.keys()).all()

        encoded = raw_values.map(self.dicti).astype(int).rename(self.col)
        remaining = X.drop(self.col,axis=1)
        return pd.concat([remaining, encoded],axis=1)
    
class NamedOutTWrapper(BaseEstimator, TransformerMixin):
    """Apply a transformer to selected columns and return a DataFrame.

    Parameters
    ----------
    transformer : object with ``fit`` / ``transform`` / ``fit_transform``
        Applied to ``X[columns]``.
    columns : list of column labels
        Columns handed to the wrapped transformer.
    inplace : bool, default False
        If True the transformed values replace ``columns`` (moved to the end
        of the frame). If False they are appended as new columns and the
        originals are kept.
    prefix : str, default '_'
        Separator placed between the transformer's class name and the output
        column number when ``inplace`` is False, e.g. ``PCA_g0``.
    """

    def __init__(self,transformer,columns,inplace=False,prefix='_' ):
        self.transformer = transformer
        # Stored under the constructor argument's own name so that
        # BaseEstimator.get_params() / clone() / repr() can find it.
        self.columns = columns
        # Original attribute name, kept for existing callers.
        self.cols = columns
        self.inplace =  inplace
        self.prefix = prefix
        self.transformer_name = self._get_transformer_name()

    def fit(self, X, y = None):
        self.transformer = self.transformer.fit(X[self.cols] , y )
        return self

    def fit_transform( self, X, y = None  ):
        return self._assemble(X, self.transformer.fit_transform(X[self.cols] , y ))

    def transform( self, X):
        return self._assemble(X, self.transformer.transform(X[self.cols]))

    def _assemble(self, X, transformed_columns):
        """Combine ``X`` with the transformer output according to ``inplace``."""
        values = np.asarray(transformed_columns)

        if self.inplace:
            # Build the replacement frame directly instead of assigning into a
            # slice of X (which triggers pandas' chained-assignment warning).
            replaced = pd.DataFrame(values, index=X.index, columns=self.cols)
            return pd.concat([X.drop(self.cols,axis=1),replaced],axis=1)

        # One bulk construction instead of inserting the columns one at a time.
        new_names = [self.transformer_name + self.prefix + str(i) for i in range(values.shape[1])]
        appended = pd.DataFrame(values, index=X.index, columns=new_names)
        return pd.concat([X,appended],axis=1)

    def _get_transformer_name(self):
        # Class name of the wrapped transformer, e.g. 'PCA'.
        return type(self.transformer).__name__

In [4]:
# cv folds
def multifold_indexer(train,target_columns,n_splits=10,random_state=12347,**kwargs):
    """Return a copy of ``train`` with an integer ``kfold`` column.

    Rows are split with ``MultilabelStratifiedKFold`` using
    ``train[target_columns]`` as the labels; each row's ``kfold`` value is the
    index of the split in which that row belongs to the validation part.

    Parameters
    ----------
    train : pandas.DataFrame
    target_columns : list of column labels used as stratification labels
    n_splits, random_state, **kwargs
        Forwarded to ``MultilabelStratifiedKFold``.
        NOTE(review): ``shuffle`` is not set here, so whether ``random_state``
        has any effect depends on the splitter's defaults - confirm.
    """
    folds = train.copy()

    mskf = MultilabelStratifiedKFold(n_splits=n_splits,random_state=random_state,**kwargs)
    folds['kfold'] = 0
    # Look the column up by name: if `train` already carried a 'kfold' column
    # it is not necessarily the last one, and a positional -1 would silently
    # overwrite some other column.
    kfold_pos = folds.columns.get_loc('kfold')
    for fold_id, (_, valid_idx) in enumerate(mskf.split(X=train, y=train[target_columns])):
        folds.iloc[valid_idx, kfold_pos] = int(fold_id)

    folds['kfold'] = folds['kfold'].astype(int)
    return folds

In [5]:
# dataset class
class MoADataset:
    """Map-style dataset pairing one feature row with one target row.

    ``features`` and ``targets`` are indexed as ``array[idx, :]``; each item is
    a dict with float tensors under the keys ``'x'`` and ``'y'``.
    """

    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        x_row = torch.tensor(self.features[idx, :], dtype=torch.float)
        y_row = torch.tensor(self.targets[idx, :], dtype=torch.float)
        return {'x': x_row, 'y': y_row}
    
class TestDataset:
    """Map-style dataset over a feature matrix only (no targets).

    Each item is a dict holding one float tensor under the key ``'x'``.
    """

    def __init__(self, features):
        self.features = features

    def __len__(self):
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        x_row = torch.tensor(self.features[idx, :], dtype=torch.float)
        return {'x': x_row}
    
class ColumnDropper( BaseEstimator, TransformerMixin ):
    """Pipeline step that removes a fixed set of columns from a DataFrame."""

    def __init__( self, cols ):
        # cols: column label(s) to remove in transform()
        self.cols = cols

    def fit( self, X, y = None ):
        # Stateless: nothing is learned from the data.
        return self

    def transform( self, X, y = None ):
        # Returns a new frame; X itself is not modified.
        return X.drop(columns=self.cols)

In [6]:
# dae model
def seed_everything(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Also exports ``PYTHONHASHSEED`` into ``os.environ``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # The generators are independent, so one loop seeds them all.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True

class DAE_Model(nn.Module):
    """Four-layer weight-normalised MLP used as a denoising autoencoder.

    Layer widths are ``num_features -> hidden_size -> hidden_size2 ->
    hidden_size -> num_features``. ``num_targets`` is accepted but not used.
    ``batch_norm1`` is registered (so it appears in the state dict) but is not
    applied in ``forward``.
    """

    def __init__(self, num_features, num_targets, hidden_size=1100,hidden_size2=1300):
        super().__init__()

        def wn_linear(n_in, n_out):
            # Linear layer wrapped in weight normalisation.
            return nn.utils.weight_norm(nn.Linear(n_in, n_out))

        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dense1 = wn_linear(num_features, hidden_size)

        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.dense2 = wn_linear(hidden_size, hidden_size2)

        self.batch_norm3 = nn.BatchNorm1d(hidden_size2)
        self.dense3 = wn_linear(hidden_size2, hidden_size)

        self.dense4 = wn_linear(hidden_size, num_features)

    def forward(self, x,mode='DAE'):
        """Return the reconstruction (``mode == 'DAE'``) or the three hidden
        activations as a tuple (any other ``mode``)."""
        h1 = F.relu(self.dense1(x))
        h2 = F.relu(self.dense2(self.batch_norm2(h1)))
        h3 = F.relu(self.dense3(self.batch_norm3(h2)))

        reconstruction = self.dense4(h3)

        return reconstruction if mode == 'DAE' else (h1, h2, h3)

In [7]:
def infer_features_fn(model, dataloader, device):
    """Run ``model`` in feature mode over ``dataloader`` and stack the result.

    Each batch is a dict whose ``'x'`` entry is moved to ``device`` and passed
    to ``model(..., mode='get_features')``. The returned tensors are
    concatenated along dimension 1 per batch, and the batches are stacked
    along dimension 0 into a single NumPy array. The model is put in eval mode.
    """
    model.eval()
    chunks = []

    for batch in dataloader:
        with torch.no_grad():
            activations = model(batch['x'].to(device), mode='get_features')
        joined = torch.cat(activations, dim=1)
        chunks.append(joined.detach().cpu().numpy())

    return np.concatenate(chunks)

def run_inference(X_train,y_train,X_valid,y_valid,X_test,fold, seed,inference_only=False,**kwargs):
    """Extract DAE hidden-layer features for the validation and test matrices.

    Parameters
    ----------
    X_train : array with a ``shape`` attribute
        Only its column count is used, to size the model.
    y_train : unused; kept for interface compatibility.
    X_valid, y_valid : arrays
        Wrapped in ``MoADataset`` when ``inference_only`` is False.
    X_test : array
        Wrapped in ``TestDataset``.
    fold : unused; kept for interface compatibility.
    seed : int
        Passed to ``seed_everything`` before the model is built.
    inference_only : bool, default False
        When True the validation pass is skipped and ``oof`` is returned as 0.
    **kwargs
        Forwarded to ``DAE_Model`` (e.g. ``hidden_size``).

    Returns
    -------
    (oof, predictions) : features for ``X_valid`` (or 0) and for ``X_test``.

    NOTE(review): the model is freshly constructed here and no checkpoint is
    loaded in this function, so the features come from seed-initialised
    weights unless weights are restored elsewhere - confirm this is intended.
    """
    seed_everything(seed)

    model = DAE_Model(
        num_features= X_train.shape[1] ,
        num_targets=  X_train.shape[1],
        **kwargs
    )
    model.to(DEVICE)

    if not inference_only:
        valid_dataset = MoADataset(X_valid, y_valid)
        validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
        # Fixed: this previously called the undefined name
        # `inference_infer_features_fn`, raising NameError on this path.
        oof = infer_features_fn(model, validloader, DEVICE)
    else:
        oof = 0

    testdataset = TestDataset(X_test)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)
    predictions = infer_features_fn(model, testloader, DEVICE)

    return oof, predictions

# denoising autoencoder

In [8]:
# Configuration for the DAE feature-extraction stage.
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): EPOCHS, LEARNING_RATE, WEIGHT_DECAY, EARLY_STOPPING_STEPS,
# EARLY_STOP, hidden_size, hidden_size2, PATIENCE and THRESHOLD are not
# referenced anywhere else in the visible notebook - presumably left over
# from a training cell that is no longer here; confirm before relying on them.
EPOCHS = 1000
# Batch size of the DataLoaders built in run_inference.
BATCH_SIZE = 640
LEARNING_RATE = 2e-3
WEIGHT_DECAY = 1e-8
# Number of splits passed to multifold_indexer and iterated over below.
NFOLDS = 10
EARLY_STOPPING_STEPS = 10
EARLY_STOP = False
hidden_size=1100
hidden_size2=1300
PATIENCE=10
THRESHOLD = 5e-3

In [9]:
# Build DAE hidden-layer features for every row of train + test.
SEED = [0]
train,target_cols = train_short_form_loader('../input/lish-moa/train_features.csv','../input/lish-moa/train_targets_scored.csv')
test = test_short_form_loader("../input/lish-moa/test_features.csv")

# Stack train and test; test rows have no targets, so those cells are filled with 0.
train = pd.concat([train,test])
train[target_cols]= train[target_cols].fillna(0)
# From here on `test` is the full train+test frame, so the features computed
# below cover every row.
test = train.copy()

GENES = [col for col in train.columns if col.startswith('g-')]
CELLS = [col for col in train.columns if col.startswith('c-')]

# Integer encodings for the three categorical columns.
map_controls = CatIntMapper('cp_type',{'ctl_vehicle': 0, 'trt_cp': 1})    
map_dose = CatIntMapper('cp_dose',{'D1': 1, 'D2': 0})    
map_time = CatIntMapper('cp_time',{24: 0, 48: 1, 72: 2})  

# Quantile-normalise gene and cell columns in place.
Rankg_g_tansform = NamedOutTWrapper(preprocessing.QuantileTransformer(n_quantiles=1000,random_state=0, output_distribution="normal"),columns= GENES+CELLS,inplace=True)
# The two PCA wrappers are defined but not included in transformers_list below.
PCA_g_tansform =  NamedOutTWrapper(PCA(20),columns= GENES,prefix ='_g' )
PCA_c_tansform =  NamedOutTWrapper(PCA(20),columns= CELLS,prefix ='_c' )

# The categorical columns are encoded first and then dropped again by CatDropper.
CatDropper =ColumnDropper(cols=['cp_type','cp_time','cp_dose'])
transformers_list=[map_controls,map_dose,map_time,Rankg_g_tansform,CatDropper]
exp_name = 'test_DAE_0.2_all_together'

# Allocated here but not written to anywhere in this cell.
oof = np.zeros((len(train), len(target_cols)))
predictions = np.zeros((len(test), len(target_cols)))

for seed in SEED:
    
    train = multifold_indexer(train,target_cols,n_splits=NFOLDS)
    
    for fold in range(NFOLDS):
            
        train_df = train[train['kfold'] != fold]
        valid_df = train[train['kfold'] == fold]
            
        feature_cols = [col for col in train_df.columns if not (col in target_cols+['kfold'])]
                
        pipeline_val = make_pipeline(*transformers_list)
        
        X_train, y_train  = train_df[feature_cols], train_df[target_cols]
        X_valid, y_valid =  valid_df[feature_cols], valid_df[target_cols].values
        
        # The preprocessing pipeline is fitted on this fold's training rows only.
        X_train = pipeline_val.fit_transform(X_train)
        X_train = X_train.values
        X_valid = pipeline_val.transform(X_valid)
        
        valid_index = X_valid.index
        X_valid = X_valid.values
        
        y_train = y_train.values
        
        X_test = test[feature_cols]
        X_test = pipeline_val.transform(X_test).values
        
        # inference_only=True: run_inference returns (0, features for X_test).
        pred_ = run_inference(X_train,y_train,X_valid,y_valid,X_test,fold, seed,inference_only=True)    
        # Only the first fold is processed; the loop is left immediately.
        break
        
# pred_[1] holds the features for every row of `test` (train + test stacked).
transformed_features = pd.DataFrame(pred_[1],index=test.index)

In [10]:
# Persist the DAE features and expose sig_id as a regular column for later merges.
print(transformed_features.shape)
# Column labels are converted to strings before writing the feather file.
transformed_features.columns = [str(i) for i in range(len(transformed_features.columns))]
transformed_features.reset_index().to_feather('./features_0.2_altogether.fth')
# Keep the index as a column so the frame can be merged on it further down.
transformed_features = transformed_features.reset_index(drop=False)

(27796, 3500)


# prediction

In [11]:
# Reload the raw competition files for the prediction stage.
# NOTE: `train` and `test` are re-bound here and replace the frames built
# in the DAE cell above.
DATA_DIR = '/kaggle/input/lish-moa/'
train = pd.read_csv(DATA_DIR + 'train_features.csv')
targets = pd.read_csv(DATA_DIR + 'train_targets_scored.csv')
test = pd.read_csv(DATA_DIR + 'test_features.csv')
sub = pd.read_csv(DATA_DIR + 'sample_submission.csv')
drug = pd.read_csv(DATA_DIR + 'train_drug.csv')

# "noncons_*" = positional index of control rows (cp_type == "ctl_vehicle");
# "cons_*"    = positional index of all other rows.
noncons_train_index = train[train.cp_type=="ctl_vehicle"].index
cons_train_index = train[train.cp_type!="ctl_vehicle"].index
noncons_test_index = test[test.cp_type=="ctl_vehicle"].index
cons_test_index = test[test.cp_type!="ctl_vehicle"].index

# Keep only the non-control rows; `targets` is filtered with the train index,
# which assumes it is row-aligned with train_features.csv - TODO confirm.
train = train[train.index.isin(cons_train_index)].copy().reset_index(drop=True)
test = test[test.index.isin(cons_test_index)].copy().reset_index(drop=True)
targets = targets[targets.index.isin(cons_train_index)].copy().reset_index(drop=True)

In [12]:
# Column name lists: every target column except sig_id, and the feature
# columns whose names contain "g-" or "c-".
target_feats = [ i for i in targets.columns if i != "sig_id"]
g_feats = [i for i in train.columns if "g-" in i]
c_feats = [i for i in train.columns if "c-" in i]

In [13]:
# Drug-aware fold assignment.
# NOTE(review): the resulting `folds` list is only displayed; no later cell in
# the visible notebook consumes it (the one line that would is commented out),
# and modelling_dae builds its own splitter.
NB_SPLITS = 7
seed = 34

folds = []
    
# Attach the drug table to the target rows.
train_score = targets.merge(drug, on='sig_id', how='left') 

# LOCATE DRUGS
# vc1: drug_ids that occur exactly 6, 12 or 18 times; vc2: every other drug_id.
vc = train_score.drug_id.value_counts()
vc1 = vc.loc[(vc==6)|(vc==12)|(vc==18)].index.sort_values()
vc2 = vc.loc[(vc!=6)&(vc!=12)&(vc!=18)].index.sort_values()
    
# STRATIFY vc1 DRUGS: one fold per drug_id, using the drug's mean target vector.
dct1 = {}; dct2 = {}
skf = MultilabelStratifiedKFold(n_splits = NB_SPLITS, shuffle = True, random_state = seed)
tmp = train_score.groupby('drug_id')[target_feats].mean().loc[vc1]
for fold,(idxT,idxV) in enumerate(skf.split(tmp,tmp[target_feats])):
    dd = {k:fold for k in tmp.index[idxV].values}
    dct1.update(dd)
    
# STRATIFY vc2 DRUGS: one fold per sig_id (rows of the same drug may be split).
skf = MultilabelStratifiedKFold(n_splits = NB_SPLITS, shuffle = True, random_state = seed)
tmp = train_score.loc[train_score.drug_id.isin(vc2)].reset_index(drop = True)
for fold,(idxT,idxV) in enumerate(skf.split(tmp,tmp[target_feats])):
    dd = {k:fold for k in tmp.sig_id[idxV].values}
    dct2.update(dd)

# ASSIGN FOLDS
# Drug-level folds first; rows still unassigned fall back to the sig_id-level map.
train_score['fold'] = train_score.drug_id.map(dct1)
train_score.loc[train_score.fold.isna(),'fold'] = train_score.loc[train_score.fold.isna(),'sig_id'].map(dct2)
train_score.fold = train_score.fold.astype('int8')
folds.append(train_score.fold.values)
    
np.array(folds)

array([[6, 4, 3, ..., 3, 3, 2]], dtype=int8)

In [14]:
# Remove the raw gene/cell columns and cp_type from both frames.
train = train.drop(g_feats + c_feats + ["cp_type"], axis=1)
test = test.drop(g_feats + c_feats + ["cp_type"], axis=1)

In [15]:
# Attach the DAE features to each row by sig_id (left join keeps every row).
train = train.merge(transformed_features, on="sig_id", how="left")
test = test.merge(transformed_features, on="sig_id", how="left")

In [16]:
def fe(df):
    """One-hot encode ``cp_time`` and ``cp_dose`` and drop ``sig_id``.

    Returns a new frame; ``df`` is not modified. The indicator columns are
    created as ``uint8`` explicitly: without a ``dtype`` argument newer pandas
    releases emit ``bool`` indicators, and mixing those with float features
    makes a later ``to_numpy()`` fall back to ``object`` dtype.
    """
    encoded = pd.get_dummies(df, columns=['cp_time','cp_dose'], dtype=np.uint8)
    return encoded.drop(columns=["sig_id"])
# Encode both frames with the same helper. Both names are re-bound, so this
# cell cannot be re-run on its own output (sig_id is gone after the first run).
train = fe(train)
test = fe(test)

In [17]:
#train["fold"] = np.array(folds).reshape(-1,1)

In [18]:
# NumPy copies of the feature frames and of the targets (without sig_id),
# used as inputs to modelling_dae.
fn_train = train.copy().to_numpy()
fn_test = test.copy().to_numpy()

fn_targets = targets.drop("sig_id", axis=1).copy().to_numpy()

In [19]:
class SmoothCrossEntropyLoss(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are smoothed as ``t * (1 - smoothing) + smoothing / n_classes``,
    where ``n_classes`` is the number of columns of the logits.

    Parameters
    ----------
    weight : Tensor or None
        If given, the logits are multiplied by ``weight`` (broadcast over the
        batch dimension) before the loss is computed.
    reduction : str, default 'mean'
        Passed to ``F.binary_cross_entropy_with_logits``.
    smoothing : float in [0, 1], default 0.0
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The base class stores `weight` and `reduction`.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets, n_classes, smoothing=0.0):
        assert 0 <= smoothing <= 1
        with torch.no_grad():
            # ones_like() already lives on the targets' device, so no global
            # `device` variable is needed here.
            targets = targets * (1 - smoothing) + torch.ones_like(targets) * smoothing / n_classes
        return targets

    def forward(self, inputs, targets):
        # Static method: call it directly rather than via a throwaway instance.
        targets = self._smooth(targets, inputs.shape[1], self.smoothing)

        if self.weight is not None:
            inputs = inputs * self.weight.unsqueeze(0)

        # Honour the configured reduction (the default 'mean' is unchanged).
        loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction=self.reduction)

        return loss

In [20]:
class Model(nn.Module):
    """Three-layer weight-normalised MLP classifier that returns raw logits.

    Each stage is BatchNorm -> Dropout -> weight-normalised Linear, with ReLU
    after the first two stages. Widths are ``num_features -> hidden_size1 ->
    hidden_size2 -> num_targets``.
    """

    def __init__(self, num_features, num_targets, hidden_size1=388,hidden_size2=512,drop_rate1=0.0,drop_rate2=0.3,drop_rate3=0.3):
        super().__init__()

        def wn_linear(n_in, n_out):
            # Linear layer wrapped in weight normalisation.
            return nn.utils.weight_norm(nn.Linear(n_in, n_out))

        # Stage 1
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dropout1 = nn.Dropout(drop_rate1)
        self.dense1 = wn_linear(num_features, hidden_size1)

        # Stage 2
        self.batch_norm2 = nn.BatchNorm1d(hidden_size1)
        self.dropout2 = nn.Dropout(drop_rate2)
        self.dense2 = wn_linear(hidden_size1, hidden_size2)

        # Stage 3 (output)
        self.batch_norm3 = nn.BatchNorm1d(hidden_size2)
        self.dropout3 = nn.Dropout(drop_rate3)
        self.dense3 = wn_linear(hidden_size2, num_targets)

    def forward(self, x):
        hidden = F.relu(self.dense1(self.dropout1(self.batch_norm1(x))))
        hidden = F.relu(self.dense2(self.dropout2(self.batch_norm2(hidden))))
        logits = self.dense3(self.dropout3(self.batch_norm3(hidden)))
        return logits

In [21]:
# Training parameters for modelling_dae (which steps a ReduceLROnPlateau scheduler).
device = ('cuda' if torch.cuda.is_available() else 'cpu')
train_epochs = 30
batch_size = 256
n_folds = 7

# Label-smoothing amount; predictions are also clamped to [p_min, p_max].
smoothing = 0.001
p_min = smoothing
p_max = 1 - p_min

def mean_log_loss(y_true, y_pred):
    """Mean over columns of the binary log loss between two 2-D arrays.

    Each column of ``y_true`` is scored against the same column of ``y_pred``
    with ``labels=[0, 1]``. The columns are taken from ``y_true`` itself
    rather than from the global ``target_feats`` list, so the function does
    not depend on notebook state.
    """
    per_target = [
        log_loss(y_true[:, col], y_pred[:, col].astype(float), labels=[0,1])
        for col in range(y_true.shape[1])
    ]
    return np.mean(per_target)

def modelling_dae(tr, target, te, sample_seed, init_num, last_num):
    """Train ``Model`` with K-fold cross-validation and predict the test set.

    Parameters
    ----------
    tr : 2-D array, indexed as ``tr[rows, :]``
        Training features.
    target : 2-D array
        Training targets, row-aligned with ``tr``.
    te : 2-D array
        Test features.
    sample_seed : int
        Seed passed to ``seed_everything``. The fold split itself uses a fixed
        ``random_state`` of 224.
    init_num, last_num : int
        Input and output widths passed to ``Model``.

    Returns
    -------
    oof : out-of-fold predictions for every training row
    oof_targets : the targets in the same row order as ``oof``
    pred_value : test predictions averaged over the ``n_folds`` fold models

    For each fold the weights with the lowest validation metric are saved to
    'best-model-parameters.pt' and reloaded for the predictions.
    """
    seed_everything(seed=sample_seed)
    X_train = tr.copy()
    y_train = target.copy()
    X_test = te.copy()
    test_len = X_test.shape[0]
    n_targets = y_train.shape[1]

    # Monitoring metric: plain BCE on clamped probabilities (no label smoothing).
    metric = lambda inputs, targets : F.binary_cross_entropy((torch.clamp(torch.sigmoid(inputs), p_min, p_max)), targets)

    X_test2 = torch.tensor(X_test, dtype=torch.float32)
    test = torch.utils.data.TensorDataset(X_test2)
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

    oof = np.zeros([len(X_train), n_targets])
    oof_targets = np.zeros([len(X_train), n_targets])
    pred_value = np.zeros([test_len, n_targets])
    scores = []
    mskf = MultilabelStratifiedKFold(n_splits = n_folds, shuffle=True, random_state=224)
    for fold, (train_index, valid_index) in enumerate(mskf.split(X_train, y_train)):
        print("Fold "+str(fold+1))
        X_train2 = torch.tensor(X_train[train_index,:], dtype=torch.float32)
        X_valid2 = torch.tensor(X_train[valid_index,:], dtype=torch.float32)

        y_train2 = torch.tensor(y_train[train_index], dtype=torch.float32)
        y_valid2 = torch.tensor(y_train[valid_index], dtype=torch.float32)

        train = torch.utils.data.TensorDataset(X_train2, y_train2)
        valid = torch.utils.data.TensorDataset(X_valid2, y_valid2)

        train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)

        clf = Model(init_num, last_num)
        loss_fn = SmoothCrossEntropyLoss(smoothing=smoothing)

        optimizer = optim.Adam(clf.parameters(), lr = 5e-3, weight_decay=1e-5)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, factor=0.1, eps=1e-4, verbose=True)

        clf.to(device)

        best_val_loss = np.inf
        for epoch in range(train_epochs):
            start_time = time.time()
            clf.train()
            avg_loss = 0.
            sm_avg_loss = 0.
            for x_batch, y_batch in train_loader:
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)
                y_pred = clf(x_batch)
                loss = loss_fn(y_pred, y_batch)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                avg_loss += loss.item() / len(train_loader)
                # Detach before accumulating: otherwise the running sum keeps a
                # reference to every batch's autograd graph until the epoch ends.
                sm_avg_loss += metric(y_pred.detach(), y_batch) / len(train_loader)

            clf.eval()
            avg_val_loss = 0.
            sm_avg_val_loss = 0.
            # No gradients are needed for validation.
            with torch.no_grad():
                for x_batch, y_batch in valid_loader:
                    x_batch = x_batch.to(device)
                    y_batch = y_batch.to(device)
                    y_pred = clf(x_batch)
                    avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
                    sm_avg_val_loss += metric(y_pred, y_batch) / len(valid_loader)

            elapsed_time = time.time() - start_time
            scheduler.step(sm_avg_val_loss)

            # Checkpoint whenever the validation metric improves.
            if sm_avg_val_loss < best_val_loss:
                best_val_loss = sm_avg_val_loss
                print('Epoch {}  loss={:.5f}  val_loss={:.5f}  sm_loss={:.5f}  sm_val_loss={:.5f}  time={:.2f}s'.format(
                    epoch + 1, avg_loss, avg_val_loss, sm_avg_loss, sm_avg_val_loss, elapsed_time))
                torch.save(clf.state_dict(), 'best-model-parameters.pt')

        # Reload the best weights into a fresh model; it stays on the CPU, as do
        # the batches coming out of the loaders below.
        pred_model = Model(init_num, last_num)
        pred_model.load_state_dict(torch.load('best-model-parameters.pt'))
        pred_model.eval()

        # validation check ----------------
        oof_epoch = np.zeros([X_valid2.size(0), n_targets])
        target_epoch = np.zeros([X_valid2.size(0), n_targets])
        with torch.no_grad():
            for i, (x_batch, y_batch) in enumerate(valid_loader):
                rows = slice(i * batch_size, (i+1) * batch_size)
                y_pred = pred_model(x_batch)
                oof_epoch[rows, :] = torch.clamp(torch.sigmoid(y_pred.cpu()), p_min, p_max).numpy()
                target_epoch[rows, :] = y_batch.cpu().numpy()
        fold_score = mean_log_loss(target_epoch, oof_epoch)
        print("Fold {} log loss: {}".format(fold+1, fold_score))
        scores.append(fold_score)
        oof[valid_index,:] = oof_epoch
        oof_targets[valid_index,:] = target_epoch
        #-----------------------------------

        # test prediction --------------
        test_preds = np.zeros([test_len, n_targets])
        with torch.no_grad():
            for i, (x_batch,) in enumerate(test_loader):
                rows = slice(i * batch_size, (i+1) * batch_size)
                y_pred = pred_model(x_batch)
                test_preds[rows, :] = torch.clamp(torch.sigmoid(y_pred.cpu()), p_min, p_max).numpy()
        pred_value += test_preds / n_folds
        # ------------------------------

    # Report the seed this call was given (previously read the caller's global `seed_`).
    print("Seed {}".format(sample_seed))
    for i, fold_score in enumerate(scores):
        print("Fold {} log loss: {}".format(i+1, fold_score))
    print("Std of log loss: {}".format(np.std(scores)))
    print("Total log loss: {}".format(mean_log_loss(oof_targets, oof)))

    return oof, oof_targets, pred_value

In [22]:
#    for fold in range(n_folds):
#        valid_index = X_train[:,-1] == fold
#        train_index = X_train[:,-1] != fold
#        print("Fold "+str(fold+1))
#        X_train2 = torch.tensor(X_train[train_index,:], dtype=torch.float32)
#        X_valid2 = torch.tensor(X_train[valid_index,:], dtype=torch.float32)
#        X_train2 = X_train2[:,:-1]
#        X_valid2 = X_valid2[:,:-1]

In [23]:
# Train once per seed and average the OOF and test predictions over the seeds.
seeds = [0]
target_oof = np.zeros([len(fn_train),fn_targets.shape[1]])
target_pred = np.zeros([len(fn_test),fn_targets.shape[1]])

for seed_ in seeds:
    oof, oof_targets, pytorch_pred = modelling_dae(fn_train, fn_targets, fn_test, seed_, fn_train.shape[1], fn_targets.shape[1])
    target_oof += oof / len(seeds)
    target_pred += pytorch_pred / len(seeds)

# `oof_targets` here is the value returned for the last seed in the loop.
print("Total log loss in targets: {}".format(mean_log_loss(oof_targets, target_oof)))

Fold 1
Epoch 1  loss=0.21784  val_loss=0.02529  sm_loss=0.21783  sm_val_loss=0.02529  time=0.74s
Epoch 2  loss=0.02284  val_loss=0.02110  sm_loss=0.02282  sm_val_loss=0.02106  time=0.77s
Epoch 3  loss=0.02018  val_loss=0.01922  sm_loss=0.02018  sm_val_loss=0.01924  time=0.85s
Epoch 4  loss=0.01886  val_loss=0.01844  sm_loss=0.01890  sm_val_loss=0.01845  time=0.85s
Epoch 5  loss=0.01805  val_loss=0.01799  sm_loss=0.01810  sm_val_loss=0.01804  time=0.68s
Epoch 6  loss=0.01751  val_loss=0.01778  sm_loss=0.01759  sm_val_loss=0.01784  time=0.64s
Epoch 7  loss=0.01730  val_loss=0.01775  sm_loss=0.01739  sm_val_loss=0.01783  time=0.65s
Epoch 8  loss=0.01679  val_loss=0.01750  sm_loss=0.01691  sm_val_loss=0.01763  time=0.79s
Epoch    12: reducing learning rate of group 0 to 5.0000e-04.
Epoch 13  loss=0.01505  val_loss=0.01709  sm_loss=0.01524  sm_val_loss=0.01723  time=0.63s
Epoch    17: reducing learning rate of group 0 to 5.0000e-05.
Fold 1 log loss: 0.017434605999642454
Fold 2
Epoch 1  loss

In [24]:
# Score the OOF predictions against the full targets file (control rows included).
t = pd.read_csv(DATA_DIR + 'train_targets_scored.csv')
train_checkscore = t.copy()
# Non-control rows receive the OOF predictions; control rows are set to 0.
train_checkscore.loc[train_checkscore.index.isin(cons_train_index),target_feats] = target_oof
train_checkscore.loc[train_checkscore.index.isin(noncons_train_index),target_feats] = 0
t.drop("sig_id", axis=1, inplace=True)
# Both frames are flattened, so this is a single log loss over every (row, target) cell.
print('OOF log loss: ', log_loss(np.ravel(t), np.ravel(np.array(train_checkscore.iloc[:,1:]))))

OOF log loss:  0.01589027488578855


In [25]:
# Write the OOF prediction frame to disk without the pandas index.
train_checkscore.to_csv("dae_oof.csv", index=False)

In [26]:
# Fill the submission: model predictions for non-control test rows, 0 for control rows.
sub.loc[cons_test_index, target_feats] = target_pred
sub.loc[noncons_test_index,target_feats] = 0
sub.to_csv('submission.csv', index=False)