In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import datetime
import random
import os
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score

## CFG

In [None]:
# Hyper-parameters shared by every cell below.
# The original literal listed 'g_in' twice (256, then 772); Python keeps the
# last value, so only the effective entry (772) is kept here.
config = {
    # Cross-validation / optimisation
    'folds': 18, 'lr': 0.00028, 'seed': 666,
    'batch size': 128,
    # Treatment-info branch (one-hot cp_type + cp_time + cp_dose -> embedding)
    'inf_in': 7, 'inf_out': 32,
    # NOTE(review): 'env2' / 'env3' are not read by any cell visible here.
    'env2': 256, 'env3': 512,
    # Gene-expression branch
    'g_in': 772, 'g_h1': 256, 'g_h2': 256, 'g_out': 2048, 'g_dropout': 0.3015,
    # Cell-viability branch
    'c_in': 100, 'c_h1': 256, 'c_h2': 256, 'c_out': 128, 'c_dropout': 0.3015,
    # Merge layer and classification head
    'merge_out': 2048, 'classes_h1': 4096, 'classes_h2': 512, 'classes': 206, 'dropout': 0.3015,
}

## SEED

In [None]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Covers Python's `random`, the `PYTHONHASHSEED` environment variable,
    NumPy's global generator and PyTorch (CPU and all CUDA devices), and
    switches cuDNN to deterministic mode with auto-tuning disabled.

    Args:
        seed (int): value handed to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

    # Trade cuDNN auto-tuning for run-to-run reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(666)

## LOAD DATA

In [None]:
# `pd.read_csv` already returns a DataFrame, so each file is loaded directly.
train_features = pd.read_csv('../input/lish-moa/train_features.csv')
train_targets_nonscored = pd.read_csv('../input/lish-moa/train_targets_nonscored.csv')
train_targets_scored = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
test_features = pd.read_csv('../input/lish-moa/test_features.csv')

## Class Dataset

In [None]:
# One-hot encodings for the three categorical treatment columns.
# The dataset classes look these up by raw column value and concatenate them
# in front of the numeric features (2 + 3 + 2 = 7 values).
cp_type = {
    'trt_cp': np.array([1, 0]),
    'ctl_vehicle': np.array([0, 1]),
}
cp_time = {
    24: np.array([0, 0, 1]),
    48: np.array([0, 1, 0]),
    72: np.array([1, 0, 0]),
}
cp_dose = {
    'D1': np.array([1, 0]),
    'D2': np.array([0, 1]),
}


class Dataset:
    """Map-style dataset pairing one feature row with its target row.

    Args:
        x: feature DataFrame; positional columns are read as
            0 = sig_id, 1 = cp_type, 2 = cp_time, 3 = cp_dose, 4.. = numeric.
        y: target DataFrame containing a 'sig_id' column, used as lookup key.
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y.set_index('sig_id')

    def __len__(self):
        return len(self.x)

    def __getitem__(self, item):
        """Return `(X, Y)` as float32 arrays for the row at position `item`."""
        record = self.x.iloc[item].values
        sig_id, kind, hours, dose = record[0], record[1], record[2], record[3]

        # The first 772 numeric columns and the remainder are converted
        # separately, then joined behind the 7 one-hot values.
        split_at = 4 + 772
        gene_part = np.float32(record[4:split_at])
        cell_part = np.float32(record[split_at:])
        pieces = [cp_type[kind], cp_time[hours], cp_dose[dose], gene_part, cell_part]
        X = np.float32(np.concatenate(pieces))

        target_row = self.y.loc[sig_id]
        Y = np.float32(target_row.values)
        return X, Y
    
class DatasetTest:
    """Map-style dataset for inference: yields features and the row's id.

    Args:
        x: feature DataFrame; positional columns are read as
            0 = sig_id, 1 = cp_type, 2 = cp_time, 3 = cp_dose, 4.. = numeric.
    """

    def __init__(self, x):
        self.x = x

    def __len__(self):
        return len(self.x)

    def __getitem__(self, item):
        """Return `(X, id_)`: a float32 feature array and the value of column 0."""
        record = self.x.iloc[item].values
        id_ = record[0]

        encoded = [cp_type[record[1]], cp_time[record[2]], cp_dose[record[3]]]
        X = np.float32(np.concatenate(encoded + [record[4:]]))
        return X, id_

## Class MOAModel

In [None]:
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are pulled towards 0.5 before the loss is computed:
    `t' = t * (1 - smoothing) + 0.5 * smoothing`.

    Args:
        weight (Tensor, optional): rescaling weight forwarded to
            `F.binary_cross_entropy_with_logits`.
        reduction (str): 'none', 'mean' or 'sum'.
        smoothing (float): smoothing strength in `[0, 1)`.

    Note:
        The previous implementation called the functional loss with its
        default `reduction='mean'` and then reduced the resulting scalar again,
        so 'sum' and 'none' both returned the mean. The requested reduction is
        now passed straight to the functional loss.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The base classes already store `weight` (as a buffer) and `reduction`.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets: torch.Tensor, n_labels: int, smoothing=0.0):
        """Return smoothed targets; `n_labels` is accepted but not used."""
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE-with-logits loss of `inputs` against `targets`."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1), self.smoothing)
        return F.binary_cross_entropy_with_logits(
            inputs, targets, weight=self.weight, reduction=self.reduction
        )

def init_layer(layer):
    """Xavier-uniform initialise `layer.weight` and zero its bias, if it has one.

    Args:
        layer: module exposing a `weight` tensor and, optionally, a `bias`.
    """
    nn.init.xavier_uniform_(layer.weight)

    bias = getattr(layer, "bias", None)
    if bias is not None:
        bias.data.fill_(0.)

def init_bn(bn):
    """Reset a batch-norm layer's affine parameters to scale 1 and shift 0."""
    bn.weight.data.fill_(1.0)
    bn.bias.data.fill_(0.)


class Mish(nn.Module):
    """Element-wise activation `x * tanh(softplus(x))`."""

    def forward(self, x):
        return x * torch.tanh(F.softplus(x))


class SigmoidSoftplus(nn.Module):
    """Element-wise activation `sigmoid(x * softplus(x))`."""

    def forward(self, x):
        gated = x * F.softplus(x)
        return torch.sigmoid(gated)

    

    
class FC(nn.Module):
    """Small fully-connected stack: Linear -> BN -> LogSigmoid -> Dropout -> Linear [-> head].

    With `layer_end` truthy a second BN / LogSigmoid / Dropout stage and a
    weight-normalised output layer follow, so the output width is
    `out_features`. With `layer_end` falsy the stack stops after the second
    linear layer: the output width is `h2` and `out_features` is not used.

    Args:
        in_features (int): input width.
        h1 (int): width after the first linear layer.
        h2 (int): width after the second linear layer.
        out_features (int): width of the optional output layer.
        drop (float): dropout probability for every dropout layer.
        layer_end (bool): whether to build the trailing output stage.
    """

    def __init__(self, in_features, h1, h2, out_features, drop, layer_end=True):
        super().__init__()
        # Evaluate the flag once. The original mixed `layer_end == True` with
        # plain truthiness, which disagreed for truthy non-bool values and
        # ended in `init_layer(None)`.
        has_head = bool(layer_end)

        self.l1 = nn.Linear(in_features, h1)
        self.bn1 = nn.BatchNorm1d(h1)
        self.act1 = nn.LogSigmoid()
        self.dr1 = nn.Dropout(drop)

        self.l2 = nn.Linear(h1, h2)

        if has_head:
            self.bn2 = nn.BatchNorm1d(h2)
            self.act2 = nn.LogSigmoid()
            self.dr2 = nn.Dropout(drop)

            self.l3 = nn.utils.weight_norm(nn.Linear(h2, out_features))
        else:
            # `forward` checks this to decide whether to run the head.
            self.l3 = None

        init_layer(self.l1)
        init_layer(self.l2)
        if has_head:
            # NOTE(review): `l3` is wrapped by weight_norm, which appears to
            # recompute `weight` from weight_g / weight_v before every forward
            # pass, so this Xavier init probably has no lasting effect on the
            # weights (it still zeroes the bias) - confirm.
            init_layer(self.l3)

        init_bn(self.bn1)
        if has_head:
            init_bn(self.bn2)

    def forward(self, x):
        """Run the stack on a 2-D batch `x` and return the resulting features."""
        x = self.l1(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.dr1(x)

        x = self.l2(x)

        if self.l3 is not None:
            x = self.bn2(x)
            x = self.act2(x)
            x = self.dr2(x)

            x = self.l3(x)
        return x
        
class ENV(nn.Module):
    """Three-branch encoder whose outputs are concatenated and merged.

    The input row is split column-wise into three consecutive groups:
    the first `config['inf_in']` columns (treatment info), the next
    `config['g_in']` columns, and everything after that. Each group goes
    through its own branch; the results are concatenated in the order
    (g, c, inf) and projected to `config['merge_out']` features.

    Args:
        config (dict): needs 'inf_in', 'inf_out', 'g_in', 'g_h1', 'g_h2',
            'g_out', 'g_dropout', 'c_in', 'c_h1', 'c_h2', 'c_out',
            'c_dropout' and 'merge_out'.
    """

    def __init__(self, config):
        super().__init__()

        # Column widths used by `forward` to slice the input. These were
        # previously hard-coded as 7 and 772, duplicating the config values.
        self.inf_in = config['inf_in']
        self.g_in = config['g_in']

        self.bn_inf = nn.BatchNorm1d(config['inf_in'])

        self.inf = nn.Linear(config['inf_in'], config['inf_out'])
        self.g = FC(config['g_in'], config['g_h1'], config['g_h2'], config['g_out'], config['g_dropout'], True)
        self.c = FC(config['c_in'], config['c_h1'], config['c_h2'], config['c_out'], config['c_dropout'], True)

        # The merge input width is the sum of the three branch outputs; the
        # treatment-info width was previously the literal 32.
        merge_in = config['g_out'] + config['c_out'] + config['inf_out']
        self.merge = nn.utils.weight_norm(nn.Linear(merge_in, config['merge_out']))

        init_bn(self.bn_inf)
        init_layer(self.inf)

    def forward(self, x):
        """Encode a 2-D batch `x` into `config['merge_out']` features per row."""
        info_end = self.inf_in
        gene_end = info_end + self.g_in

        x1 = x[:, :info_end]
        x2 = x[:, info_end:gene_end]
        x3 = x[:, gene_end:]

        inf = self.inf(self.bn_inf(x1))
        g = self.g(x2)
        c = self.c(x3)

        x = torch.cat([g, c, inf], 1)
        x = self.merge(x)

        return x

class MOAModel(nn.Module):
    """Full classifier: ENV encoder -> SigmoidSoftplus -> FC trunk -> output head.

    The FC trunk is built without its own output stage, so the head defined
    here (BatchNorm -> LogSigmoid -> Dropout -> weight-normalised Linear)
    produces the final `config['classes']` logits.

    Args:
        config (dict): everything `ENV` needs plus 'merge_out', 'classes_h1',
            'classes_h2', 'classes' and 'dropout'.
    """

    def __init__(self, config):
        super().__init__()
        hidden = config['classes_h2']
        n_classes = config['classes']
        p_drop = config['dropout']

        self.env = ENV(config)
        # NOTE(review): the original author marked this activation choice as
        # uncertain ("<---?") and left Mish / x*sigmoid(x) as alternatives.
        self.act1 = SigmoidSoftplus()

        self.fc = FC(config['merge_out'], config['classes_h1'], hidden, n_classes, p_drop, False)

        self.bn2 = nn.BatchNorm1d(hidden)
        self.act2 = nn.LogSigmoid()
        self.dr2 = nn.Dropout(p_drop)
        self.l2 = nn.utils.weight_norm(nn.Linear(hidden, n_classes))

        init_bn(self.bn2)
        init_layer(self.l2)

    def forward(self, x):
        """Return raw logits (no sigmoid applied) for a 2-D batch `x`."""
        encoded = self.act1(self.env(x))

        # Flatten everything but the batch dimension before the trunk.
        flat = encoded.reshape(encoded.size(0), -1)
        trunk = self.fc(flat)

        head_in = self.dr2(self.act2(self.bn2(trunk)))
        return self.l2(head_in)

## Create Dataset

In [None]:
FOLDS = config['folds']


def _assign_round_robin(sig_ids, n_folds, fold_of):
    """Give `sig_ids` the fold numbers 0, 1, ..., n_folds-1, 0, 1, ... in order.

    Results are written into the dict `fold_of` (sig_id -> fold number).
    """
    for position, sig_id in enumerate(sig_ids):
        fold_of[sig_id] = position % n_folds


# Per-target positive counts. `[1:]`-style slicing skips the first entry,
# which belongs to the non-numeric 'sig_id' column.
class_counts = train_targets_scored.sum().iloc[1:]
class_idx = np.stack([class_counts.index, class_counts.values], 1)

# Target names ordered from fewest to most positives.
idxs_sort = np.argsort(class_idx[:, 1])
sort_classes = list(class_idx[:, 0][idxs_sort])

# Walk the targets in that order. The still-unassigned positive rows of each
# target are shuffled and spread over the folds, then removed so that every
# row is assigned exactly once.
# `sample` draws from NumPy's global RNG, so the result depends on the
# preceding `seed_everything` call.
folds = {}
td = train_targets_scored.copy()
for cl in sort_classes:
    positives = td[td[cl] == 1].sample(frac=1).reset_index(drop=True)
    _assign_round_robin(positives['sig_id'].values, FOLDS, folds)
    td = td[td[cl] != 1].copy()

# Rows without any positive target. The id column is dropped before summing:
# a row-wise sum that includes the string 'sig_id' raises TypeError on
# pandas >= 2.0 (older versions silently ignored the column).
no_positive = td[td.drop(columns='sig_id').sum(axis=1) == 0]
_assign_round_robin(no_positive['sig_id'].values, FOLDS, folds)

scored = train_targets_scored.copy()
scored['fold'] = train_targets_scored['sig_id'].map(folds)
assert scored['fold'].notna().all(), "every sig_id must receive a fold"

In [None]:
# The highest-numbered fold is held out for validation; all others train.
holdout_fold = config['folds'] - 1
is_holdout = scored['fold'] == holdout_fold

dataset_train = train_features[~is_holdout].reset_index(drop=True)
dataset_valid = train_features[is_holdout].reset_index(drop=True)

print(len(dataset_train), len(dataset_valid))

In [None]:
BATCH_SIZE = config["batch size"]

# Both labelled datasets look their targets up in the full scored table.
dts_train = Dataset(dataset_train, train_targets_scored)
dts_valid = Dataset(dataset_valid, train_targets_scored)
dts_test = DatasetTest(test_features)

# Training batches are shuffled; validation runs one row at a time, in order.
train_loader = torch.utils.data.DataLoader(
    dts_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)
valid_loader = torch.utils.data.DataLoader(
    dts_valid,
    batch_size=1,
    shuffle=False,
    num_workers=4,
)
print(len(train_loader), len(valid_loader))

## Create Model

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MOAModel(config).to(device)

In [None]:
# Training and validation criteria are separate objects so that one can be
# swapped independently (e.g. SmoothBCEwLogits(smoothing=0.001) for training).
criteric1 = nn.BCEWithLogitsLoss()
criteric2 = nn.BCEWithLogitsLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
# Cosine annealing that restarts every 10 epochs with a constant period.
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10)

In [None]:
class GaussianNoise(nn.Module):
    """Gaussian noise regularizer.

    Adds `N(0, 1) * sigma * x` to the input while the module is in training
    mode; in eval mode, or with `sigma == 0`, the input is returned unchanged.

    Args:
        sigma (float, optional): relative standard deviation used to generate the
            noise. Relative means that it will be multiplied by the magnitude of
            the value you are adding the noise to. This means that sigma can be
            the same regardless of the scale of the vector.
        is_relative_detach (bool, optional): whether to detach the variable before
            computing the scale of the noise. If `False` then the scale of the noise
            won't be seen as a constant but something to optimize: this will bias the
            network to generate vectors with smaller values.

    Note:
        The noise is sampled as float32 directly on the input's device. The
        previous version built it from a tensor placed on the module-level
        global `device`, so the class could not be constructed without that
        global and failed for inputs living on any other device.
    """

    def __init__(self, sigma=0.1, is_relative_detach=True):
        super().__init__()
        self.sigma = sigma
        self.is_relative_detach = is_relative_detach

    def forward(self, x):
        """Return `x` with relative Gaussian noise added (training mode only)."""
        if self.training and self.sigma != 0:
            scale = self.sigma * x.detach() if self.is_relative_detach else self.sigma * x
            sampled_noise = torch.randn(x.size(), dtype=torch.float32, device=x.device) * scale
            x = x + sampled_noise
        return x
    
    

In [None]:
def mixup_data(x, y, alpha=1.0, use_cuda=True):
    """Blend every sample of a batch with another, randomly chosen, sample.

    Args:
        x (Tensor): inputs with the batch on dimension 0 and at least 2 dims.
        y (Tensor): targets with the batch on dimension 0.
        alpha (float): Beta(alpha, alpha) parameter for the mixing weight;
            `alpha <= 0` disables mixing (weight fixed at 1).
        use_cuda (bool): kept for backward compatibility and ignored. The
            permutation now follows `x.device`; previously the default `True`
            called `.cuda()` unconditionally and crashed on CPU-only hosts.

    Returns:
        tuple: `(mixed_x, y_a, y_b, lam)` where
        `mixed_x = lam * x + (1 - lam) * x[perm]`, `y_a` is `y`,
        `y_b` is `y[perm]` and `lam` is the mixing weight.
    """
    # Plain Python float so the scalar multiplies cleanly with tensors.
    lam = float(np.random.beta(alpha, alpha)) if alpha > 0. else 1.

    batch_size = x.size()[0]
    # Drawn from the CPU generator (as before), then moved next to the data.
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]

    return mixed_x, y_a, y_b, lam


In [None]:
# Training state that survives a re-run of the training cell:
best_loss = float('inf')   # lowest validation loss seen so far
epoch_start = 1            # first epoch number the training loop will run
model_name_best = ''       # path of the checkpoint belonging to `best_loss`

## Train

In [None]:
# Re-seed all RNGs with the configured seed in the cell right before training.
seed_everything(config['seed'])

In [None]:
print(datetime.datetime.now())

MAX_EPOCHS = 15000
EARLY_STOPPING_STEPS = 30   # epochs without improvement tolerated before stopping
EARLY_STOP = True

# Augmentation recipe used for every training batch.
AUG_MODE = 6
# mode -> (noise passes before mixup, apply mixup?, noise passes after mixup)
AUG_RECIPES = {
    0: (0, False, 0),   # plain batch
    1: (0, True, 0),    # mixup
    2: (1, False, 0),   # noise
    3: (0, True, 1),    # mixup, then noise
    4: (1, True, 1),    # noise, mixup, noise
    5: (0, True, 1),    # same recipe as mode 3
    6: (2, False, 0),   # noise applied twice
}

noise = GaussianNoise(0.33)


def _augmented_loss(X, Y, mode):
    """Apply augmentation recipe `mode` to the batch and return the training loss.

    Raises:
        ValueError: if `mode` is not a key of `AUG_RECIPES`.
    """
    if mode not in AUG_RECIPES:
        raise ValueError(f"unknown augmentation mode: {mode!r}")
    noise_before, use_mixup, noise_after = AUG_RECIPES[mode]

    for _ in range(noise_before):
        X = noise(X)

    if not use_mixup:
        return criteric1(model(X), Y)

    mixed_x, y_a, y_b, lam = mixup_data(X, Y, 0.4)
    for _ in range(noise_after):
        mixed_x = noise(mixed_x)
    pred = model(mixed_x)
    return lam * criteric1(pred, y_a) + (1 - lam) * criteric1(pred, y_b)


early_step = 0
for epoch in range(epoch_start, MAX_EPOCHS + 1):
    # ---- training pass ----
    model.train()
    loss_total = 0.0
    count = 0
    n_batches = len(train_loader)
    for b_idx, (X, Y) in enumerate(train_loader):
        X = X.to(device)
        Y = Y.to(device)

        # Drop stale gradients by clearing them rather than zero-filling.
        for param in model.parameters():
            param.grad = None

        loss = _augmented_loss(X, Y, AUG_MODE)
        loss.backward()
        optimizer.step()

        # Fractional epoch so the cosine schedule advances within an epoch.
        scheduler.step(epoch + b_idx / n_batches)

        loss_total += loss.item()
        count += 1

    train_loss = loss_total / count
    print(f"{datetime.datetime.now()}: {epoch}. Train | loss={(train_loss):.6f}")

    # ---- validation pass ----
    model.eval()
    loss_total = 0.0
    count = 0
    with torch.no_grad():
        for X, Y in valid_loader:
            X = X.to(device)
            Y = Y.to(device)

            loss_total += criteric2(model(X), Y).item()
            count += 1

    valid_loss = loss_total / count
    print(f"{datetime.datetime.now()}: {epoch}. Valid | loss={(valid_loss):.6f}")

    if best_loss >= valid_loss:
        print(f"\tSave: {best_loss} -> {valid_loss}")

        # Keep only the best checkpoint on disk; its file name embeds the loss.
        best_loss = valid_loss
        if os.path.isfile(model_name_best):
            os.remove(model_name_best)
        model_name_best = f'{best_loss}_model_base.pt'
        torch.save(model.state_dict(), model_name_best)

        early_step = 0

    elif EARLY_STOP:
        early_step += 1
        if early_step >= EARLY_STOPPING_STEPS:
            break

    # Re-running this cell continues from the next epoch number.
    epoch_start += 1

In [None]:
# The f-string was missing its closing quote, which made this cell a SyntaxError.
print(f'BEST: {best_loss}')

In [None]:
# Restore the weights of the best checkpoint (the file name embeds its loss).
best_checkpoint = f'{best_loss}_model_base.pt'
best_state = torch.load(best_checkpoint, map_location=device)
model.load_state_dict(best_state)
model = model.to(device)

In [None]:
# Inference runs one row per batch, in file order.
BATCH_SIZE = 1
test_features = pd.read_csv('../input/lish-moa/test_features.csv')

test_loading = torch.utils.data.DataLoader(
    DatasetTest(test_features),
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

In [None]:
# Reload the scored targets; their columns define the prediction layout below.
train_targets_scored = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
train_targets_scored

In [None]:
# `read_csv` already returns DataFrames, so no extra wrapping is needed.
test_features = pd.read_csv('../input/lish-moa/test_features.csv')
sample_submission = pd.read_csv('../input/lish-moa/sample_submission.csv')

In [None]:
# Column labels of the scored-target table ('sig_id' plus the target names).
# NOTE(review): `columns` is not referenced again in the cells visible here.
columns=train_targets_scored.columns

In [None]:
print(datetime.datetime.now())

# One entry per test row: [sig_id, p_1, ..., p_n], with every probability
# clipped to the range [0.0005, 0.999].
res = []
model.eval()
with torch.no_grad():
    for X, sig_ids in test_loading:
        probs = torch.sigmoid(model(X.to(device))).cpu().numpy()
        probs = np.clip(probs, 0.0005, 0.999)
        for sig_id, row in zip(sig_ids, probs):
            res.append([sig_id] + row.tolist())

print(len(res))

In [None]:
# Predictions as a frame laid out exactly like the scored-target table.
prediction_columns = train_targets_scored.columns
res_test = pd.DataFrame(data=res, columns=prediction_columns)
res_test

In [None]:
# Names of all target columns, i.e. every column except the row identifier.
target_cols = train_targets_scored.columns.drop('sig_id').tolist()

In [None]:
# Keep the sample submission's non-target columns and row order, attach the
# predictions by sig_id, and fill anything left unmatched with 0.
submission_ids = sample_submission.drop(columns=target_cols)
predicted = res_test[['sig_id'] + target_cols]
sub = submission_ids.merge(predicted, on='sig_id', how='left').fillna(0)

sub.to_csv('submission.csv', index=False)

In [None]:
# Display the final submission frame as the cell output.
sub

## End