In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

from sklearn.model_selection import train_test_split

In [None]:
class MyModel(nn.Module):
    """Feed-forward multi-label classifier that outputs per-target probabilities.

    Every hidden stage is ``BatchNorm1d -> Dropout -> Linear -> ReLU``; the output
    stage is ``BatchNorm1d -> Dropout -> Linear -> Sigmoid``. Because the network
    ends in a sigmoid, its outputs are meant for ``nn.BCELoss`` (not the
    logits-based variant).

    Args:
        in_features: Width of the input feature vector.
        out_features: Number of output units (one probability each).
        hidden_sizes: Output widths of the hidden ``Linear`` layers, in order.
            The default ``(2048, 1024)`` reproduces the original fixed
            architecture, including the layer indices inside ``self.model``.
        dropout: Dropout probability used before every ``Linear`` layer.
    """

    def __init__(self, in_features, out_features, hidden_sizes=(2048, 1024), dropout=0.5):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.hidden_sizes = tuple(hidden_sizes)

        layers = []
        width = in_features
        for hidden in self.hidden_sizes:
            layers += [
                nn.BatchNorm1d(width),
                nn.Dropout(dropout),
                nn.Linear(width, hidden),
                nn.ReLU(),
            ]
            width = hidden

        # Output head: same normalise/regularise prefix, sigmoid instead of ReLU.
        layers += [
            nn.BatchNorm1d(width),
            nn.Dropout(dropout),
            nn.Linear(width, out_features),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return sigmoid probabilities for a 2-D batch ``x`` of width ``in_features``."""
        return self.model(x)

In [None]:
def train_model(model, train_dataloader, test_dataloader=None, optimizer=None, scheduler=None, max_epoch=10, criterion=None):
    """Train ``model`` in place and print per-epoch losses.

    Args:
        model: ``nn.Module`` to optimise; it is left in eval mode after each epoch.
        train_dataloader: Loader yielding ``(x_batch, y_batch)``. Its ``dataset``
            must expose ``.tensors`` (e.g. ``TensorDataset``) because the whole
            set is pushed through the model once per epoch to report the loss.
        test_dataloader: Optional loader with the same structure, used only for
            the end-of-epoch loss report (and as the plateau metric).
        optimizer: Optimiser to use; defaults to ``Adam`` with default settings.
        scheduler: Optional LR scheduler, stepped once per epoch. A
            ``ReduceLROnPlateau`` instance receives the epoch's test loss, or the
            train loss when no test loader is given. When ``None`` the learning
            rate stays constant.
        max_epoch: Number of passes over ``train_dataloader``.
        criterion: Loss callable ``criterion(preds, target)``; defaults to
            ``nn.BCELoss()``.

    Returns:
        None. Progress is reported via ``print``.
    """
    if optimizer is None:
        optimizer = torch.optim.Adam(model.parameters())
    if criterion is None:
        criterion = nn.BCELoss()
    # The former fallback, StepLR(optimizer, 0.1, 2), bound 0.1 to step_size and 2
    # to gamma (arguments in the wrong order) and was stepped per batch. It is
    # intentionally not recreated: no scheduler now means "keep the LR fixed".
    needs_metric = isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau)

    for epoch in range(max_epoch):
        model.train()
        for i, (x_batch, y_batch) in enumerate(train_dataloader):
            optimizer.zero_grad()
            preds = model(x_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            if i % 20 == 0:
                print(f'Epoch: {epoch}, train loss: {loss.item():12.5f}')

        model.eval()
        with torch.no_grad():
            train, y_train = train_dataloader.dataset.tensors
            train_preds = model(train)
            train_loss = criterion(train_preds, y_train).item()
            monitored_loss = train_loss
            if test_dataloader is not None:
                test, y_test = test_dataloader.dataset.tensors
                test_preds = model(test)
                test_loss = criterion(test_preds, y_test).item()
                monitored_loss = test_loss
                print(f'Epoch: {epoch} final', f'test loss: {test_loss}', f'train loss: {train_loss}')
            else:
                print(f'Epoch: {epoch} final: train loss: {train_loss}')

        # Schedulers are defined in terms of epochs; plateau schedulers also need
        # the metric they are watching.
        if scheduler is not None:
            if needs_metric:
                scheduler.step(monitored_loss)
            else:
                scheduler.step()

In [None]:
from sklearn.model_selection import KFold
from copy import deepcopy

def train_ensemble(X, y, base_model=MyModel, n_models=5, max_epoch=50, device=None):
    """Train one model per K-fold split and keep each fold's best checkpoint.

    The data is split with ``KFold(n_splits=n_models, shuffle=True,
    random_state=48)``. For every fold a fresh ``base_model`` is trained with
    Adam (``weight_decay=1e-5``), ``nn.BCELoss`` and a ``ReduceLROnPlateau``
    scheduler driven by the fold's validation loss. After each epoch the model
    is deep-copied if its validation loss is the lowest seen so far.

    Args:
        X: pandas DataFrame of features (indexed positionally via ``.iloc``).
        y: pandas DataFrame of targets with the same row order as ``X``.
        base_model: Callable ``base_model(n_in, n_out)`` returning an ``nn.Module``.
        n_models: Number of folds, and therefore of returned models.
        max_epoch: Training epochs per fold.
        device: Torch device (or device string) to train on. ``None`` selects
            ``'cuda'`` when available and ``'cpu'`` otherwise.

    Returns:
        list: The best-validation-loss copy of the model for each fold, left on
        ``device``. An entry is ``None`` if no epoch produced a validation loss
        below infinity (e.g. ``max_epoch == 0`` or NaN losses).
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    criterion = nn.BCELoss()

    kf = KFold(n_splits=n_models, shuffle=True, random_state=48)
    models = []
    for fold, (train_ind, val_ind) in enumerate(kf.split(X, y)):
        print(f'Training model {fold+1}/{kf.n_splits}')

        x_train = torch.tensor(X.iloc[train_ind,:].values, dtype=torch.float32)
        y_train = torch.tensor(y.iloc[train_ind,:].values, dtype=torch.float32)
        x_val = torch.tensor(X.iloc[val_ind,:].values, dtype=torch.float32)
        y_val = torch.tensor(y.iloc[val_ind,:].values, dtype=torch.float32)

        train_dataloader = DataLoader(TensorDataset(x_train, y_train), batch_size=128, shuffle=True)

        # Full-set copies used for the per-epoch loss report; moved to the device
        # once per fold instead of once per epoch.
        x_train_dev, y_train_dev = x_train.to(device), y_train.to(device)
        x_val_dev, y_val_dev = x_val.to(device), y_val.to(device)

        best_loss = np.inf
        best_model = None
        cur_model = base_model(x_train.shape[1], y_train.shape[1]).to(device)
        optimizer = torch.optim.Adam(cur_model.parameters(), weight_decay=1e-5)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, eps=1e-4)

        for epoch in range(max_epoch):
            cur_model.train()
            for x_batch, y_batch in train_dataloader:
                optimizer.zero_grad()
                preds = cur_model(x_batch.to(device))
                loss = criterion(preds, y_batch.to(device))
                loss.backward()
                optimizer.step()

            cur_model.eval()
            with torch.no_grad():
                train_loss = criterion(cur_model(x_train_dev), y_train_dev).item()
                val_loss = criterion(cur_model(x_val_dev), y_val_dev).item()
            print(f'Epoch {epoch}: train loss: {train_loss}, val loss: {val_loss}')
            scheduler.step(val_loss)
            if val_loss < best_loss:
                # Snapshot the weights now; later epochs keep mutating cur_model.
                best_model = deepcopy(cur_model)
                best_loss = val_loss

        models += [best_model]

    return models

In [None]:
# Hard-coded subset of 'g-*' / 'c-*' column names. No live statement in the
# visible notebook reads this list; presumably it is the result of an earlier
# feature-selection run -- TODO confirm provenance before relying on it.
best_features = ['g-392', 'c-65', 'g-100', 'c-9', 'g-50', 'c-79', 'c-98', 'g-37',
       'c-6', 'c-26', 'g-439', 'g-628', 'g-744', 'g-351', 'g-298', 'c-42',
       'g-410', 'g-761', 'c-18', 'g-418', 'g-146', 'c-57', 'c-64',
       'g-322', 'c-48', 'c-38', 'c-82', 'g-63', 'g-534', 'c-92', 'g-186',
       'c-28', 'g-486', 'c-70', 'g-672', 'g-91', 'g-731', 'c-10', 'g-386',
       'g-121', 'g-443', 'g-206', 'g-723', 'c-81', 'c-36', 'c-33', 'g-85',
       'g-235', 'g-406', 'g-683', 'c-52', 'c-62', 'c-63', 'g-365', 'c-21',
       'c-60', 'c-15', 'c-66', 'g-629', 'c-49', 'g-248', 'c-59', 'c-24',
       'c-76', 'g-669', 'g-106', 'g-38', 'g-140', 'c-30', 'c-22', 'g-72',
       'c-25', 'c-23', 'c-8', 'c-83', 'g-489', 'g-369', 'c-47', 'g-158',
       'g-297', 'g-147', 'c-5', 'c-77', 'g-163', 'g-332', 'g-344', 'c-50',
       'g-335', 'c-2', 'g-503', 'g-208', 'g-152', 'c-17', 'c-41', 'g-353',
       'c-34', 'g-664', 'c-96', 'g-228', 'c-67', 'g-569', 'g-750', 'g-30',
       'g-578', 'c-90', 'c-72', 'g-257', 'c-75', 'c-97', 'g-98', 'g-500',
       'c-1', 'g-728', 'c-44', 'g-360', 'c-85', 'g-195', 'c-31', 'c-11',
       'c-40', 'g-135', 'g-65', 'c-95', 'c-80', 'g-261', 'g-590', 'c-54',
       'c-51', 'c-13', 'c-12', 'g-201', 'g-83', 'g-468', 'g-58', 'g-478',
       'g-460', 'g-574', 'c-45', 'c-94', 'c-4', 'g-367', 'c-69', 'g-407',
       'c-73', 'g-349', 'g-155', 'g-113', 'g-350', 'c-91', 'g-546',
       'g-131', 'g-52', 'g-745', 'c-55', 'c-27', 'c-14', 'g-379', 'g-51',
       'g-199', 'g-241', 'g-568', 'g-10', 'c-93', 'g-508', 'c-84', 'c-78',
       'g-433', 'c-20', 'c-39', 'g-7', 'g-177', 'g-185']

def preprocess(features, target=None, is_train=True):
    """Turn the raw feature/target frames into numeric model inputs.

    ``sig_id`` is dropped and the three categorical columns are replaced by
    0.0/1.0 indicator columns:

    * ``cp_type``: 1.0 where the raw value is ``'trt_cp'``
    * ``cp_dose``: 1.0 where the raw value is ``'D1'``
    * ``cp_time``: 1.0 where the raw value equals ``48``

    All other columns are passed through unchanged and no rows are removed.
    The input frames are not modified.

    Args:
        features: DataFrame containing ``sig_id``, ``cp_type``, ``cp_dose`` and
            ``cp_time`` columns plus the remaining feature columns.
        target: Optional DataFrame with a ``sig_id`` column to strip.
        is_train: Accepted for backward compatibility; currently unused.

    Returns:
        tuple: ``(out_features, out_target)`` where ``out_target`` is ``None``
        when ``target`` is ``None``.
    """
    out_features = features.drop(['sig_id'], axis=1).copy()
    # Fixed: the indicator was previously derived from cp_dose, which never
    # equals 'trt_cp', so the cp_type column was constantly 0.
    out_features['cp_type'] = (features.cp_type == 'trt_cp').astype(float)
    out_features['cp_dose'] = (features.cp_dose == 'D1').astype(float)
    out_features['cp_time'] = (features.cp_time == 48).astype(float)

    out_target = None
    if target is not None:
        out_target = target.drop('sig_id', axis=1).copy()

    return out_features, out_target

In [None]:
# Directory that holds the competition CSV files (trailing slash is relied on below).
data_dir = '../input/lish-moa/'

# Read the three input tables in one pass; order matches the names on the left.
train_features, test_features, train_target = (
    pd.read_csv(f'{data_dir}{csv_name}')
    for csv_name in ('train_features.csv', 'test_features.csv', 'train_targets_scored.csv')
)

# train_target

In [None]:
# Boolean Series marking the control-vehicle rows of the raw test features.
test_zero_mask = test_features.cp_type == 'ctl_vehicle'

# Numeric model inputs; the test call has no targets, so its second result is discarded.
x_train, y_train = preprocess(train_features, target=train_target)
x_test, _ = preprocess(test_features, is_train=False)

In [None]:
class Autoencoder(nn.Module):
    """Fully-connected autoencoder whose widths halve three times, then mirror back.

    Encoder: ``n_inputs -> n_inputs//2 -> n_inputs//4 -> n_inputs//8``; each stage
    is ``BatchNorm1d -> Dropout -> Linear -> ReLU``.

    Decoder: ``n_inputs//8 -> n_inputs//4 -> n_inputs//2 -> n_inputs``; the first
    two stages are ``Linear -> ReLU -> BatchNorm1d -> Dropout`` and the last is a
    bare ``Linear`` with no activation.

    Args:
        n_inputs: Width of the vectors to reconstruct.
        dropout: Dropout probability shared by every ``Dropout`` layer.
    """

    def __init__(self, n_inputs, dropout=0.1):
        super().__init__()
        half, quarter, eighth = n_inputs // 2, n_inputs // 4, n_inputs // 8

        # Encoder stages are created first so the Linear layers are initialised
        # in the same order as a literal, written-out definition would.
        encoder_layers = []
        for wide, narrow in ((n_inputs, half), (half, quarter), (quarter, eighth)):
            encoder_layers.extend([
                nn.BatchNorm1d(wide),
                nn.Dropout(dropout),
                nn.Linear(wide, narrow),
                nn.ReLU(),
            ])
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = []
        for narrow, wide in ((eighth, quarter), (quarter, half)):
            decoder_layers.extend([
                nn.Linear(narrow, wide),
                nn.ReLU(),
                nn.BatchNorm1d(wide),
                nn.Dropout(dropout),
            ])
        # Final projection back to the input width, left linear.
        decoder_layers.append(nn.Linear(half, n_inputs))
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode then decode ``x`` and return the reconstruction."""
        return self.decoder(self.encoder(x))

In [None]:
# Column-name lists for the two feature families, selected by name prefix.
gen_f = list(filter(lambda name: name.startswith('g-'), x_train.columns))
cell_f = list(filter(lambda name: name.startswith('c-'), x_train.columns))

In [None]:
# 80/20 split of the 'c-' columns, with a fixed seed.
x_train_cell, x_test_cell = train_test_split(
    x_train.loc[:, cell_f],
    test_size=0.2,
    random_state=111,
)

In [None]:
# Autoencoder data: every sample is paired with a copy of itself as its target.
cell_fit_inputs = torch.tensor(x_train_cell.values, dtype=torch.float32)
cell_holdout_inputs = torch.tensor(x_test_cell.values, dtype=torch.float32)

train_dataset = TensorDataset(cell_fit_inputs, cell_fit_inputs.clone())
test_dataset = TensorDataset(cell_holdout_inputs, cell_holdout_inputs.clone())

train_dataloader, test_dataloader = (
    DataLoader(dataset, batch_size=256) for dataset in (train_dataset, test_dataset)
)

In [None]:
cells_autoencoder = Autoencoder(len(cell_f))

optimizer = torch.optim.Adam(cells_autoencoder.parameters(), weight_decay=1e-3)
# NOTE(review): ReduceLROnPlateau needs the monitored loss on every step(); only
# hand this scheduler to train_model if train_model supplies that metric.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, eps=1e-4)

# Pass the optimizer explicitly: it was previously built but never used, so
# training silently ran with a default Adam and no weight decay.
train_model(cells_autoencoder, train_dataloader, test_dataloader, optimizer=optimizer, max_epoch=30, criterion=nn.MSELoss())

In [None]:
# Encode every training row's 'c-' features with the trained cell autoencoder.
cell_inputs_all = torch.tensor(x_train[cell_f].values, dtype=torch.float32)
with torch.no_grad():
    ae_cells_train = cells_autoencoder.encoder(cell_inputs_all).numpy()

In [None]:
# 80/20 split of the 'g-' columns, with a fixed seed.
x_train_gen, x_test_gen = train_test_split(
    x_train.loc[:, gen_f],
    test_size=0.2,
    random_state=111,
)

In [None]:
# Autoencoder data: every sample is paired with a copy of itself as its target.
gen_fit_inputs = torch.tensor(x_train_gen.values, dtype=torch.float32)
gen_holdout_inputs = torch.tensor(x_test_gen.values, dtype=torch.float32)

train_dataset = TensorDataset(gen_fit_inputs, gen_fit_inputs.clone())
test_dataset = TensorDataset(gen_holdout_inputs, gen_holdout_inputs.clone())

train_dataloader, test_dataloader = (
    DataLoader(dataset, batch_size=256) for dataset in (train_dataset, test_dataset)
)

In [None]:
gen_autoencoder = Autoencoder(len(gen_f))

optimizer = torch.optim.Adam(gen_autoencoder.parameters(), weight_decay=1e-3)
# NOTE(review): ReduceLROnPlateau needs the monitored loss on every step(); only
# hand this scheduler to train_model if train_model supplies that metric.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, eps=1e-4)

# Pass the optimizer explicitly: it was previously built but never used, so
# training silently ran with a default Adam and no weight decay.
train_model(gen_autoencoder, train_dataloader, test_dataloader, optimizer=optimizer, max_epoch=30, criterion=nn.MSELoss())

In [None]:
# Encode every training row's 'g-' features. Read them from the preprocessed
# frame (x_train), matching how the cell features and the test set are encoded.
with torch.no_grad():
    ae_gen_train = gen_autoencoder.encoder(torch.tensor(x_train[gen_f].values, dtype=torch.float32)).numpy()

In [None]:
# The encoded arrays are positional (one row per x_train row), so give the new
# frame x_train's index explicitly; pd.concat(axis=1) aligns on index labels and
# would otherwise depend on x_train happening to carry a default RangeIndex.
train_enc = pd.DataFrame(np.concatenate((ae_gen_train, ae_cells_train), axis=1), index=x_train.index)
train_enc = pd.concat((x_train, train_enc), axis=1)

In [None]:
# Fit the K-fold ensemble on the raw + autoencoded training features.
ensemble = train_ensemble(X=train_enc, y=y_train)

In [None]:
# Persist the list of trained models; NumPy stores it as a pickled object array.
np.save(file='best_ensemble_pca.npy', arr=ensemble)

In [None]:
# NOTE(review): allow_pickle=True unpickles arbitrary objects, which can execute
# code; only load a file produced by this notebook or another trusted source.
ensemble = np.load(file='best_ensemble_pca.npy', allow_pickle=True)

In [None]:
# Encode the test rows with the same two encoders used for the training rows.
gen_inputs_test = torch.tensor(x_test[gen_f].values, dtype=torch.float32)
cell_inputs_test = torch.tensor(x_test[cell_f].values, dtype=torch.float32)
with torch.no_grad():
    ae_gen_test = gen_autoencoder.encoder(gen_inputs_test).numpy()
    ae_cells_test = cells_autoencoder.encoder(cell_inputs_test).numpy()

In [None]:
# The encoded arrays are positional (one row per x_test row), so give the new
# frame x_test's index explicitly; pd.concat(axis=1) aligns on index labels and
# would otherwise depend on x_test happening to carry a default RangeIndex.
test_enc = pd.DataFrame(np.concatenate((ae_gen_test, ae_cells_test), axis=1), index=x_test.index)
test_enc = pd.concat((x_test, test_enc), axis=1)

In [None]:
# ensemble = np.load('../input/nn-ensemble-for-moa-predictions/best_ensemble_pca.npy', allow_pickle=True).tolist()

In [None]:
# Average the ensemble members' predicted probabilities on the encoded test set.
# The input tensor is built once rather than once per model.
test_inputs = torch.tensor(test_enc.values, dtype=torch.float32)

preds = []
for model in ensemble:
    model.eval()
    # Run on whichever device this member's weights live on instead of assuming CUDA.
    model_device = next(model.parameters()).device
    with torch.no_grad():
        preds += [model(test_inputs.to(model_device))]

preds = torch.stack(preds, dim=0).mean(dim=0)

In [None]:
# Zero the predictions of the rows flagged in test_zero_mask. The pandas mask is
# converted to a bool tensor on preds' device so the row selection is explicit
# rather than relying on torch interpreting a Series as an index.
zero_rows = torch.as_tensor(test_zero_mask.to_numpy(), dtype=torch.bool, device=preds.device)
preds[zero_rows] = 0.

In [None]:
# Template whose first column is kept and whose remaining columns get overwritten.
sample_submission = pd.read_csv(f'{data_dir}sample_submission.csv')

In [None]:
# Preview only the first rows instead of rendering the whole frame.
sample_submission.head()

In [None]:
# Overwrite every column after the first with the averaged predictions. Hand
# pandas a NumPy array rather than a torch tensor so the assignment does not
# depend on implicit tensor-to-array conversion.
sample_submission.iloc[:, 1:] = preds.cpu().numpy()

In [None]:
# Spot-check the filled-in predictions without dumping the whole frame.
sample_submission.head()

In [None]:
# Write the submission file without the DataFrame index column.
sample_submission.to_csv(path_or_buf='submission.csv', index=False)