In [None]:
import torch
import torch.utils.data as data_utils
import pandas as pd
import numpy as np

In [None]:
# Read the three competition CSVs in one statement, in this order:
# training features, scored training targets, test features.
train_df, target_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/lish-moa/train_features.csv',
        '../input/lish-moa/train_targets_scored.csv',
        '../input/lish-moa/test_features.csv',
    )
)

In [None]:
def feature_array(df):
    """Build the numeric model-input matrix from a features DataFrame.

    The three treatment columns (``cp_type``, ``cp_time``, ``cp_dose``) are
    mapped to fixed numeric codes and placed first, followed by every column
    whose name starts with ``g-`` or ``c-``, in their original order.

    ``df`` itself is not modified: the encoded columns live only in the
    returned array, so calling this repeatedly on the same frame is safe.

    Args:
        df: DataFrame containing ``cp_type``, ``cp_time`` and ``cp_dose`` plus
            the ``g-*`` / ``c-*`` columns.

    Returns:
        2-D ``numpy.ndarray`` with one row per row of ``df`` and
        ``3 + (number of g-/c- columns)`` columns.

    Raises:
        KeyError: if a treatment column is missing, or holds a value (including
            a missing value) that has no encoding.
    """
    encodings = {
        'cp_type': {'trt_cp': 1.0, 'ctl_vehicle': -1.0},
        'cp_time': {24: -1.0, 48: 0.0, 72: 1.0},
        'cp_dose': {'D1': -1.0, 'D2': 1.0},
    }

    encoded = {}
    for column, mapping in encodings.items():
        codes = df[column].map(mapping)
        # `map` yields NaN for anything outside the mapping; surface that as a
        # KeyError instead of letting NaN leak silently into the features.
        unmapped = codes.isna().to_numpy()
        if unmapped.any():
            raise KeyError(df[column].to_numpy()[unmapped][0])
        encoded[column + '_enc'] = codes.to_numpy(dtype=float)

    signal_cols = [c for c in df.columns if c.startswith(('g-', 'c-'))]
    # Assemble the encoded columns in a separate frame so the caller's frame
    # never gains the helper columns.
    encoded_frame = pd.DataFrame(encoded, index=df.index)
    return pd.concat([encoded_frame, df[signal_cols]], axis=1).to_numpy()

def target_array(df):
    """Return the target matrix of a targets DataFrame as a numpy array.

    Every column except the first is returned; the first column is skipped
    (presumably the row identifier -- confirm against the CSV).

    Args:
        df: targets DataFrame. The frame passed in is the one used; no
            module-level variable is consulted.

    Returns:
        2-D ``numpy.ndarray`` with ``len(df)`` rows and ``len(df.columns) - 1``
        columns.
    """
    return df.iloc[:, 1:].to_numpy()

def train_val_split(train_df, target_df, val_size=0.1, seed=None):
    """Randomly split features and targets into training and validation arrays.

    Args:
        train_df: features DataFrame, converted with ``feature_array``.
        target_df: targets DataFrame, converted with ``target_array``.
        val_size: fraction of rows held out for validation, in the closed
            interval [0, 1]. The number of validation rows is
            ``int(len(X) * val_size)``.
        seed: optional integer seed. When given, the split is drawn from a
            dedicated ``numpy.random.RandomState(seed)`` and is reproducible;
            when ``None`` (default) the global ``numpy.random`` state is used.

    Returns:
        ``(X_train, y_train, X_val, y_val)`` as numpy arrays. Training rows
        keep their original relative order; validation rows are in the order
        they were sampled.

    Raises:
        AssertionError: if ``val_size`` is outside [0, 1] or the feature and
            target arrays have different lengths.
    """
    assert 0.0 <= val_size <= 1.0, 'val_size must lie within [0; 1]'
    X = feature_array(train_df)
    y = target_array(target_df)
    assert len(X) == len(y), 'features and targets must have the same number of rows'

    rng = np.random if seed is None else np.random.RandomState(seed)
    val_count = int(len(X) * val_size)
    val_indices = rng.choice(len(X), size=val_count, replace=False)

    # One boolean mask selects the training rows of both arrays.
    train_mask = np.ones(len(X), dtype=bool)
    train_mask[val_indices] = False

    return X[train_mask], y[train_mask], X[val_indices], y[val_indices]

In [None]:
# Run the split once; the four arrays are reused by the cells below.
X_train, y_train, X_val, y_val = train_val_split(train_df=train_df, target_df=target_df)

# Cell output: the shapes of the four arrays, as a tuple of shape tuples.
tuple(split.shape for split in (X_train, y_train, X_val, y_val))

In [None]:
# Wrap each (features, targets) pair as FloatTensors in a TensorDataset.
train_dataset = data_utils.TensorDataset(
    *(torch.FloatTensor(array) for array in (X_train, y_train))
)
val_dataset = data_utils.TensorDataset(
    *(torch.FloatTensor(array) for array in (X_val, y_val))
)

# Only the training loader shuffles; validation keeps the dataset order.
train_loader = data_utils.DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = data_utils.DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

In [None]:
class MoaModel(torch.nn.Module):
    """Fully connected network that maps a feature vector to per-target logits.

    Architecture: Linear(features_num, 512) -> Dropout(0.4) -> ReLU ->
    Linear(512, 256) -> Dropout(0.4) -> ReLU -> Linear(256, targets_num).
    No output activation is applied; ``forward`` returns raw logits.
    """

    def __init__(self, features_num, targets_num):
        super().__init__()
        nn = torch.nn

        # Build the hidden stack in order so layer indices (and therefore
        # state_dict keys) are model.0, model.3 and model.6 for the Linears.
        layers = []
        in_width = features_num
        for out_width in (512, 256):
            layers += [nn.Linear(in_width, out_width), nn.Dropout(p=0.4), nn.ReLU()]
            in_width = out_width
        layers.append(nn.Linear(in_width, targets_num))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by the sequential stack for ``x``."""
        return self.model(x)

In [None]:
import torch.nn.functional as F

from ignite.contrib.handlers import ProgressBar
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss, RunningAverage

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Size the network from the training arrays: one input per feature column and
# one output logit per target column.
model = MoaModel(
    features_num=X_train.shape[1], 
    targets_num=y_train.shape[1],
)

model.to(device)

# Adam with its default learning rate (none is passed) and a small weight decay.
optimizer = torch.optim.Adam(model.parameters(), weight_decay=1e-5)
# StepLR with step_size=4 and gamma=0.3. The scheduler is stepped once per
# epoch by the `decrease_lr` handler, so the LR is scaled by 0.3 every 4 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.3)

# Trainer and evaluator share the same loss, BCE-with-logits, so the model's
# raw outputs are passed to the loss without an explicit sigmoid.
trainer = create_supervised_trainer(model, optimizer, F.binary_cross_entropy_with_logits, device=device)
evaluator = create_supervised_evaluator(
    model, metrics={"bce": Loss(F.binary_cross_entropy_with_logits)}, device=device
)

# Running average over the trainer's per-iteration output, exposed as "loss".
# NOTE(review): assumes the trainer's output is the scalar batch loss -- confirm
# against the ignite version in use.
RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")

# Progress bar attached to the trainer, requesting all of its metrics.
pbar = ProgressBar(persist=True)
pbar.attach(trainer, metric_names="all")

@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
    """After each epoch, score the training set with the evaluator and log its BCE."""
    evaluator.run(train_loader)
    train_bce = evaluator.state.metrics["bce"]
    epoch = engine.state.epoch
    message = "Training Results - Epoch: {}  Avg loss: {:.5f}".format(epoch, train_bce)
    pbar.log_message(message)
    
@trainer.on(Events.EPOCH_COMPLETED)
def decrease_lr(engine):
    """Advance the learning-rate scheduler by one step at the end of each epoch."""
    # `engine` is unused; the handler acts on the module-level `scheduler`.
    scheduler.step()

@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    """After each epoch, score the validation set with the evaluator and log its BCE."""
    evaluator.run(val_loader)
    val_bce = evaluator.state.metrics["bce"]
    epoch = engine.state.epoch
    message = "Validation Results - Epoch: {}  Avg loss: {:.5f}".format(epoch, val_bce)
    pbar.log_message(message)
    # Zero the two counters on the progress-bar object.
    # NOTE(review): presumably meant to restart the bar for the next epoch --
    # confirm this has an effect on ignite's ProgressBar wrapper.
    pbar.n = 0
    pbar.last_print_n = 0

# Train for 10 epochs over `train_loader`; the EPOCH_COMPLETED handlers above
# fire after each epoch in the order they were registered.
trainer.run(train_loader, max_epochs=10)

In [None]:
# Encode the test features with the same function used for the training data.
X_test = torch.FloatTensor(feature_array(test_df)).to(device)

# Switch to inference mode explicitly so dropout is disabled no matter what
# mode the training/evaluation loop left the model in.
model.eval()
with torch.no_grad():
    # The model outputs logits; sigmoid turns them into per-target probabilities.
    y_pred = torch.sigmoid(model(X_test)).cpu().numpy()

ss = pd.read_csv('../input/lish-moa/sample_submission.csv')

# Overwrite every column except the first with the predictions.
# NOTE(review): assumes sample_submission rows are in the same order as
# test_features rows -- confirm.
ss[ss.columns[1:]] = y_pred

ss.to_csv('submission.csv', index=False)