In [None]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
import seaborn as sns
 
from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
 
import warnings
warnings.filterwarnings('ignore')

In [None]:
!pip install /kaggle/input/iterative-stratification/iterative-stratification-master/
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [None]:
seed = 42


def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and, when available,
    every CUDA device), exports ``PYTHONHASHSEED`` and switches cuDNN to its
    deterministic, non-benchmarking mode when CUDA is present.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return

    # GPU-only settings.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(seed)

In [None]:
def preprocessor_nn_transfer():
    """Load the MoA CSV files and build the train/test feature frames.

    Steps, in order:
      1. Read the CSVs from ``../input/lish-moa/``.
      2. Drop the ``ctl_vehicle`` rows from train and test features, then drop ``cp_type``.
      3. Quantile-transform ("Rank Gauss") every ``g-``/``c-`` column; each transformer is
         fitted on the train column and applied to both train and test.
      4. Append PCA components (fitted on train) for the gene and the cell columns.
      5. Keep only numeric columns whose train variance is >= ``variance_threshould``.
      6. One-hot encode ``cp_time`` and ``cp_dose``.
      7. Merge scored targets, non-scored targets and drug ids onto train via ``sig_id``.
      8. Add hand-picked pairwise cell products and squared gene/cell features.

    Returns:
        Tuple ``(train, test, num_targets, num_aux_targets, num_all_targets,
        target_cols, aux_target_cols, all_target_cols)`` where ``target_cols`` are the
        scored target names, ``aux_target_cols`` the non-scored ones and
        ``all_target_cols`` their concatenation.
    """
    variance_threshould = 0.8
    ncompo_genes = 600
    ncompo_cells = 50
    
    data_dir = '../input/lish-moa/'
    train_features = pd.read_csv(data_dir + 'train_features.csv')
    train_targets_scored = pd.read_csv(data_dir + 'train_targets_scored.csv')
    train_targets_nonscored = pd.read_csv(data_dir + 'train_targets_nonscored.csv')
    train_drug = pd.read_csv(data_dir + 'train_drug.csv')
    test_features = pd.read_csv(data_dir + 'test_features.csv')
    # NOTE(review): sample_submission is read here but never used inside this function.
    sample_submission = pd.read_csv(data_dir + 'sample_submission.csv')
    
    # Remove control-vehicle rows from both splits.
    train_features = train_features[train_features['cp_type'] != 'ctl_vehicle'].reset_index(drop=True)
    test_features = test_features[test_features['cp_type'] != 'ctl_vehicle'].reset_index(drop=True)
    # drop cp_type
    train_features = train_features.drop('cp_type', axis=1)
    test_features = test_features.drop('cp_type', axis=1)

    
    # Gene-expression and cell-viability columns, identified by their name prefix.
    GENES = [col for col in train_features.columns if col.startswith('g-')]
    CELLS = [col for col in train_features.columns if col.startswith('c-')]
    
    # Rank Gauss: one transformer per column, fitted on train only and reused for test.
    for col in (GENES + CELLS):
        transformer = QuantileTransformer(n_quantiles=100,random_state=0, output_distribution="normal")
        vec_len = len(train_features[col].values)
        vec_len_test = len(test_features[col].values)
        raw_vec = train_features[col].values.reshape(vec_len, 1)
        transformer.fit(raw_vec)

        train_features[col] = transformer.transform(raw_vec).reshape(1, vec_len)[0]
        test_features[col] = transformer.transform(test_features[col].values.reshape(vec_len_test, 1)).reshape(1, vec_len_test)[0]
    
    # PCA (uses the module-level `seed` as random_state)
    # GENES
    pca_genes = PCA(n_components = ncompo_genes, random_state = seed)
    pca_genes_train = pca_genes.fit_transform(train_features[GENES])
    # CELLS
    pca_cells = PCA(n_components = ncompo_cells, random_state = seed)
    pca_cells_train = pca_cells.fit_transform(train_features[CELLS])
    # train: append the components as new columns next to the original features
    pca_genes_train = pd.DataFrame(pca_genes_train, columns = [f"pca_g-{i}" for i in range(ncompo_genes)])
    pca_cells_train = pd.DataFrame(pca_cells_train, columns = [f"pca_c-{i}" for i in range(ncompo_cells)])
    train_features = pd.concat([train_features, pca_genes_train, pca_cells_train], axis = 1)
    # test: project with the PCA objects fitted on train
    pca_genes_test = pca_genes.transform(test_features[GENES])
    pca_cells_test = pca_cells.transform(test_features[CELLS])
    
    pca_genes_test = pd.DataFrame(pca_genes_test, columns = [f"pca_g-{i}" for i in range(ncompo_genes)])
    pca_cells_test = pd.DataFrame(pca_cells_test, columns = [f"pca_c-{i}" for i in range(ncompo_cells)])
    test_features = pd.concat([test_features, pca_genes_test, pca_cells_test], axis = 1)

    # feature selection: keep numeric columns whose variance on train reaches the threshold,
    # then restrict test to exactly the same surviving columns.
    cols_numeric = [feat for feat in list(train_features.columns) if feat not in ["sig_id", "cp_time", "cp_dose"]]
    mask = (train_features[cols_numeric].var() >= variance_threshould).values
    tmp = train_features[cols_numeric].loc[:, mask]
    train_features = pd.concat([train_features[["sig_id", "cp_time", "cp_dose"]], tmp], axis = 1)
    cols_numeric = [feat for feat in list(train_features.columns) if feat not in ["sig_id", "cp_time", "cp_dose"]]
    test_features = pd.concat([test_features[["sig_id", "cp_time", "cp_dose"]], test_features.loc[:,cols_numeric]], axis = 1)
    
    # one hot (encoded independently for train and test, so both splits must contain
    # the same cp_time / cp_dose categories for the resulting columns to line up)
    train_features = pd.get_dummies(train_features, columns = ['cp_time', 'cp_dose'])
    test_features = pd.get_dummies(test_features, columns = ['cp_time', 'cp_dose'])

    # Join targets and drug ids onto the train features by sig_id
    train = train_features.merge(train_targets_scored, on='sig_id')
    train = train.merge(train_targets_nonscored, on='sig_id')
    train = train.merge(train_drug, on='sig_id')
    test = test_features
    
    # Gene columns that additionally get a squared feature.
    gsquarecols=['g-574','g-211','g-216','g-0','g-255','g-577',
             'g-153','g-389','g-60','g-370','g-248','g-167',
             'g-203','g-177','g-301','g-332','g-517','g-6',
             'g-744','g-224','g-162','g-3','g-736','g-486',
             'g-283','g-22','g-359','g-361','g-440','g-335',
             'g-106','g-307','g-745','g-146','g-416','g-298',
             'g-666','g-91','g-17','g-549','g-145','g-157','g-768','g-568','g-396']

    # Interaction features are written in place on both frames. They are added after the
    # variance filter, so every referenced c-/g- column must have survived it (a dropped
    # column would raise KeyError here).
    for df in [train, test]:
        df['c52_c42'] = df['c-52'] * df['c-42']
        df['c13_c73'] = df['c-13'] * df['c-73']
        # NOTE(review): the column is named c26_c13 but is computed from c-23 * c-13.
        # Presumably the saved checkpoints were trained with this exact feature, so it
        # should not be "fixed" without retraining -- confirm.
        df['c26_c13'] = df['c-23'] * df['c-13']
        df['c33_c6'] = df['c-33'] * df['c-6']
        df['c11_c55'] = df['c-11'] * df['c-55']
        df['c38_c63'] = df['c-38'] * df['c-63']
        df['c38_c94'] = df['c-38'] * df['c-94']
        df['c13_c94'] = df['c-13'] * df['c-94']
        df['c4_c52'] = df['c-4'] * df['c-52']
        df['c4_c42'] = df['c-4'] * df['c-42']
        df['c13_c38'] = df['c-13'] * df['c-38']
        df['c55_c2'] = df['c-55'] * df['c-2']
        df['c55_c4'] = df['c-55'] * df['c-4']
        df['c4_c13'] = df['c-4'] * df['c-13']
        df['c82_c42'] = df['c-82'] * df['c-42']
        df['c66_c42'] = df['c-66'] * df['c-42']
        df['c6_c38'] = df['c-6'] * df['c-38']
        df['c2_c13'] = df['c-2'] * df['c-13']
        df['c62_c42'] = df['c-62'] * df['c-42']
        df['c90_c55'] = df['c-90'] * df['c-55']
    
        # GENES is the pre-filter gene list, so this check only guards against names
        # that never existed, not against columns removed by the variance filter.
        for feature in gsquarecols:
            if feature in GENES:
                df[f'{feature}_squared'] = df[feature] ** 2  
        for feature in CELLS:
            df[f'{feature}_squared'] = df[feature] ** 2 

    # Target column bookkeeping: scored targets, auxiliary (non-scored) targets, and both.
    target_cols = [x for x in train_targets_scored.columns if x != 'sig_id']
    aux_target_cols = [x for x in train_targets_nonscored.columns if x != 'sig_id']
    all_target_cols = target_cols + aux_target_cols

    num_targets = len(target_cols)
    num_aux_targets = len(aux_target_cols)
    num_all_targets = len(all_target_cols)

    return train, test, num_targets, num_aux_targets, num_all_targets, target_cols, aux_target_cols, all_target_cols

In [None]:
train, test, num_targets, num_aux_targets, num_all_targets, target_cols, aux_target_cols, all_target_cols = preprocessor_nn_transfer()

In [None]:
# Use fully qualified option names: the short forms 'max_rows' / 'max_columns' are regex
# patterns and raise OptionError ("Pattern matched multiple keys") on pandas versions that
# also define styler.render.max_rows / styler.render.max_columns.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
display(train.head())
display(test.head())

# Dataset Classes

In [None]:
class MoADataset:
    """Map-style dataset pairing a 2-D feature matrix with a 2-D target matrix.

    Indexing returns a dict with the float tensors ``'x'`` (feature row) and
    ``'y'`` (target row).
    """

    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __len__(self):
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        feature_row = self.features[idx, :]
        target_row = self.targets[idx, :]
        return {
            'x': torch.tensor(feature_row, dtype=torch.float),
            'y': torch.tensor(target_row, dtype=torch.float),
        }
    
class TestDataset:
    """Map-style dataset over a 2-D feature matrix without targets.

    Indexing returns a dict holding only the float tensor ``'x'``.
    """

    def __init__(self, features):
        self.features = features

    def __len__(self):
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, idx):
        feature_row = self.features[idx, :]
        return {'x': torch.tensor(feature_row, dtype=torch.float)}

In [None]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    """Run one training epoch and return the mean per-batch loss.

    The scheduler is stepped after every optimizer step, i.e. once per batch.

    Args:
        model: Network to optimise; switched to train mode.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Learning-rate scheduler stepped once per batch.
        loss_fn: Callable ``(outputs, targets) -> scalar loss tensor``.
        dataloader: Iterable of dicts with tensors under ``'x'`` and ``'y'``; must support ``len``.
        device: Device the batches are moved to.

    Returns:
        Sum of the batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []

    for batch in dataloader:
        features = batch['x'].to(device)
        labels = batch['y'].to(device)

        optimizer.zero_grad()
        batch_loss = loss_fn(model(features), labels)
        batch_loss.backward()
        optimizer.step()
        scheduler.step()

        batch_losses.append(batch_loss.item())

    return sum(batch_losses) / len(dataloader)

def valid_fn(model, loss_fn, dataloader, device):
    """Evaluate the model on a validation loader.

    Args:
        model: Network to evaluate; switched to eval mode.
        loss_fn: Callable ``(outputs, targets) -> scalar loss tensor``.
        dataloader: Iterable of dicts with tensors under ``'x'`` and ``'y'``; must support ``len``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean_loss, preds)`` where ``mean_loss`` is the sum of the batch losses divided
        by ``len(dataloader)`` and ``preds`` is a NumPy array holding the sigmoid of the model
        outputs for all batches, concatenated in loader order.
    """
    model.eval()
    final_loss = 0
    valid_preds = []

    # No gradients are needed for validation; without this guard every forward pass
    # would build an autograd graph and waste memory (inference_fn already does this).
    with torch.no_grad():
        for data in dataloader:
            inputs, targets = data['x'].to(device), data['y'].to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

            final_loss += loss.item()
            valid_preds.append(outputs.sigmoid().detach().cpu().numpy())

    final_loss /= len(dataloader)
    valid_preds = np.concatenate(valid_preds)
    return final_loss, valid_preds

def inference_fn(model, dataloader, device):
    """Return sigmoid probabilities for every batch of ``dataloader``.

    Args:
        model: Network used for prediction; switched to eval mode.
        dataloader: Iterable of dicts with the input tensor under ``'x'``.
        device: Device the inputs are moved to.

    Returns:
        NumPy array with the per-batch probabilities concatenated in loader order.
    """
    model.eval()
    batch_probs = []

    for batch in dataloader:
        features = batch['x'].to(device)
        with torch.no_grad():
            logits = model(features)
        batch_probs.append(logits.sigmoid().detach().cpu().numpy())

    return np.concatenate(batch_probs)

In [None]:
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

class SmoothBCEwLogits(_WeightedLoss):
    """Binary cross-entropy on logits with label smoothing.

    Targets are pulled towards 0.5: ``t * (1 - smoothing) + 0.5 * smoothing``.

    Args:
        weight: Optional rescaling weight forwarded to
            ``F.binary_cross_entropy_with_logits``.
        reduction: ``'mean'`` (default), ``'sum'``, or any other value to get the
            unreduced element-wise loss.
        smoothing: Smoothing factor in ``[0, 1)``; ``0.0`` disables smoothing.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        # The parent constructor already stores `weight` (as a buffer) and `reduction`.
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    @staticmethod
    def _smooth(targets: torch.Tensor, n_labels: int, smoothing=0.0):
        """Return the smoothed targets; ``n_labels`` is accepted but not used."""
        assert 0 <= smoothing < 1

        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing

        return targets

    def forward(self, inputs, targets):
        """Compute the smoothed BCE between ``inputs`` (logits) and ``targets``."""
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1), self.smoothing)
        # Ask for the element-wise loss and reduce here. The functional's default is
        # reduction='mean', which previously made 'sum' and 'none' silently return the mean.
        loss = F.binary_cross_entropy_with_logits(inputs, targets, self.weight, reduction='none')

        if self.reduction == 'sum':
            return loss.sum()
        if self.reduction == 'mean':
            return loss.mean()
        return loss

# Model

In [None]:
class Model(nn.Module):
    """Five-layer fully connected network producing one logit per target.

    Layer 1 is ``batch_norm1 -> dense1``; layers 2-5 are
    ``batch_norm{i} -> dropout{i} -> dense{i}``. Leaky ReLU follows dense1-dense4,
    and ``dense5`` is a weight-normalised linear head without activation.
    Sub-modules are registered under the names ``batch_norm{i}``, ``dropout{i}`` and
    ``dense{i}`` in layer order.
    """

    def __init__(self, num_features, num_targets):
        super().__init__()
        self.hidden_size = [1500, 1250, 1000, 750]
        self.dropout_value = [0.5, 0.35, 0.3, 0.25]

        # Layer 1: input normalisation and first projection (no dropout).
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dense1 = nn.Linear(num_features, self.hidden_size[0])

        # Layers 2..5: each consumes the previous hidden width; the last maps to the targets.
        widths = self.hidden_size + [num_targets]
        for position, drop_prob in enumerate(self.dropout_value):
            layer_no = position + 2
            fan_in, fan_out = widths[position], widths[position + 1]

            setattr(self, f"batch_norm{layer_no}", nn.BatchNorm1d(fan_in))
            setattr(self, f"dropout{layer_no}", nn.Dropout(drop_prob))

            linear = nn.Linear(fan_in, fan_out)
            if layer_no == 5:
                linear = nn.utils.weight_norm(linear)
            setattr(self, f"dense{layer_no}", linear)

    def forward(self, x):
        """Map a ``(batch, num_features)`` input to ``(batch, num_targets)`` logits."""
        x = F.leaky_relu(self.dense1(self.batch_norm1(x)))

        for layer_no in (2, 3, 4):
            x = getattr(self, f"batch_norm{layer_no}")(x)
            x = getattr(self, f"dropout{layer_no}")(x)
            x = F.leaky_relu(getattr(self, f"dense{layer_no}")(x))

        # Output head: no activation, raw logits.
        return self.dense5(self.dropout5(self.batch_norm5(x)))

In [None]:
class FineTuneScheduler:
    """Gradual-unfreezing helper for fine-tuning a pretrained ``Model``.

    ``copy_without_top`` clones a pretrained network, freezes every layer below the
    head and installs a fresh head. ``step`` then unfreezes one layer at a time, from
    the deepest frozen layer back to the first, once every ``epochs_per_step`` epochs
    (including epoch 0).

    Layers are identified by the last character of the sub-module name
    (``dense3.weight`` -> ``'3'``), so this only works for single-digit layer suffixes.

    Args:
        epochs: Total number of fine-tuning epochs the unfreezing is spread over.
    """

    # Suffix of the head modules (batch_norm5 / dropout5 / dense5). Kept as a string
    # because layer indices are parsed from parameter names and are therefore strings;
    # the former comparison against the integer 5 never matched, so the head was wrongly
    # counted as a frozen layer and epochs_per_step came out too small.
    TOP_LAYER_INDEX = '5'

    def __init__(self, epochs):
        self.epochs = epochs
        self.epochs_per_step = 0
        self.frozen_layers = []

    @staticmethod
    def _layer_index(param_name):
        """Return the layer suffix of a parameter name, e.g. ``'dense3.weight'`` -> ``'3'``."""
        return param_name.split('.')[0][-1]

    def copy_without_top(self, model, num_features, num_targets, num_targets_new):
        """Clone ``model``, freeze all layers below the head and replace the head.

        Args:
            model: Pretrained network whose weights are copied.
            num_features: Input width used to build the clone.
            num_targets: Output width of the pretrained network.
            num_targets_new: Output width of the freshly initialised head.

        Returns:
            The new model, moved to the module-level ``DEVICE``. Only the new head is
            trainable; ``frozen_layers`` and ``epochs_per_step`` are reset accordingly.
        """
        self.frozen_layers = []

        model_new = Model(num_features, num_targets)
        model_new.load_state_dict(model.state_dict())

        # Freeze every parameter that does not belong to the head.
        for name, param in model_new.named_parameters():
            layer_index = self._layer_index(name)

            if layer_index == self.TOP_LAYER_INDEX:
                continue

            param.requires_grad = False

            # Remember each frozen layer once, in network order.
            if layer_index not in self.frozen_layers:
                self.frozen_layers.append(layer_index)

        # Spread the unfreezing evenly; never let the step size drop to zero,
        # which would make `epoch % epochs_per_step` raise in step().
        if self.frozen_layers:
            self.epochs_per_step = max(1, self.epochs // len(self.frozen_layers))
        else:
            self.epochs_per_step = 0

        # Replace the top layers with freshly initialised (trainable) ones.
        model_new.batch_norm5 = nn.BatchNorm1d(model_new.hidden_size[3])
        model_new.dropout5 = nn.Dropout(model_new.dropout_value[3])
        model_new.dense5 = nn.utils.weight_norm(nn.Linear(model_new.hidden_size[-1], num_targets_new))
        model_new.to(DEVICE)
        return model_new

    def step(self, epoch, model):
        """Unfreeze the deepest still-frozen layer when ``epoch`` hits a step boundary.

        Args:
            epoch: Zero-based index of the epoch about to be trained.
            model: Model returned by ``copy_without_top``.
        """
        if len(self.frozen_layers) == 0 or self.epochs_per_step <= 0:
            return

        if epoch % self.epochs_per_step == 0:
            last_frozen_index = self.frozen_layers[-1]

            # Unfreeze parameters of the last frozen layer
            for name, param in model.named_parameters():
                if self._layer_index(name) == last_frozen_index:
                    param.requires_grad = True

            del self.frozen_layers[-1]  # Remove the last layer as unfrozen

# Preprocessing steps

In [None]:
# Model inputs are every column of `train` that is neither a target, an identifier,
# nor a fold-assignment column. Fold columns are named `kfold_<seed>`, so they are
# excluded by prefix: this keeps the cell idempotent if it is re-run after the CV
# folds have been added to `train` (an exact match on 'kfold' would miss them).
non_feature_cols = set(all_target_cols) | {'sig_id', 'drug_id'}
feature_cols = [
    c for c in train.columns
    if c not in non_feature_cols and not c.startswith('kfold')
]
num_features = len(feature_cols)
num_features

In [None]:
# HyperParameters

# Device used for the models and for every batch.
DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
# Epochs per training stage; also the length of the OneCycleLR schedule.
EPOCHS = 24
BATCH_SIZE = 128

# The dictionaries below are indexed by the training-stage tag passed to train_model:
# 'ALL_TARGETS' (scored + non-scored targets) and 'SCORED_ONLY' (scored targets only).
WEIGHT_DECAY = {'ALL_TARGETS': 1e-5, 'SCORED_ONLY': 3e-6}  # Adam weight_decay
MAX_LR = {'ALL_TARGETS': 1e-2, 'SCORED_ONLY': 3e-3}  # OneCycleLR max_lr
DIV_FACTOR = {'ALL_TARGETS': 1e3, 'SCORED_ONLY': 1e2}  # OneCycleLR div_factor
PCT_START = 0.1  # OneCycleLR pct_start

In [None]:
# Show model architecture
# Built with the combined (scored + non-scored) target count; this instance is only
# displayed here.
model = Model(num_features, num_all_targets)
model

# Single fold training

In [None]:
from sklearn.model_selection import KFold

def make_cv_folds(train, SEEDS, NFOLDS, DRUG_THRESH):
    """Add one drug-aware fold column per seed to ``train`` (in place).

    Drugs occurring at most ``DRUG_THRESH`` times are assigned to folds as a whole,
    stratified on their mean target vector; rows of more frequent drugs are
    stratified individually on their own targets. Reads the module-level
    ``target_cols``.

    Args:
        train: Frame with ``sig_id``, ``drug_id`` and the scored target columns.
        SEEDS: Iterable of seeds; each one yields a column ``kfold_<seed>``.
        NFOLDS: Number of folds.
        DRUG_THRESH: Occurrence count separating rare from frequent drugs.

    Returns:
        The same ``train`` frame with the added ``int8`` fold columns.
    """
    drug_counts = train.drug_id.value_counts()
    rare_drugs = drug_counts.loc[drug_counts <= DRUG_THRESH].index.sort_values()
    frequent_drugs = drug_counts.loc[drug_counts > DRUG_THRESH].index.sort_values()

    for seed_id in SEEDS:
        kfold_col = 'kfold_{}'.format(seed_id)

        # Rare drugs: one fold per drug, stratified on the drug's mean targets.
        drug_splitter = MultilabelStratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=seed_id)
        drug_profiles = train.groupby('drug_id')[target_cols].mean().loc[rare_drugs]
        fold_by_drug = {}
        drug_splits = drug_splitter.split(drug_profiles, drug_profiles[target_cols])
        for fold, (_, valid_pos) in enumerate(drug_splits):
            fold_by_drug.update(dict.fromkeys(drug_profiles.index[valid_pos].values, fold))

        # Frequent drugs: one fold per row, stratified on the row's targets.
        row_splitter = MultilabelStratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=seed_id)
        frequent_rows = train.loc[train.drug_id.isin(frequent_drugs)].reset_index(drop=True)
        fold_by_sig = {}
        row_splits = row_splitter.split(frequent_rows, frequent_rows[target_cols])
        for fold, (_, valid_pos) in enumerate(row_splits):
            fold_by_sig.update(dict.fromkeys(frequent_rows.sig_id[valid_pos].values, fold))

        # Drug-level assignment first; rows still missing a fold get the row-level one.
        train[kfold_col] = train.drug_id.map(fold_by_drug)
        unassigned = train[kfold_col].isna()
        train.loc[unassigned, kfold_col] = train.loc[unassigned, 'sig_id'].map(fold_by_sig)
        train[kfold_col] = train[kfold_col].astype('int8')

    return train

# Seeds 42..48 and the fold count select which checkpoint files run_training loads
# (its weight path is built from seed_id and fold_id), so they must match the saved weights.
SEEDS = list(range(42,49))
NFOLDS = 10
# Drugs occurring at most this many times are assigned to folds as whole drugs.
DRUG_THRESH = 18

# Adds one `kfold_<seed>` column per seed to `train` (the frame is modified in place).
train = make_cv_folds(train, SEEDS, NFOLDS, DRUG_THRESH)
train.head()

In [None]:
def run_training(fold_id, seed_id):
    """Predict the test set with the saved fine-tuned weights of one (seed, fold) pair.

    The two-stage training recipe (pre-train on scored + non-scored targets, then
    fine-tune on the scored targets) is kept below as a commented-out block together
    with the ``train_model`` helper it uses; as written, this function only loads the
    ``SCORED_ONLY`` checkpoint for ``(seed_id, fold_id)`` and runs inference.

    Args:
        fold_id: Fold index; selects the checkpoint file (and the validation fold when
            training is re-enabled).
        seed_id: Seed; selects the checkpoint file and the ``kfold_<seed_id>`` column.

    Returns:
        Tuple ``(oof, predictions)``. ``predictions`` is the array of test
        probabilities returned by ``inference_fn``. ``oof`` is the constant
        placeholder ``0.02`` because no training happens here, so any score computed
        from the accumulated value is NOT a real cross-validation score.
    """
    seed_everything(seed_id)

    train_ = train
    test_ = test

    def train_model(model, tag_name, target_cols_now, fine_tune_scheduler=None):
        """Train ``model`` on this fold, checkpoint the best epoch and return its OOF predictions."""
        # The split is built here, not in the enclosing function, so the inference-only
        # path does not copy the training frame on every call.
        kfold_col = f'kfold_{seed_id}'
        in_valid_fold = train_[kfold_col] == fold_id
        val_idx = train_[in_valid_fold].index
        train_df = train_[~in_valid_fold].reset_index(drop=True)
        valid_df = train_[in_valid_fold].reset_index(drop=True)

        x_train, y_train = train_df[feature_cols].values, train_df[target_cols_now].values
        x_valid, y_valid = valid_df[feature_cols].values, valid_df[target_cols_now].values

        train_dataset = MoADataset(x_train, y_train)
        valid_dataset = MoADataset(x_valid, y_valid)

        trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=WEIGHT_DECAY[tag_name])
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                                  steps_per_epoch=len(trainloader),
                                                  pct_start=PCT_START,
                                                  div_factor=DIV_FACTOR[tag_name],
                                                  max_lr=MAX_LR[tag_name],
                                                  epochs=EPOCHS)

        # Validation uses plain BCE; training uses the label-smoothed variant.
        loss_fn = nn.BCEWithLogitsLoss()
        loss_tr = SmoothBCEwLogits(smoothing=0.001)

        oof = np.zeros((len(train_), len(target_cols_now)))
        best_loss = np.inf

        for epoch in range(EPOCHS):
            if fine_tune_scheduler is not None:
                fine_tune_scheduler.step(epoch, model)

            train_loss = train_fn(model, optimizer, scheduler, loss_tr, trainloader, DEVICE)
            valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
            print(f"SEED: {seed_id}, FOLD: {fold_id}, {tag_name}, EPOCH: {epoch}, train_loss: {train_loss:.6f}, valid_loss: {valid_loss:.6f}")

            if np.isnan(valid_loss):
                break

            # Keep the predictions and the weights of the best validation epoch only.
            if valid_loss < best_loss:
                best_loss = valid_loss
                oof[val_idx] = valid_preds
                torch.save(model.state_dict(), f"{tag_name}_SEED_{seed_id}_FOLD{fold_id}_.pth")

        return oof

    # Training recipe that produced the checkpoints loaded below (disabled here):
#     fine_tune_scheduler = FineTuneScheduler(EPOCHS)

#     pretrained_model = Model(num_features, num_all_targets)
#     pretrained_model.to(DEVICE)

#     # Train on scored + nonscored targets
#     train_model(pretrained_model, 'ALL_TARGETS', all_target_cols)

#     # Load the pretrained model with the best loss
#     pretrained_model = Model(num_features, num_all_targets)
#     pretrained_model.load_state_dict(torch.load(f"ALL_TARGETS_SEED_{seed_id}_FOLD{fold_id}_.pth"))
#     pretrained_model.to(DEVICE)

#     # Copy model without the top layer
#     final_model = fine_tune_scheduler.copy_without_top(pretrained_model, num_features, num_all_targets, num_targets)

#     # Fine-tune the model on scored targets only
#     oof = train_model(final_model, 'SCORED_ONLY', target_cols, fine_tune_scheduler)

    # Load the fine-tuned model with the best loss. map_location lets checkpoints that
    # were saved from a GPU run be restored on a CPU-only machine as well.
    model = Model(num_features, num_targets)
    state_dict = torch.load(
        f"../input/nn-transferlearning-7seeds4248-10folds/SCORED_ONLY_SEED_{seed_id}_FOLD{fold_id}_.pth",
        map_location=DEVICE,
    )
    model.load_state_dict(state_dict)
    model.to(DEVICE)

    #--------------------- PREDICTION---------------------
    x_test = test_[feature_cols].values
    testdataset = TestDataset(x_test)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)

    predictions = inference_fn(model, testloader, DEVICE)
    print(f"Pretrained_Model_Loaded_SCORED_ONLYSEED: {seed_id}, FOLD: {fold_id} ")
    # NOTE(review): 0.02 stands in for the OOF matrix while training is disabled;
    # return `oof` from train_model here once the recipe above is re-enabled.
    return 0.02, predictions

In [None]:
def run_k_fold(NFOLDS, seed_id):
    """Run every fold of one seed and aggregate the results.

    Args:
        NFOLDS: Number of folds to iterate over.
        seed_id: Seed forwarded to ``run_training``.

    Returns:
        Tuple ``(oof, predictions)``: the sum of the per-fold OOF values and the
        mean of the per-fold test predictions.
    """
    n_targets = len(target_cols)
    oof = np.zeros((len(train), n_targets))
    predictions = np.zeros((len(test), n_targets))

    for fold_id in range(NFOLDS):
        fold_oof, fold_pred = run_training(fold_id, seed_id)
        oof += fold_oof
        predictions += fold_pred / NFOLDS

    return oof, predictions

In [None]:
from time import time

# Averaging on multiple SEEDS
# Iterates over the SEEDS list defined with the CV folds; each seed contributes
# 1 / len(SEEDS) of the final out-of-fold values and test predictions.
oof = np.zeros((len(train), len(target_cols)))
predictions = np.zeros((len(test), len(target_cols)))

time_begin = time()

for seed_id in SEEDS:
    oof_, predictions_ = run_k_fold(NFOLDS, seed_id)
    oof += oof_ / len(SEEDS)
    predictions += predictions_ / len(SEEDS)

time_diff = time() - time_begin

# WARNING: this overwrites the ground-truth target columns of `train` with the
# out-of-fold values, so the cell is not idempotent and the true targets must be
# re-read from disk before scoring.
train[target_cols] = oof
test[target_cols] = predictions

In [None]:
from datetime import timedelta
str(timedelta(seconds=time_diff))

In [None]:
# Re-read the ground-truth scored targets from disk: the target columns inside `train`
# were replaced by out-of-fold values above and can no longer serve as labels.
data_dir = '../input/lish-moa/'
train_targets_scored = pd.read_csv(data_dir + 'train_targets_scored.csv')
train_targets_scored.head()

In [None]:
len(target_cols)

In [None]:
# Align the out-of-fold values with the full target table: sig_ids missing from `train`
# (the left merge leaves them NaN) are scored as 0 before clipping.
valid_results = (
    train_targets_scored
    .drop(columns=target_cols)
    .merge(train[['sig_id'] + target_cols], on='sig_id', how='left')
    .fillna(0)
)

y_true = train_targets_scored[target_cols].values
y_pred = np.clip(valid_results[target_cols].values, 0.0005, 0.9995)

# Sum of the per-target log losses; the reported metric is their mean over targets.
score = sum(
    log_loss(y_true[:, col], y_pred[:, col])
    for col in range(len(target_cols))
)

print("CV log_loss: ", score / y_pred.shape[1])

In [None]:
# Build the submission in sample_submission's row order. The ctl_vehicle rows were
# removed from `test` during preprocessing, so the left merge leaves them NaN and
# fillna(0) submits 0 for every target of those rows.
# Relies on `data_dir` defined in the cell that reloads the scored targets.
sample_submission = pd.read_csv(data_dir + 'sample_submission.csv')
sub = sample_submission.drop(columns=target_cols).merge(test[['sig_id']+target_cols], on='sig_id', how='left').fillna(0)
sub.to_csv('submission.csv', index=False)

In [None]:
sub.shape

In [None]:
# Load the out-of-fold array stored alongside the checkpoints in the same dataset.
# NOTE(review): the variable is called oof_TabNet_all although the file name says
# nn_transfer -- presumably a copy-paste leftover; confirm before renaming.
saved_path_name = '../input/nn-transferlearning-7seeds4248-10folds/'
oof_TabNet_all = np.load(saved_path_name + 'oof_nn_transfer_all.npy')
print(oof_TabNet_all.shape)


In [None]:
def log_loss_numpy(y_pred, y_true):
    """Mean binary log loss over all flattened elements.

    Note the argument order: predictions first, labels second.

    Args:
        y_pred: Array-like of predicted probabilities; clipped to ``[1e-15, 1 - 1e-15]``.
        y_true: Array-like of labels with the same number of elements; entries equal
            to 1 are treated as positives, everything else as negatives.

    Returns:
        The mean of ``-log(p)`` for positives and ``-log(1 - p)`` for negatives.
    """
    labels = np.asarray(y_true).ravel()
    probs = np.clip(np.asarray(y_pred).ravel(), 1e-15, 1 - 1e-15)
    # Probability assigned to the observed class of each element.
    likelihood = np.where(labels == 1, probs, 1 - probs)
    return (-np.log(likelihood)).mean()

In [None]:
log_loss_numpy(y_pred, y_true)