In [30]:
import numpy as np
import pandas as pd
import os
import random

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import warnings
warnings.filterwarnings('ignore')

In [76]:
input_path = '../datasets/'
train_df = pd.read_csv('../output/train_df.csv')
test_X = pd.read_csv('../output/test_df.csv')
test_df = pd.read_csv(os.path.join(input_path, 'test.csv'))

train_Y = pd.read_csv('../output/train_Y.csv')

In [77]:
train_df.head()

Unnamed: 0,session_id,user_id_x,date,hour,register_number,time_elapsed,month,day,weekday,jp_holiday,...,537_given,539_given,629_given,768_given,child_items,alone_items,cook_items,user_id_y,given_buy_num,avg_qoupon
0,105,CN9sWHXp6RdCuyFkW5aemG,2019-02-14,9,1005,152.0,2,14,3,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,CN9sWHXp6RdCuyFkW5aemG,3.0,
1,106,Wi5hmLRCmUPXMRheu354dd,2019-02-14,9,1010,147.0,2,14,3,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Wi5hmLRCmUPXMRheu354dd,2.0,
2,107,kTFrFDLeaaggCoubWZJHpg,2019-02-14,9,1010,177.0,2,14,3,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,kTFrFDLeaaggCoubWZJHpg,1.0,
3,108,exwdBc8tNJYAjhc4Gd6qtj,2019-02-14,9,1011,247.0,2,14,3,0,...,0.0,0.0,0.0,0.0,0.0,1.0,2.0,exwdBc8tNJYAjhc4Gd6qtj,4.0,1.0
4,109,XUeiScqGsozKQFxcd3RDsD,2019-02-14,9,1013,147.0,2,14,3,0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.0,XUeiScqGsozKQFxcd3RDsD,2.0,1.0


In [5]:
target_category = [38, 110, 113, 114, 134, 171, 172, 173, 376, 435, 467, 537, 539, 629, 768]
target_category_str = [str(col) for col in target_category]

In [23]:
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

In [7]:
def multi_roc_auc_score(y_true, y_pred):
    scores = []

    for i in range(y_pred.shape[1]):
        try:
            score_i = roc_auc_score(y_true[:, i], y_pred[:, i])
        except:
            # よくないですが多少雑でも無理やり計算したい
            y_true[0,i]=1
            score_i = roc_auc_score(y_true[:, i], y_pred[:, i])
        scores.append(score_i)
    auc_score = sum(scores) / len(scores)
    return roc_auc_score(y_true, y_pred, average='macro')

In [122]:
DEVICE = ('cpu')
EPOCHS = 4
BATCH_SIZE = 256
LEARNING_RATE = 1e-6
WEIGHT_DECAY = 1e-5
NUM_TARGETS = len(target_category_str)

In [16]:
feature_cols = ['age', 'gender', 'hour', 'weekday', 'jp_holiday', 'tgif', 'num_visit', 'month', 'day', 'register_number', 'max_time_avg', 'time_elapsed', 'hanakin', '38_avg', '110_avg', '113_avg', '114_avg', '134_avg', '171_avg', '172_avg', '173_avg', '376_avg', '435_avg', '467_avg', '537_avg', '539_avg', '629_avg', '768_avg', '38_given', '110_given', '113_given', '114_given', '134_given', '171_given', '172_given', '173_given', '376_given', '435_given', '467_given', '537_given', '539_given', '629_given', '768_given', '38_price_avg', '110_price_avg', '113_price_avg', '114_price_avg', '134_price_avg', '171_price_avg', '172_price_avg', '173_price_avg', '376_price_avg', '435_price_avg', '467_price_avg', '537_price_avg', '539_price_avg', '629_price_avg', '768_price_avg', 'child_items_sum', 'child_items_avg', 'child_items', '1_depart_avg', '2_depart_avg', '3_depart_avg', '4_depart_avg', '5_depart_avg', '7_depart_avg', '9_depart_avg', '10_depart_avg', '13_depart_avg', '14_depart_avg', '15_depart_avg', '16_depart_avg', '18_depart_avg', '19_depart_avg', '20_depart_avg', '21_depart_avg', '22_depart_avg', '23_depart_avg', '24_depart_avg', '25_depart_avg', '26_depart_avg', '27_depart_avg', '28_depart_avg', '29_depart_avg', '30_depart_avg', '32_depart_avg', '33_depart_avg', '34_depart_avg', '35_depart_avg', '36_depart_avg', '37_depart_avg', '38_depart_avg', '39_depart_avg', '40_depart_avg', '41_depart_avg', '46_depart_avg', '47_depart_avg', '49_depart_avg', '50_depart_avg', '58_depart_avg', '59_depart_avg', '60_depart_avg', '69_depart_avg', '70_depart_avg', '71_depart_avg', '72_depart_avg', '73_depart_avg', '74_depart_avg', '75_depart_avg', '77_depart_avg', '78_depart_avg', '79_depart_avg', '80_depart_avg', '81_depart_avg', '82_depart_avg', '83_depart_avg', '84_depart_avg', '87_depart_avg', '88_depart_avg', '89_depart_avg', '91_depart_avg', '92_depart_avg', '93_depart_avg', '94_depart_avg', '95_depart_avg', '96_depart_avg', '97_depart_avg', '98_depart_avg', '106_depart_avg', '107_depart_avg', '109_depart_avg', '117_depart_avg', '118_depart_avg', '121_depart_avg', '124_depart_avg', '131_depart_avg', '132_depart_avg', '133_depart_avg', '136_depart_avg', '137_depart_avg', '138_depart_avg', '141_depart_avg', '151_depart_avg', '152_depart_avg', '153_depart_avg', '154_depart_avg', '155_depart_avg', '156_depart_avg', '161_depart_avg', '162_depart_avg', '163_depart_avg', '165_depart_avg', '172_depart_avg', '173_depart_avg', '174_depart_avg', '178_depart_avg', '179_depart_avg', '182_depart_avg', '183_depart_avg', '185_depart_avg', '187_depart_avg', '194_depart_avg', '201_depart_avg', '202_depart_avg', '203_depart_avg', '206_depart_avg', '207_depart_avg', '210_depart_avg', '214_depart_avg', '215_depart_avg', '217_depart_avg', '219_depart_avg', '220_depart_avg', '221_depart_avg', '223_depart_avg', '224_depart_avg', '225_depart_avg', '226_depart_avg', '227_depart_avg', '228_depart_avg', '229_depart_avg', '230_depart_avg', '231_depart_avg', '232_depart_avg', '233_depart_avg', '234_depart_avg', 'cancel_items_sum', 'cancel_items_avg', 'buy_num_items_sum', 'buy_num_items_avg', 'given_buy_num', 'cancel10_items_sum', 'cancel10_items_avg', 'alone_items_sum', 'alone_items_avg', 'alone_items', 'cook_items_sum', 'cook_items_avg', 'cook_items', 'qoupon_avg', 'avg_qoupon', 'category_35_avg', 'category_37_avg', 'category_39_avg', 'category_40_avg', 'category_86_avg', 'category_111_avg', 'category_112_avg', 'category_135_avg', 'category_137_avg', 'category_141_avg', 'category_142_avg', 'category_143_avg', 'category_145_avg', 'category_148_avg', 'category_149_avg', 'category_150_avg', 'category_205_avg', 'category_206_avg', 'category_207_avg', 'category_208_avg', 'category_209_avg', 'category_210_avg', 'category_274_avg', 'category_275_avg', 'category_276_avg', 'category_289_avg', 'category_307_avg', 'category_310_avg', 'category_311_avg', 'category_312_avg', 'category_313_avg', 'category_316_avg', 'category_317_avg', 'category_319_avg', 'category_321_avg', 'category_328_avg', 'category_334_avg', 'category_340_avg', 'category_341_avg', 'category_342_avg', 'category_343_avg', 'category_344_avg', 'category_363_avg', 'category_365_avg', 'category_368_avg', 'category_370_avg', 'category_371_avg', 'category_372_avg', 'category_373_avg', 'category_374_avg', 'category_375_avg', 'category_376_avg', 'category_377_avg', 'category_378_avg', 'category_391_avg', 'category_392_avg', 'category_406_avg', 'category_407_avg', 'category_408_avg', 'category_410_avg', 'category_411_avg', 'category_414_avg', 'category_415_avg', 'category_416_avg', 'category_417_avg', 'category_420_avg', 'category_421_avg', 'category_422_avg', 'category_423_avg', 'category_424_avg', 'category_425_avg', 'category_426_avg', 'category_431_avg', 'category_432_avg', 'category_433_avg', 'category_436_avg', 'category_469_avg', 'category_470_avg', 'category_471_avg', 'category_472_avg', 'category_473_avg', 'category_474_avg', 'category_508_avg', 'category_509_avg', 'category_536_avg', 'category_538_avg', 'category_561_avg', 'category_562_avg', 'category_565_avg', 'category_566_avg', 'category_567_avg', 'category_568_avg', 'category_579_avg', 'category_587_avg', 'category_588_avg', 'category_589_avg', 'category_590_avg', 'category_591_avg', 'category_594_avg', 'category_602_avg', 'category_617_avg', 'category_619_avg', 'category_620_avg', 'category_621_avg', 'category_623_avg', 'category_628_avg', 'category_630_avg', 'category_631_avg', 'category_632_avg', 'category_633_avg', 'category_634_avg', 'category_636_avg', 'category_655_avg', 'category_665_avg', 'category_666_avg', 'category_669_avg', 'category_674_avg', 'category_679_avg', 'category_684_avg', 'category_708_avg', 'category_711_avg', 'category_716_avg', 'category_720_avg', 'category_724_avg', 'category_769_avg', 'category_770_avg', 'category_771_avg', 'similar_110_avg', 'similar_113_avg', 'similar_114_avg', 'similar_134_avg', 'similar_171_avg', 'similar_172_avg', 'similar_173_avg', 'similar_376_avg', 'similar_38_avg', 'similar_435_avg', 'similar_467_avg', 'similar_537_avg', 'similar_539_avg', 'similar_629_avg', 'similar_768_avg', 'category_35_given', 'category_37_given', 'category_39_given', 'category_40_given', 'category_86_given', 'category_111_given', 'category_112_given', 'category_135_given', 'category_136_given', 'category_137_given', 'category_141_given', 'category_142_given', 'category_143_given', 'category_145_given', 'category_148_given', 'category_149_given', 'category_150_given', 'category_205_given', 'category_206_given', 'category_207_given', 'category_208_given', 'category_209_given', 'category_210_given', 'category_274_given', 'category_275_given', 'category_276_given', 'category_289_given', 'category_294_given', 'category_295_given', 'category_299_given', 'category_307_given', 'category_310_given', 'category_311_given', 'category_312_given', 'category_313_given', 'category_314_given', 'category_315_given', 'category_316_given', 'category_317_given', 'category_319_given', 'category_321_given', 'category_328_given', 'category_330_given', 'category_331_given', 'category_334_given', 'category_340_given', 'category_341_given', 'category_342_given', 'category_343_given', 'category_344_given', 'category_346_given', 'category_363_given', 'category_365_given', 'category_366_given', 'category_367_given', 'category_368_given', 'category_370_given', 'category_371_given', 'category_372_given', 'category_373_given', 'category_374_given', 'category_375_given', 'category_376_given', 'category_377_given', 'category_378_given', 'category_391_given', 'category_392_given', 'category_393_given', 'category_406_given', 'category_407_given', 'category_408_given', 'category_410_given', 'category_411_given', 'category_414_given', 'category_415_given', 'category_416_given', 'category_417_given', 'category_418_given', 'category_420_given', 'category_421_given', 'category_422_given', 'category_423_given', 'category_424_given', 'category_425_given', 'category_426_given', 'category_430_given', 'category_431_given', 'category_432_given', 'category_433_given', 'category_434_given', 'category_436_given', 'category_468_given', 'category_469_given', 'category_470_given', 'category_471_given', 'category_472_given', 'category_473_given', 'category_474_given', 'category_508_given', 'category_509_given', 'category_510_given', 'category_536_given', 'category_538_given', 'category_546_given', 'category_547_given', 'category_548_given', 'category_561_given', 'category_562_given', 'category_565_given', 'category_566_given', 'category_567_given', 'category_568_given', 'category_569_given', 'category_579_given', 'category_587_given', 'category_588_given', 'category_589_given', 'category_590_given', 'category_591_given', 'category_594_given', 'category_602_given', 'category_613_given', 'category_615_given', 'category_616_given', 'category_617_given', 'category_619_given', 'category_620_given', 'category_623_given', 'category_628_given', 'category_630_given', 'category_631_given', 'category_632_given', 'category_633_given', 'category_634_given', 'category_635_given', 'category_636_given', 'category_655_given', 'category_662_given', 'category_665_given', 'category_666_given', 'category_667_given', 'category_669_given', 'category_674_given', 'category_679_given', 'category_684_given', 'category_696_given', 'category_708_given', 'category_711_given', 'category_716_given', 'category_720_given', 'category_724_given', 'category_726_given', 'category_756_given', 'category_769_given', 'category_770_given', 'category_771_given', 'similar_110_given', 'similar_113_given', 'similar_114_given', 'similar_134_given', 'similar_171_given', 'similar_172_given', 'similar_173_given', 'similar_376_given', 'similar_38_given', 'similar_435_given', 'similar_467_given', 'similar_537_given', 'similar_539_given', 'similar_629_given', 'similar_768_given']

In [71]:
from sklearn.preprocessing import QuantileTransformer

def normalize_num_feat(train_features, valid_features, test_features, cols):
    transformer = QuantileTransformer(n_quantiles=100, random_state=seed, output_distribution="normal")
    for col in cols:
        vec_len = len(train_features[col].values)
        vec_len_valid = len(valid_features[col].values)
        vec_len_test = len(test_features[col].values)
        raw_vec = train_features[col].values.reshape(vec_len, 1)
        transformer.fit(raw_vec)
        train_features[col] = transformer.transform(raw_vec).reshape(1, vec_len)[0]
        valid_features[col] = transformer.transform(valid_features[col].values.reshape(vec_len_valid, 1)).reshape(1, vec_len_valid)[0]
        test_features[col] = transformer.transform(test_features[col].values.reshape(vec_len_test, 1)).reshape(1, vec_len_test)[0]
    return train_features, valid_features, test_features

In [19]:
import torch
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

class SmoothBCEwLogits(_WeightedLoss):
    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth(targets:torch.Tensor, n_labels:int, smoothing=0.0):
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = targets * (1.0 - smoothing) + 0.5 * smoothing
        return targets

    def forward(self, inputs, targets):
        targets = SmoothBCEwLogits._smooth(targets, inputs.size(-1),
            self.smoothing)
        loss = F.binary_cross_entropy_with_logits(inputs, targets,self.weight)

        if  self.reduction == 'sum':
            loss = loss.sum()
        elif  self.reduction == 'mean':
            loss = loss.mean()

        return loss

In [94]:
class Atma9Dataset:
    def __init__(self, features, targets):
        self.features = features       
        self.targets = targets

    def __len__(self):
        return (len(self.features))

    def __getitem__(self, idx):
        dct = {
            'X' : torch.tensor(self.features[idx, :], dtype=torch.float),
            'y' : torch.tensor(self.targets[idx, :], dtype=torch.float)
        }
        return dct

class Atma9TestDataset:
    def __init__(self, features):
        self.features = features 

    def __len__(self):
        return (len(self.features))

    def __getitem__(self, idx):
        dct = {
            'X' : torch.tensor(self.features[idx, :], dtype=torch.float),
        }
        return dct

In [129]:
HIDDEN_SIZE = 1024


class Model(nn.Module):
    def __init__(self, num_features):
        super(Model, self).__init__()
        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dropout1 = nn.Dropout(0.2)
        self.linear1 = nn.Linear(num_features, 1024)
        self.relu1 = nn.ReLU()

        self.batch_norm2 = nn.BatchNorm1d(1024)
        self.dropout2 = nn.Dropout(0.2)
        self.linear2 = nn.Linear(1024, NUM_TARGETS)
        self.relu2 = nn.ReLU()

    def forward(self, x):
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = self.linear1(x)
        print(x)
        x = self.relu1(x)

        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.linear2(x)
        x = self.relu2(x)

        return x

In [130]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device):
    model.train()
    final_loss = 0

    for data in dataloader:
        optimizer.zero_grad()
        features = data['X'].to(device)
        targets = data['y'].to(device)
#         print(features)
        outputs = model(features)
#         print(outputs)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()
        scheduler.step()

        final_loss += loss.item()

    final_loss /= len(dataloader)
    return final_loss


def valid_fn(model, loss_fn, dataloader, device):
    model.eval()
    final_loss = 0
    valid_preds = []

    for data in dataloader:
        features = data['X'].to(device)
        targets = data['y'].to(device)
        outputs = model(features)
        loss = loss_fn(outputs, targets)
        final_loss += loss.item()
        pred = outputs.sigmoid().detach().cpu().numpy()
#         print(pred)
        valid_preds.append(pred)

    final_loss /= len(dataloader)
    valid_preds = np.concatenate(valid_preds)
    return final_loss, valid_preds


def inference_fn(model, dataloader, device):
    model.eval()
    preds = []


    for data in dataloader:
        features = data['X'].to(device)
        with torch.no_grad():
            outputs = model(features)

        preds.append(outputs.sigmoid().detach().cpu().numpy())
    preds = np.concatenate(preds)
    return preds

In [133]:
seed = 42

def run_training(train_data, train_label, test_data, feature_cols):
    seed_everything(seed)

    nfolds = 5
    cv = KFold(n_splits=nfolds, shuffle=True, random_state=42)

    train_data = train_data.reset_index(drop=True)
    train_label = train_label.reset_index(drop=True)
    test_data = test_data.reset_index(drop=True)

    num_cols = len(feature_cols)
    oof = np.zeros((len(train_data), NUM_TARGETS))

    predictions = np.zeros((len(test_data), train_label.iloc[:, 1:].shape[1]))

    for i, (train_idx, valid_idx) in enumerate(cv.split(train_data)):
        print(f"############# start fold: {i+1} #############")
        X_train, y_train = train_data.iloc[train_idx], train_label.iloc[train_idx]
        X_valid, y_valid = train_data.iloc[valid_idx], train_label.iloc[valid_idx]

        X_train, X_valid, test_data = normalize_num_feat(X_train, X_valid, test_data, feature_cols)

        # independet_num
        X_train, X_valid, test_data = X_train[feature_cols], X_valid[feature_cols], test_data[feature_cols]

        # other features ......

        # create data lorder
        train_dataset = Atma9Dataset(X_train.values, y_train.iloc[:, 1:].values)
        valid_dataset = Atma9Dataset(X_valid.values, y_valid.iloc[:, 1:].values)
        trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)        

        # Model
        model = Model(
            len(feature_cols)
        )

        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3, 
                                                  max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(trainloader))
        loss_fn = SmoothBCEwLogits(smoothing =0.001)
        best_loss = np.inf

        for epoch in range(EPOCHS):
            train_loss = train_fn(model, optimizer, scheduler, loss_fn, trainloader, DEVICE)
            valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
            valid_score = multi_roc_auc_score(y_valid.iloc[:, 1:].values, valid_preds)
            print(f"train_loss: {train_loss:.4f}\t valid_loss: {valid_loss:.4f}\t valid_score: {valid_score:.4f}")
            if valid_loss < best_loss:
                oof[valid_idx] = valid_preds
                best_loss = valid_loss
                print(f"update best loss: {best_loss:.4f} in epoch: {epoch}")
                best_model_path = f"../output/NORMAL_SEED{seed}_FOLD{i+1}.pth"
                torch.save(model.state_dict(), best_model_path)

        #model.load_state_dict(torch.load(best_model_path))
        testdataset = Atma9TestDataset(test_data.values)
        testloader = torch.utils.data.DataLoader(testdataset, batch_size=BATCH_SIZE, shuffle=False)


        _predictions = inference_fn(model, testloader, DEVICE)
        predictions += _predictions/nfolds
        gc.collect()
    return predictions, oof

In [134]:
preds_in_thre_time, oof = run_training(train_df, train_Y, test_X, feature_cols)

############# start fold: 1 #############
tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]])


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn