# NFL Baseline

# import libraries

In [1]:
# general
import os
import gc
import pickle
import glob
import random
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import cv2
import matplotlib.pyplot as plt
import time
import math

import sys
sys.path.append('/kaggle/input/timm-pytorch-image-models/pytorch-image-models-master')
import timm


# deep learning
from torch.utils.data import Dataset, DataLoader
from torch.optim import SGD, Adam, AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts, ReduceLROnPlateau
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import albumentations as A
from albumentations.pytorch import ToTensorV2


from sklearn.model_selection import StratifiedKFold

# loss metrics
from sklearn.metrics import matthews_corrcoef, confusion_matrix

import mlflow
# import wandb
# warningの表示方法の設定
import warnings
warnings.filterwarnings("ignore")

# Set Configurations

In [2]:
CFG = {
        "kaggle" : False,
        "DEBUG" : False,
        # model config
        "model_name" : "tf_efficientnet_b0",
        "out_features" : 1,
        "inp_channels": 3,
        "pretrained" : True,

        # learning config
        "n_epoch" : 20,
        "lr" : 1e-5,
        "T_max" : 10,
        "min_lr" : 1e-8,
        "weight_decay" : 1e-6,

        # etc
        "print_freq" : 100,
        "random_seed" : 21,

        # data config    
        "img_size" : (224, 224),
        "batch_size" : 128,
        "shuffle" : False, 
        "num_workers" : 0,
        "masksize_helmet_ratio" : 6, # helmetサイズにこの係数をかけたサイズだけ色を残して後は黒塗りする
        "TRAIN_VIDEO_NUM" : 4,
        "VALID_VIDEO_NUM" : 2,
        "ONLY_POSITIVE_TRAIN_VIDEO": 12,
        "sample_num" : -1,
        "ONLY_GROUND" : True,
        "ONLY_PLAYERS" : False,
        "USE_ONLY_HELMET_AVAIL" : True,

        "EXP_CATEGORY" : "make_baseline",
        "EXP_NAME" : "baseline012G",
}

if CFG["DEBUG"]:
    CFG["EXP_NAME"] = "DEBUG"
    CFG["n_epoch"] = 2
    CFG["sample_num"] = 1000

if CFG["kaggle"]:
    CFG["INPUT_DIR"] = "/kaggle/input/"
    CFG["OUTPUT_DIR"] = "/kaggle/working"
    CFG["TRAIN_HELMET_CSV"] = os.path.join(CFG["INPUT_DIR"], "nfl-player-contact-detection", "train_baseline_helmets.csv")
    CFG["TRAIN_TRACKING_CSV"] = os.path.join(CFG["INPUT_DIR"], "nfl-player-contact-detection", "train_player_tracking.csv")
    CFG["TRAIN_VIDEO_META_CSV"] = os.path.join(CFG["INPUT_DIR"], "nfl-player-contact-detection", "train_video_metadata.csv")
    CFG["TRAIN_LABEL_CSV"] = os.path.join(CFG["INPUT_DIR"], "nfl-player-contact-detection", "train_labels.csv")
    CFG["TARGET_CSV"] = os.path.join(CFG["INPUT_DIR"], "target_fillna0.csv")
    CFG["TRAIN_IMG_DIR"] = os.path.join(CFG["INPUT_DIR"], "nfl-baseline-saveframes")
    CFG["MODEL_DIR"] = os.path.join(CFG["OUTPUT_DIR"], "model")
else:
    CFG["INPUT_DIR"] = "/workspace/input"
    CFG["OUTPUT_DIR"] = "/workspace/output"
    CFG["TRAIN_HELMET_CSV"] = os.path.join(CFG["INPUT_DIR"], "train_baseline_helmets.csv")
    CFG["TRAIN_TRACKING_CSV"] = os.path.join(CFG["INPUT_DIR"], "train_player_tracking.csv")
    CFG["TRAIN_VIDEO_META_CSV"] = os.path.join(CFG["INPUT_DIR"], "train_video_metadata.csv")
    CFG["TRAIN_LABEL_CSV"] = os.path.join(CFG["INPUT_DIR"], "train_labels.csv")
    CFG["TARGET_CSV"] = os.path.join(CFG["INPUT_DIR"], "target_fillna0_3.csv")
    CFG["TRAIN_IMG_DIR"] = os.path.join(CFG["INPUT_DIR"], "train_images")
    CFG["MODEL_DIR"] = os.path.join(CFG["OUTPUT_DIR"], CFG["EXP_NAME"] ,"model")
    
if not CFG["kaggle"] and not CFG["DEBUG"]:
    os.mkdir(os.path.join(CFG["OUTPUT_DIR"], CFG["EXP_NAME"]))
    os.mkdir(CFG["MODEL_DIR"])


In [3]:
if CFG["kaggle"]:
    WANDB_CONFIG = {'competition': 'NFL', '_wandb_kernel': 'taro'}
    # Secrets
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    secret_value_0 = user_secrets.get_secret("wandb")

    !wandb login $secret_value_0
    #! TODO : logger settings
else:
    mlflow.set_tracking_uri("/workspace/mlruns")
    experiment = mlflow.get_experiment_by_name(CFG["EXP_CATEGORY"])
    if experiment is None:  # 当該Experiment存在しないとき、新たに作成
        experiment_id = mlflow.create_experiment(name=CFG["EXP_CATEGORY"])
    else: # 当該Experiment存在するとき、IDを取得
        experiment_id = experiment.experiment_id

# Utils

## Scoring Utils

In [4]:
def score_targetlong_concat(df_, long_df_):
    long_df_["pred"] = 0
    long_df_.iloc[-1]["pred"] = 1

    for threshold in [0.1, 0.5, 0.9]:
        scoring_df = pd.concat([long_df_[["contact_id", "contact", "pred"]], df_[["contact_id", "contact", "pred"]]], axis=0)
        scoring_df["pred"] = (scoring_df["pred"].values > threshold).astype(int)
        score = matthews_corrcoef(scoring_df["contact"].values, scoring_df["pred"].values)
        cm = confusion_matrix(scoring_df["contact"].values, scoring_df["pred"].values)
        tn, fp, fn, tp = cm.flatten()
        print(f"score = {score}, thr={threshold}")
        print(f"tn={tn}, fp={fp}, fn={fn}, tp={tp}")
        mlflow.log_metric("oof target concat score", score, step=int(threshold*10))

In [5]:
def logging_metrics_epoch(fold, epoch, train_loss_avg, valid_loss_avg, score, threshold, tn_best, fp_best, fn_best, tp_best):
    if CFG["kaggle"]:
            pass # set wandb logger
    else:
        mlflow.log_metric(f"fold{fold} train loss avg", train_loss_avg, step=epoch)
        mlflow.log_metric(f"fold{fold} valid loss avg", valid_loss_avg, step=epoch)
        mlflow.log_metric(f"fold{fold} score", score, step=epoch)
        mlflow.log_metric(f"fold{fold} score threshold", threshold, step=epoch)
        mlflow.log_metric(f"fold{fold} tn", tn_best, step=epoch)
        mlflow.log_metric(f"fold{fold} fp", fp_best, step=epoch)
        mlflow.log_metric(f"fold{fold} fn", fn_best, step=epoch)
        mlflow.log_metric(f"fold{fold} tp", tp_best, step=epoch)

In [6]:
def seed_everything(seed=CFG["random_seed"]):
    #os.environ['PYTHONSEED'] = str(seed)
    np.random.seed(seed%(2**32-1))
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic =True
    torch.backends.cudnn.benchmark = False
seed_everything()

# device optimization
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: cuda


In [7]:
def asMinutes(s):
    """Convert Seconds to Minutes."""
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def timeSince(since, percent):
    """Accessing and Converting Time Data."""
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))

class AverageMeter(object):
    """Computes and stores the average and current value."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

## Dataset Utils

In [8]:
def set_inimg_window(crop_pos, mask_size, img_size=(720, 1280)):#crop_pos = [left, top, right, bot]
    if mask_size[1] >= img_size[0]:
        top, bot = 0, img_size[1]
    else:
        top=(crop_pos[1] + crop_pos[3])//2 - mask_size[1]//2
        bot=(crop_pos[1] + crop_pos[3])//2 + mask_size[1]//2
        if top < 0:
            bot = bot - top
            top = 0
        elif bot > img_size[0]:
            top = top - (bot-img_size[0])
            bot = img_size[0]

    if mask_size[0] >= img_size[1]:
        left, right = 0, img_size[1]
    else:
        left = (crop_pos[0] + crop_pos[2])//2 - mask_size[0]//2
        right = (crop_pos[0] + crop_pos[2])//2 + mask_size[0]//2
        if left < 0:
            right = right - left
            left = 0
        elif right > img_size[1]:
            left = left - (right - img_size[1])
            right = img_size[1]
    crop_area = np.array([left, top, right, bot]).astype(np.int)
    return crop_area

In [9]:
def get_crop_area(p1_helmet, input_size=(720, 1280)):#helmet[left, width, top, height]
    if p1_helmet[1]==0 and p1_helmet[3]==0:
        crop_area = [0, 0, input_size[1], input_size[0]]
        return crop_area
    else:
        crop_x_center, crop_y_center = p1_helmet[0] + (p1_helmet[1])//2, p1_helmet[2] + (p1_helmet[3])//2
        helmet_base_size = (p1_helmet[1] + p1_helmet[3])*0.5*CFG["masksize_helmet_ratio"]*2
        output_size = [helmet_base_size, helmet_base_size]
    crop_left = crop_x_center - output_size[1]//2
    crop_top = crop_y_center - output_size[0]//2
    crop_right = crop_x_center + output_size[1]//2
    crop_bot = crop_y_center + output_size[0]//2
    crop_area = [crop_left, crop_top, crop_right, crop_bot]
    crop_area = set_inimg_window(crop_area, output_size)
    return crop_area

# Load Data

In [10]:
target_df = pd.read_csv(CFG["TARGET_CSV"])
# target_game_plays = target_df["game_play"].unique()[:50]
train_game_plays = target_df["game_play"].unique()[:CFG["TRAIN_VIDEO_NUM"]]
train_only_positive_plays = target_df["game_play"].unique()[CFG["TRAIN_VIDEO_NUM"]: CFG["TRAIN_VIDEO_NUM"]+CFG["ONLY_POSITIVE_TRAIN_VIDEO"]]
valid_game_plays = target_df["game_play"].unique()[CFG["TRAIN_VIDEO_NUM"]+CFG["ONLY_POSITIVE_TRAIN_VIDEO"] : CFG["TRAIN_VIDEO_NUM"]+CFG["ONLY_POSITIVE_TRAIN_VIDEO"]+CFG["VALID_VIDEO_NUM"]]

target_game_plays = list(set(train_game_plays) | set(valid_game_plays) | set(train_only_positive_plays))
CFG["train_game_plays"] = list(set(train_game_plays) | set(train_only_positive_plays))
CFG["valid_game_plays"] = list(valid_game_plays)
target_df = target_df[(target_df["game_play"].isin(train_game_plays)) | (target_df["game_play"].isin(valid_game_plays)) | \
                        (target_df["game_play"].isin(train_only_positive_plays) & (target_df["contact"]==1))]

if CFG["ONLY_GROUND"]:
    target_df = target_df[target_df["nfl_player_id_2"]=="G"]
    if CFG["USE_ONLY_HELMET_AVAIL"]:
        target_df = target_df[target_df["E_width_1"]!=0]

if CFG["ONLY_PLAYERS"]:
    target_df = target_df[target_df["nfl_player_id_2"]!="G"]
    if CFG["USE_ONLY_HELMET_AVAIL"]:
        target_df = target_df[target_df["E_width_1"]!=0]
        target_df = target_df[target_df["E_width_2"]!=0]


if CFG["DEBUG"]:
    target_df = target_df.sample(CFG["sample_num"])
elif CFG["sample_num"] != -1:
    target_df = target_df.sample(CFG["sample_num"])
    
print(len(target_df))
display(target_df["contact"].value_counts())

8995


0    7609
1    1386
Name: contact, dtype: int64

# Dataset

In [11]:
train_transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    A.Normalize(mean=[0.], std=[1.]),
    ToTensorV2()
])

valid_transform = A.Compose([
    A.Normalize(mean=[0.], std=[1.]),
    ToTensorV2()
])

In [12]:
class NFLDataset(Dataset):
    def __init__(self, target_df, transform=None):
        self.target_df = target_df
        self.transform = transform

    def __len__(self):
        return len(self.target_df)

    def __getitem__(self, idx):
        target_info = self.target_df.iloc[idx]
        target = target_info.contact
        # read frame image
        game_play = target_info.game_play
        frame = target_info.frame
#         file_id = f"{game_play}_{view}_{frame:05}.png"
        file_id = f"{game_play}_Endzone_{frame:05}.png"
        filename = os.path.join(CFG["TRAIN_IMG_DIR"], file_id)
        img = cv2.imread(filename)
        if img is None:
            img = np.zeros((224, 224, 3))
            img = np.transpose(img, (2, 0, 1)).astype(np.float)
            img = torch.tensor(img, dtype=torch.float)
            return img, target
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # player highlight mask
        player1 = target_info.nfl_player_id_1
        p1_helmet = np.array([target_info.E_left_1, target_info.E_width_1,
                            target_info.E_top_1, target_info.E_height_1]).astype(np.int)
        crop_area = get_crop_area(p1_helmet)# crop_area=[left, top, right, bot]
        # print(p1_helmet)
        # print(crop_area)
        # rec_img = cv2.rectangle(img, (crop_area[0],crop_area[1]), (crop_area[2],crop_area[3]), (255, 0,0), thickness=10)
        # rec_img = cv2.rectangle(rec_img, (p1_helmet[0],p1_helmet[2]), (p1_helmet[0] + p1_helmet[1],p1_helmet[2]+p1_helmet[3]), (0, 255,0), thickness=10)
        # plt.figure()
        # plt.imshow(rec_img)
        # plt.show()

        img = img[crop_area[1]:crop_area[3], crop_area[0]:crop_area[2], :]
        img = cv2.resize(img, dsize=CFG["img_size"])
        img = (img / 255.).astype(np.float32) # convert to 0-1
        # img = np.transpose(img, (2, 0, 1)).astype(np.float)
        # img = torch.tensor(img, dtype=torch.float)
        if self.transform is not None:
            img = self.transform(image=img)["image"]

        target = torch.tensor(target, dtype=torch.float)
        return img, target

In [13]:
# positive_df = target_df.query('contact==1')
# train_dataset = NFLDataset(positive_df)
# show_img_num = 4
# train_loader = DataLoader(
#     train_dataset,
#     batch_size = show_img_num,
#     shuffle = True,
#     num_workers = CFG["num_workers"],
#     pin_memory = True
# )


# for batch_idx, (images, targets) in enumerate(train_loader):
#     fig = plt.figure(figsize=(12, 25))
#     for idx in range(show_img_num):
#         img = images[idx].numpy()
#         fig.add_subplot(1,show_img_num ,idx+1)
#         plt.imshow(img)
#         plt.title(targets[idx].numpy())
#     plt.show()
#     break
# del train_loader, train_dataset

# negative_df = target_df.query('contact!=1')
# train_dataset = NFLDataset(negative_df)
# show_img_num = 4
# train_loader = DataLoader(
#     train_dataset,
#     batch_size = show_img_num,
#     shuffle = True,
#     num_workers = CFG["num_workers"],
#     pin_memory = True
# )


# for batch_idx, (images, targets) in enumerate(train_loader):
#     fig = plt.figure(figsize=(12, 25))
#     for idx in range(show_img_num):
#         img = images[idx].numpy()
#         fig.add_subplot(1,show_img_num ,idx+1)
#         plt.imshow(img)
#         plt.title(targets[idx].numpy())
#     plt.show()
#     break
# del train_loader, train_dataset

# raise Exception()

# Model

In [14]:
# without meta
class NFLNet(nn.Module):
    def __init__(
        self,
        model_name = CFG["model_name"],
        out_features = CFG["out_features"],
        inp_channels= CFG["inp_channels"],
        pretrained = CFG["pretrained"]
    ):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels, num_classes = out_features)
    
    def forward(self, image):
        output = self.model(image)
        return output

# train fn

In [15]:
def train_fn(train_loader, model, criterion, epoch ,optimizer, scheduler):
    model.train()
    batch_time = AverageMeter()
    losses = AverageMeter()
    start = end = time.time()
    for batch_idx, (images, targets) in enumerate(train_loader):
        images = images.to(device, non_blocking = True).float()
        targets = targets.to(device, non_blocking = True).float().view(-1, 1)                
        preds = model(images)
        
        loss = criterion(preds, targets)
        losses.update(loss.item(), CFG["batch_size"]) 
        targets = targets.detach().cpu().numpy().ravel().tolist()
        preds = torch.sigmoid(preds).detach().cpu().numpy().ravel().tolist()

        loss.backward() # パラメータの勾配を計算
        optimizer.step() # モデル更新
        optimizer.zero_grad() # 勾配の初期化
                
        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx % CFG["print_freq"] == 0 or batch_idx == (len(train_loader)-1):
            print('\t Epoch: [{0}][{1}/{2}] '
                    'Elapsed {remain:s} '
                    'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                    .format(
                        epoch, batch_idx, len(train_loader), batch_time=batch_time, loss=losses,
                        remain=timeSince(start, float(batch_idx+1)/len(train_loader)),
            ))
        del preds, images, targets
    gc.collect()
    torch.cuda.empty_cache()
    return losses.avg

# valid fn

In [16]:
def valid_fn(model, valid_loader, criterion):
    model.eval()# モデルを検証モードに設定
    test_targets = []
    test_preds = []

    batch_time = AverageMeter()
    losses = AverageMeter()
    start = end = time.time()
    for batch_idx, (images, targets) in enumerate(valid_loader):
        images = images.to(device, non_blocking = True).float()
        targets = targets.to(device, non_blocking = True).float().view(-1, 1)
        with torch.no_grad():
            preds = model(images)
            loss = criterion(preds, targets)
        losses.update(loss.item(), CFG["batch_size"])
        batch_time.update(time.time() - end)

        targets = targets.detach().cpu().numpy().ravel().tolist()
        preds = torch.sigmoid(preds).detach().cpu().numpy().ravel().tolist()

        test_preds.extend(preds)
        test_targets.extend(targets)
        # score = matthews_corrcoef(preds, targets)
        if batch_idx % CFG["print_freq"] == 0 or batch_idx == (len(valid_loader)-1):
            print('\t EVAL: [{0}/{1}] '
                'Elapsed {remain:s} '
                'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                .format(
                    batch_idx, len(valid_loader), batch_time=batch_time, loss=losses,
                    remain=timeSince(start, float(batch_idx+1)/len(valid_loader)),
                ))
        del preds, images, targets
        gc.collect()
        torch.cuda.empty_cache()
    test_preds = np.array(test_preds)
    test_targets = np.array(test_targets)
    return test_targets, test_preds, losses.avg

# Train loop

In [17]:
def training_loop(target_df):
    
    # set model & learning fn
    model = NFLNet()
    model = model.to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = AdamW(model.parameters(), lr=CFG["lr"], weight_decay=CFG["weight_decay"], amsgrad=False)
    scheduler = CosineAnnealingLR(optimizer, T_max=CFG["T_max"], eta_min=CFG["min_lr"], last_epoch=-1)

    oof_df = pd.DataFrame()

    fold = 0
    print(f'fold {fold} training start.')        
    # separate train/valid data 
    train_df = target_df[target_df["game_play"].isin(train_game_plays)]
    valid_df = target_df[target_df["game_play"].isin(valid_game_plays)]
    # train_dataset = NFLDataset(train_df)
    # valid_dataset = NFLDataset(valid_df)
    train_dataset = NFLDataset(train_df, train_transform)
    valid_dataset = NFLDataset(valid_df, valid_transform)
    train_loader = DataLoader(train_dataset,batch_size=CFG["batch_size"], shuffle = CFG["shuffle"],
                                num_workers = CFG["num_workers"], pin_memory = True)
    valid_loader = DataLoader(valid_dataset,batch_size=CFG["batch_size"], shuffle = CFG["shuffle"],
                                num_workers = CFG["num_workers"], pin_memory = True)

    # training
    best_score = -np.inf
    start_time = end = time.time()
    for epoch in range(1, CFG["n_epoch"] + 1):
        print(f'\t === epoch: {epoch}: training ===')
        train_loss_avg = train_fn(train_loader, model, criterion, epoch ,optimizer, scheduler)
        valid_targets, valid_preds, valid_loss_avg = valid_fn(model, valid_loader, criterion)

        valid_score = -np.inf
        valid_threshold = 0
        tn_best, fp_best, fn_best, tp_best = 0, 0, 0, 0
        for idx in range(1, 10, 1):
            thr = idx*0.1
            valid_targets = (np.array(valid_targets) > thr).astype(np.int32)
            valid_binary_preds = (np.array(valid_preds) > thr).astype(np.int32)
            score_tmp = matthews_corrcoef(valid_targets, valid_binary_preds)
            cm = confusion_matrix(valid_targets, valid_binary_preds)
            tn, fp, fn, tp = cm.flatten()
            if score_tmp > valid_score:
                valid_score = score_tmp 
                valid_threshold = thr
                tn_best, fp_best, fn_best, tp_best = tn, fp, fn, tp
        elapsed = time.time() - start_time
        print(f'\t epoch:{epoch}, avg train loss:{train_loss_avg:.4f}, avg valid loss:{valid_loss_avg:.4f}, score:{valid_score:.4f}(th={valid_threshold}) ::: time:{elapsed:.2f}s')
        scheduler.step()
        # validationスコアがbestを更新したらモデルを保存する
        if valid_score > best_score:
            best_score = valid_score
            model_name = CFG["model_name"]
            torch.save(model.state_dict(), f'{CFG["MODEL_DIR"]}/{model_name}_fold{fold}.pth')
            print(f'\t Epoch {epoch} - Save Best Score: {best_score:.4f}. Model is saved.')
            contact_id = valid_df["contact_id"].values
            _oof_df = pd.DataFrame({
                "contact_id" : contact_id,
                "pred" : valid_preds,
                "contact" : valid_targets,
                "fold" : fold,
            })
        logging_metrics_epoch(fold, epoch, train_loss_avg, valid_loss_avg, valid_score, valid_threshold, tn_best, fp_best, fn_best, tp_best)

    del train_loader, train_dataset, valid_loader, valid_dataset
    oof_df = pd.concat([oof_df, _oof_df], axis = 0)
    del _oof_df
    gc.collect()
    torch.cuda.empty_cache()
    return oof_df

In [18]:
if CFG["kaggle"]:
    oof_df = training_loop(target_df)
else:
    with mlflow.start_run(experiment_id=experiment_id, run_name=CFG["EXP_NAME"]) as run:
        mlflow.log_dict(CFG, "configuration.yaml")
        mlflow.log_param("positive data num", len(target_df[target_df["contact"]==1]))
        mlflow.log_param("negative data num", len(target_df[target_df["contact"]==0]))
        oof_df = training_loop(target_df)
        target_long_df = pd.read_csv("/workspace/input/long_distance_3_target.csv")
        score_targetlong_concat(oof_df, target_long_df)

fold 0 training start.
	 === epoch: 1: training ===
	 Epoch: [1][0/49] Elapsed 0m 12s (remain 9m 53s) Loss: 6.1916(6.1916) 
	 Epoch: [1][48/49] Elapsed 12m 10s (remain 0m 0s) Loss: 3.5037(4.0434) 
	 EVAL: [0/16] Elapsed 0m 12s (remain 3m 10s) Loss: 0.1907(0.1907) 
	 EVAL: [15/16] Elapsed 3m 28s (remain 0m 0s) Loss: 0.6004(0.4883) 
	 epoch:1, avg train loss:4.0434, avg valid loss:0.4883, score:0.0266(th=0.7000000000000001) ::: time:939.97s
	 Epoch 1 - Save Best Score: 0.0266. Model is saved.
	 === epoch: 2: training ===
	 Epoch: [2][0/49] Elapsed 0m 15s (remain 12m 18s) Loss: 2.3652(2.3652) 
	 Epoch: [2][48/49] Elapsed 12m 22s (remain 0m 0s) Loss: 1.7168(1.4600) 
	 EVAL: [0/16] Elapsed 0m 12s (remain 3m 11s) Loss: 1.8170(1.8170) 
	 EVAL: [15/16] Elapsed 3m 24s (remain 0m 0s) Loss: 0.7003(1.7559) 
	 epoch:2, avg train loss:1.4600, avg valid loss:1.7559, score:0.0379(th=0.30000000000000004) ::: time:1887.98s
	 Epoch 2 - Save Best Score: 0.0379. Model is saved.
	 === epoch: 3: training ===

KeyboardInterrupt: 

In [None]:
display(oof_df)
if CFG["kaggle"]:
    pass
else:
    oof_filename = os.path.join(CFG["OUTPUT_DIR"], CFG["EXP_NAME"], "oof_df.csv")
    oof_df.to_csv(oof_filename, index=False)

In [None]:
oof_df["pred"].hist(bins=100)

In [None]:
oof_df["contact"].value_counts()

In [None]:
target_df["contact"].value_counts()