# Directory settings

In [None]:
# ====================================================
# Directory settings
# ====================================================

import sys
if "google.colab" in sys.modules:
    from google.colab import drive
    drive.mount("/content/drive")
    base = "/content/drive/MyDrive/colab_notebooks/kaggle/nbme-score-clinical-patient-notes/notebooks"
    %cd {base}


import os
if 'kaggle_web_client' in sys.modules:
    OUTPUT_DIR = './'
else:
    OUTPUT_DIR = './nb001t-token-classifier/'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

Mounted at /content/drive
/content/drive/MyDrive/colab_notebooks/kaggle/nbme-score-clinical-patient-notes/notebooks


# CFG

In [None]:
# ====================================================
# CFG
# ====================================================
# Notebook-wide settings, read as class attributes (e.g. CFG.epochs); the class
# is used as a plain namespace in the cells shown here.
class CFG:
    wandb=True  # enable the Weights & Biases login / logging cells
    wandbgroup="nb001t-token-classifier"  # passed as `group` to wandb.init
    # Passed as `name` to wandb.init. Its last "-"-separated token is also parsed
    # by the case-selection cell: "all" keeps every case, otherwise it must be an
    # integer case_num.
    wandbname="case-num-4"
    cv_case_num=False
    competition='NBME'
    _wandb_kernel='riow1983'
    debug=False  # True triggers the shortened-run overrides below the class
    apex=True  # toggles torch.cuda.amp autocast / GradScaler in train_fn
    print_freq=100  # progress is printed every `print_freq` steps
    num_workers=4
    model="microsoft/deberta-base"  # checkpoint name used for tokenizer, config and backbone
    scheduler='cosine' # ['linear', 'cosine']
    batch_scheduler=True  # when True, train_fn steps the scheduler after every optimizer step
    num_cycles=0.5
    num_warmup_steps=0
    epochs=5
    encoder_lr=2e-5
    decoder_lr=2e-5
    min_lr=1e-6
    eps=1e-6
    betas=(0.9, 0.999)
    batch_size=12
    fc_dropout=0.2  # dropout probability in front of the classification head
    max_len=512  # placeholder; overwritten by the "Define max_len" cell
    weight_decay=0.01
    gradient_accumulation_steps=1  # train_fn steps the optimizer every this many batches
    max_grad_norm=1000  # threshold passed to clip_grad_norm_ in train_fn
    seed=42
    n_fold=5  # number of GroupKFold splits
    trn_fold=[0, 1, 2, 3, 4]
    train=True
    
# Debug mode: fewer epochs, a single fold, and a "debug-" prefix on the run name
# (the prefix leaves the trailing case token of wandbname intact).
if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0]
    CFG.wandbname = "debug-" + CFG.wandbname

In [None]:
# ====================================================
# wandb
# ====================================================
if CFG.wandb:
    if 'google.colab' in sys.modules:
        !pip install wandb
    import wandb

    try:
        if 'kaggle_web_client' in sys.modules:
            from kaggle_secrets import UserSecretsClient
            user_secrets = UserSecretsClient()
            secret_value_0 = user_secrets.get_secret("wandb_api")
        else:
            import json
            f = open("../../wandb.json", "r")
            json_data = json.load(f)
            secret_value_0 = json_data["wandb_api"]
        wandb.login(key=secret_value_0)
        anony = None
    except:
        anony = "must"
        print('If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token. Use the Label name as wandb_api. \nGet your W&B access token from here: https://wandb.ai/authorize')


    def class2dict(f):
        return dict((name, getattr(f, name)) for name in dir(f) if not name.startswith('__'))

    run = wandb.init(dir=OUTPUT_DIR,
                     project='NBME-Public', 
                     name=CFG.wandbname,
                     config=class2dict(CFG),
                     group=CFG.wandbgroup,
                     job_type="train",
                     anonymous=anony)
    print(f"wandb run id: {run.id}")

Collecting wandb
  Downloading wandb-0.12.11-py2.py3-none-any.whl (1.7 MB)
[?25l[K     |▏                               | 10 kB 26.6 MB/s eta 0:00:01[K     |▍                               | 20 kB 19.7 MB/s eta 0:00:01[K     |▋                               | 30 kB 10.5 MB/s eta 0:00:01[K     |▊                               | 40 kB 8.4 MB/s eta 0:00:01[K     |█                               | 51 kB 4.5 MB/s eta 0:00:01[K     |█▏                              | 61 kB 5.4 MB/s eta 0:00:01[K     |█▎                              | 71 kB 5.4 MB/s eta 0:00:01[K     |█▌                              | 81 kB 4.3 MB/s eta 0:00:01[K     |█▊                              | 92 kB 4.8 MB/s eta 0:00:01[K     |█▉                              | 102 kB 5.2 MB/s eta 0:00:01[K     |██                              | 112 kB 5.2 MB/s eta 0:00:01[K     |██▎                             | 122 kB 5.2 MB/s eta 0:00:01[K     |██▍                             | 133 kB 5.2 MB/s eta 0:00:01

[34m[1mwandb[0m: W&B API key is configured (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mriow1983[0m (use `wandb login --relogin` to force relogin)


wandb run id: 32ab0v9m


# Library

In [None]:
# ====================================================
# Library
# ====================================================
import os
import gc
import re
import ast
import sys
import copy
import json
import time
import math
import string
import pickle
import random
import joblib
import itertools
import warnings
warnings.filterwarnings("ignore")

import scipy as sp
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import DataLoader, Dataset

os.system('pip uninstall -y transformers')
os.system('python -m pip install --no-index --find-links=../input/nbme-pip-wheels transformers')
import tokenizers
import transformers
print(f"tokenizers.__version__: {tokenizers.__version__}")
print(f"transformers.__version__: {transformers.__version__}")
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
%env TOKENIZERS_PARALLELISM=true

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizers.__version__: 0.11.0
transformers.__version__: 4.16.2
env: TOKENIZERS_PARALLELISM=true


# Helper functions for scoring

In [None]:
# From https://www.kaggle.com/theoviel/evaluation-metric-folds-baseline

def micro_f1(preds, truths):
    """
    Binary F1 computed once over all samples pooled together (micro averaging).

    Args:
        preds (list of lists of ints): Per-sample binary predictions.
        truths (list of lists of ints): Per-sample binary ground truths.

    Returns:
        float: f1 score of the concatenated predictions against the concatenated truths.
    """
    # Flatten every sample into one long vector before scoring.
    pooled_preds = np.concatenate(preds)
    pooled_truths = np.concatenate(truths)
    return f1_score(pooled_truths, pooled_preds)


def spans_to_binary(spans, length=None):
    """
    Converts spans to a binary array indicating whether each character is in the span.

    Args:
        spans (list of lists of two ints): [start, end) spans; end is exclusive.
        length (int, optional): Size of the output array. When None, the largest
            value found in `spans` is used (0 if `spans` is empty).

    Returns:
        np array [length]: Binarized spans (1.0 inside a span, 0.0 elsewhere).
    """
    if length is None:
        # np.max raises on an empty sequence; with no spans there is nothing to mark.
        length = int(np.max(spans)) if len(spans) else 0
    binary = np.zeros(length)
    for start, end in spans:
        binary[start:end] = 1
    return binary


def span_micro_f1(preds, truths):
    """
    Character-level micro f1 between predicted and ground-truth spans.

    Args:
        preds (list of lists of two ints): Prediction spans, one list per sample.
        truths (list of lists of two ints): Ground truth spans, one list per sample.

    Returns:
        float: f1 score.
    """
    binarized_preds, binarized_truths = [], []
    for pred_spans, truth_spans in zip(preds, truths):
        if len(pred_spans) == 0 and len(truth_spans) == 0:
            # Neither side has a span: the sample contributes no characters.
            continue
        pred_extent = np.max(pred_spans) if len(pred_spans) else 0
        truth_extent = np.max(truth_spans) if len(truth_spans) else 0
        # Both arrays are sized to the furthest offset mentioned by either side.
        n_chars = max(pred_extent, truth_extent)
        binarized_preds.append(spans_to_binary(pred_spans, n_chars))
        binarized_truths.append(spans_to_binary(truth_spans, n_chars))
    return micro_f1(binarized_preds, binarized_truths)

In [None]:
def create_labels_for_scoring(df):
    """
    Build the ground-truth character spans for every row from its ``location`` list.

    Each ``location`` value is a list of strings such as ``'0 1'`` or
    ``'3 4;7 9'``, where ``;`` separates several segments inside one string.

    Args:
        df (pd.DataFrame): Frame with a ``location`` column holding lists of strings.
            Any index is accepted; rows are processed in positional order.

    Returns:
        list of lists of [int, int]: For each row, its [start, end] pairs in order
        (an empty list for rows with no location).

    Side effects:
        Adds or overwrites ``df['location_for_create_labels']``: ``[]`` for rows
        without locations, otherwise a one-element list holding all of the row's
        location strings joined with ``;``.
    """
    # example: ['0 1', '3 4'] -> ['0 1;3 4']
    # A fresh list is built per row (no shared list object) and nothing is looked
    # up by index label, so the function no longer requires a 0..n-1 index.
    joined_locations = [[';'.join(lst)] if lst else [] for lst in df['location']]
    df['location_for_create_labels'] = pd.Series(joined_locations, index=df.index, dtype=object)

    # create labels
    truths = []
    for location_list in joined_locations:
        truth = []
        if len(location_list) > 0:
            for segment in location_list[0].split(';'):
                fields = segment.split()
                truth.append([int(fields[0]), int(fields[1])])
        truths.append(truth)
    return truths


def get_char_probs(texts, predictions, tokenizer):
    """
    Spread per-token predictions over the characters each token covers.

    Args:
        texts: Sequence of strings.
        predictions: Per-text sequences of token-level values, aligned with the
            tokens produced by `tokenizer` for that text.
        tokenizer: Callable returning a mapping with an 'offset_mapping' entry of
            (start, end) character offsets per token.

    Returns:
        list of np arrays: One array per text, of the text's length; characters
        not covered by any token stay 0.
    """
    char_probs = [np.zeros(len(text)) for text in texts]
    for row, text, token_preds in zip(char_probs, texts, predictions):
        encoding = tokenizer(text,
                             add_special_tokens=True,
                             return_offsets_mapping=True)
        for span, token_pred in zip(encoding['offset_mapping'], token_preds):
            row[span[0]:span[1]] = token_pred
    return char_probs


def get_results(char_probs, th=0.5):
    """
    Turn per-character probabilities into ';'-separated "first last" span strings.

    For each array, the indices with probability >= `th` are shifted by +1 and
    every run of consecutive indices is written as "<first> <last>".

    Args:
        char_probs: Iterable of 1-D np arrays of per-character probabilities.
        th (float): Inclusion threshold.

    Returns:
        list of str: One string per input array ("" when nothing passes the threshold).
    """
    results = []
    for char_prob in char_probs:
        shifted = np.where(char_prob >= th)[0] + 1
        # Indices come out sorted, so a run continues exactly when the next
        # index is the previous one plus one.
        runs = []
        for position in shifted:
            if runs and position == runs[-1][1] + 1:
                runs[-1][1] = position
            else:
                runs.append([position, position])
        results.append(";".join(f"{first} {last}" for first, last in runs))
    return results


def get_predictions(results):
    """
    Parse ';'-separated "start end" strings into lists of [start, end] int pairs.

    Args:
        results: Iterable of strings as produced by get_results.

    Returns:
        list of lists of [int, int]: One list per input string; "" maps to [].
    """
    def _parse(result):
        # An empty string means no span for this sample.
        if result == "":
            return []
        spans = []
        for segment in result.split(';'):
            fields = segment.split()
            spans.append([int(fields[0]), int(fields[1])])
        return spans

    return [_parse(result) for result in results]

# Utils

In [None]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    """
    Span-level micro f1 between ground-truth and predicted spans.

    Args:
        y_true (list of lists of two ints): Ground truth spans per sample.
        y_pred (list of lists of two ints): Predicted spans per sample.

    Returns:
        float: f1 score.
    """
    # span_micro_f1 is declared as (preds, truths); bind by keyword so each
    # argument lands on the parameter it is named after.
    return span_micro_f1(preds=y_pred, truths=y_true)


def get_logger(filename=OUTPUT_DIR+'train'):
    """
    Return this module's logger, writing plain messages to the stream handler
    and to ``<filename>.log``.

    Args:
        filename (str): Log file path without the ``.log`` extension.

    Returns:
        logging.Logger: Logger at INFO level with exactly one stream handler and
        one file handler attached.
    """
    from logging import getLogger, INFO, StreamHandler, FileHandler, Formatter
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger returns the same object on every call, so handlers left over from
    # a previous call (e.g. re-running this cell) would duplicate every message.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) RNGs and request deterministic cuDNN."""
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(seed=42)

# Data Loading

In [None]:
# ====================================================
# Data Loading
# ====================================================
# Load the training annotations. 'annotation' and 'location' are parsed with
# ast.literal_eval so each cell becomes a Python list instead of its string form.
train = pd.read_csv('../input/nbme-score-clinical-patient-notes/train.csv')
train['annotation'] = train['annotation'].apply(ast.literal_eval)
train['location'] = train['location'].apply(ast.literal_eval)
features = pd.read_csv('../input/nbme-score-clinical-patient-notes/features.csv')
def preprocess_features(features):
    """Overwrite feature_text of the row labelled 27 (in place) and return the same frame."""
    # NOTE(review): presumably repairs a malformed value in the raw CSV - confirm against features.csv.
    features.loc[27, 'feature_text'] = "Last-Pap-smear-1-year-ago"
    return features
features = preprocess_features(features)
patient_notes = pd.read_csv('../input/nbme-score-clinical-patient-notes/patient_notes.csv')

# Quick look at the shape and first rows of each table.
print(f"train.shape: {train.shape}")
display(train.head())
print(f"features.shape: {features.shape}")
display(features.head())
print(f"patient_notes.shape: {patient_notes.shape}")
display(patient_notes.head())

train.shape: (14300, 6)


Unnamed: 0,id,case_num,pn_num,feature_num,annotation,location
0,00016_000,0,16,0,[dad with recent heart attcak],[696 724]
1,00016_001,0,16,1,"[mom with ""thyroid disease]",[668 693]
2,00016_002,0,16,2,[chest pressure],[203 217]
3,00016_003,0,16,3,"[intermittent episodes, episode]","[70 91, 176 183]"
4,00016_004,0,16,4,[felt as if he were going to pass out],[222 258]


features.shape: (143, 3)


Unnamed: 0,feature_num,case_num,feature_text
0,0,0,Family-history-of-MI-OR-Family-history-of-myoc...
1,1,0,Family-history-of-thyroid-disorder
2,2,0,Chest-pressure
3,3,0,Intermittent-symptoms
4,4,0,Lightheaded


patient_notes.shape: (42146, 3)


Unnamed: 0,pn_num,case_num,pn_history
0,0,0,"17-year-old male, has come to the student heal..."
1,1,0,17 yo male with recurrent palpitations for the...
2,2,0,Dillon Cleveland is a 17 y.o. male patient wit...
3,3,0,a 17 yo m c/o palpitation started 3 mos ago; \...
4,4,0,17yo male with no pmh here for evaluation of p...


In [None]:
# Attach feature_text (joined on feature_num + case_num) and pn_history (joined on
# pn_num + case_num) to every training row; left joins keep all rows of train.
train = train.merge(features, on=['feature_num', 'case_num'], how='left')
train = train.merge(patient_notes, on=['pn_num', 'case_num'], how='left')
display(train.head())

Unnamed: 0,id,case_num,pn_num,feature_num,annotation,location,feature_text,pn_history
0,00016_000,0,16,0,[dad with recent heart attcak],[696 724],Family-history-of-MI-OR-Family-history-of-myoc...,HPI: 17yo M presents with palpitations. Patien...
1,00016_001,0,16,1,"[mom with ""thyroid disease]",[668 693],Family-history-of-thyroid-disorder,HPI: 17yo M presents with palpitations. Patien...
2,00016_002,0,16,2,[chest pressure],[203 217],Chest-pressure,HPI: 17yo M presents with palpitations. Patien...
3,00016_003,0,16,3,"[intermittent episodes, episode]","[70 91, 176 183]",Intermittent-symptoms,HPI: 17yo M presents with palpitations. Patien...
4,00016_004,0,16,4,[felt as if he were going to pass out],[222 258],Lightheaded,HPI: 17yo M presents with palpitations. Patien...


In [None]:
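# Manual corrections for rows whose 'annotation' / 'location' values are incorrect.
# NOTE: rows are addressed by hard-coded index label (train.loc[<label>, ...]), so this
# cell must run on the freshly merged `train`, before any filtering or reset_index.
# Each replacement value is written with an extra level of list nesting; verify that the
# stored cell has the expected shape on the pandas version in use.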
train.loc[338, 'annotation'] = ast.literal_eval('[["father heart attack"]]')
train.loc[338, 'location'] = ast.literal_eval('[["764 783"]]')

train.loc[621, 'annotation'] = ast.literal_eval('[["for the last 2-3 months"]]')
train.loc[621, 'location'] = ast.literal_eval('[["77 100"]]')

train.loc[655, 'annotation'] = ast.literal_eval('[["no heat intolerance"], ["no cold intolerance"]]')
train.loc[655, 'location'] = ast.literal_eval('[["285 292;301 312"], ["285 287;296 312"]]')

train.loc[1262, 'annotation'] = ast.literal_eval('[["mother thyroid problem"]]')
train.loc[1262, 'location'] = ast.literal_eval('[["551 557;565 580"]]')

train.loc[1265, 'annotation'] = ast.literal_eval('[[\'felt like he was going to "pass out"\']]')
train.loc[1265, 'location'] = ast.literal_eval('[["131 135;181 212"]]')

train.loc[1396, 'annotation'] = ast.literal_eval('[["stool , with no blood"]]')
train.loc[1396, 'location'] = ast.literal_eval('[["259 280"]]')

train.loc[1591, 'annotation'] = ast.literal_eval('[["diarrhoe non blooody"]]')
train.loc[1591, 'location'] = ast.literal_eval('[["176 184;201 212"]]')

train.loc[1615, 'annotation'] = ast.literal_eval('[["diarrhea for last 2-3 days"]]')
train.loc[1615, 'location'] = ast.literal_eval('[["249 257;271 288"]]')

train.loc[1664, 'annotation'] = ast.literal_eval('[["no vaginal discharge"]]')
train.loc[1664, 'location'] = ast.literal_eval('[["822 824;907 924"]]')

train.loc[1714, 'annotation'] = ast.literal_eval('[["started about 8-10 hours ago"]]')
train.loc[1714, 'location'] = ast.literal_eval('[["101 129"]]')

train.loc[1929, 'annotation'] = ast.literal_eval('[["no blood in the stool"]]')
train.loc[1929, 'location'] = ast.literal_eval('[["531 539;549 561"]]')

train.loc[2134, 'annotation'] = ast.literal_eval('[["last sexually active 9 months ago"]]')
train.loc[2134, 'location'] = ast.literal_eval('[["540 560;581 593"]]')

train.loc[2191, 'annotation'] = ast.literal_eval('[["right lower quadrant pain"]]')
train.loc[2191, 'location'] = ast.literal_eval('[["32 57"]]')

train.loc[2553, 'annotation'] = ast.literal_eval('[["diarrhoea no blood"]]')
train.loc[2553, 'location'] = ast.literal_eval('[["308 317;376 384"]]')

train.loc[3124, 'annotation'] = ast.literal_eval('[["sweating"]]')
train.loc[3124, 'location'] = ast.literal_eval('[["549 557"]]')

train.loc[3858, 'annotation'] = ast.literal_eval('[["previously as regular"], ["previously eveyr 28-29 days"], ["previously lasting 5 days"], ["previously regular flow"]]')
train.loc[3858, 'location'] = ast.literal_eval('[["102 123"], ["102 112;125 141"], ["102 112;143 157"], ["102 112;159 171"]]')

train.loc[4373, 'annotation'] = ast.literal_eval('[["for 2 months"]]')
train.loc[4373, 'location'] = ast.literal_eval('[["33 45"]]')

train.loc[4763, 'annotation'] = ast.literal_eval('[["35 year old"]]')
train.loc[4763, 'location'] = ast.literal_eval('[["5 16"]]')

train.loc[4782, 'annotation'] = ast.literal_eval('[["darker brown stools"]]')
train.loc[4782, 'location'] = ast.literal_eval('[["175 194"]]')

train.loc[4908, 'annotation'] = ast.literal_eval('[["uncle with peptic ulcer"]]')
train.loc[4908, 'location'] = ast.literal_eval('[["700 723"]]')

train.loc[6016, 'annotation'] = ast.literal_eval('[["difficulty falling asleep"]]')
train.loc[6016, 'location'] = ast.literal_eval('[["225 250"]]')

train.loc[6192, 'annotation'] = ast.literal_eval('[["helps to take care of aging mother and in-laws"]]')
train.loc[6192, 'location'] = ast.literal_eval('[["197 218;236 260"]]')

train.loc[6380, 'annotation'] = ast.literal_eval('[["No hair changes"], ["No skin changes"], ["No GI changes"], ["No palpitations"], ["No excessive sweating"]]')
train.loc[6380, 'location'] = ast.literal_eval('[["480 482;507 519"], ["480 482;499 503;512 519"], ["480 482;521 531"], ["480 482;533 545"], ["480 482;564 582"]]')

train.loc[6562, 'annotation'] = ast.literal_eval('[["stressed due to taking care of her mother"], ["stressed due to taking care of husbands parents"]]')
train.loc[6562, 'location'] = ast.literal_eval('[["290 320;327 337"], ["290 320;342 358"]]')

train.loc[6862, 'annotation'] = ast.literal_eval('[["stressor taking care of many sick family members"]]')
train.loc[6862, 'location'] = ast.literal_eval('[["288 296;324 363"]]')

train.loc[7022, 'annotation'] = ast.literal_eval('[["heart started racing and felt numbness for the 1st time in her finger tips"]]')
train.loc[7022, 'location'] = ast.literal_eval('[["108 182"]]')

train.loc[7422, 'annotation'] = ast.literal_eval('[["first started 5 yrs"]]')
train.loc[7422, 'location'] = ast.literal_eval('[["102 121"]]')

train.loc[8876, 'annotation'] = ast.literal_eval('[["No shortness of breath"]]')
train.loc[8876, 'location'] = ast.literal_eval('[["481 483;533 552"]]')

train.loc[9027, 'annotation'] = ast.literal_eval('[["recent URI"], ["nasal stuffines, rhinorrhea, for 3-4 days"]]')
train.loc[9027, 'location'] = ast.literal_eval('[["92 102"], ["123 164"]]')

train.loc[9938, 'annotation'] = ast.literal_eval('[["irregularity with her cycles"], ["heavier bleeding"], ["changes her pad every couple hours"]]')
train.loc[9938, 'location'] = ast.literal_eval('[["89 117"], ["122 138"], ["368 402"]]')

train.loc[9973, 'annotation'] = ast.literal_eval('[["gaining 10-15 lbs"]]')
train.loc[9973, 'location'] = ast.literal_eval('[["344 361"]]')

train.loc[10513, 'annotation'] = ast.literal_eval('[["weight gain"], ["gain of 10-16lbs"]]')
train.loc[10513, 'location'] = ast.literal_eval('[["600 611"], ["607 623"]]')

train.loc[11551, 'annotation'] = ast.literal_eval('[["seeing her son knows are not real"]]')
train.loc[11551, 'location'] = ast.literal_eval('[["386 400;443 461"]]')

train.loc[11677, 'annotation'] = ast.literal_eval('[["saw him once in the kitchen after he died"]]')
train.loc[11677, 'location'] = ast.literal_eval('[["160 201"]]')

train.loc[12124, 'annotation'] = ast.literal_eval('[["tried Ambien but it didnt work"]]')
train.loc[12124, 'location'] = ast.literal_eval('[["325 337;349 366"]]')

train.loc[12279, 'annotation'] = ast.literal_eval('[["heard what she described as a party later than evening these things did not actually happen"]]')
train.loc[12279, 'location'] = ast.literal_eval('[["405 459;488 524"]]')

train.loc[12289, 'annotation'] = ast.literal_eval('[["experienced seeing her son at the kitchen table these things did not actually happen"]]')
train.loc[12289, 'location'] = ast.literal_eval('[["353 400;488 524"]]')

train.loc[13238, 'annotation'] = ast.literal_eval('[["SCRACHY THROAT"], ["RUNNY NOSE"]]')
train.loc[13238, 'location'] = ast.literal_eval('[["293 307"], ["321 331"]]')

train.loc[13297, 'annotation'] = ast.literal_eval('[["without improvement when taking tylenol"], ["without improvement when taking ibuprofen"]]')
train.loc[13297, 'location'] = ast.literal_eval('[["182 221"], ["182 213;225 234"]]')

train.loc[13299, 'annotation'] = ast.literal_eval('[["yesterday"], ["yesterday"]]')
train.loc[13299, 'location'] = ast.literal_eval('[["79 88"], ["409 418"]]')

train.loc[13845, 'annotation'] = ast.literal_eval('[["headache global"], ["headache throughout her head"]]')
train.loc[13845, 'location'] = ast.literal_eval('[["86 94;230 236"], ["86 94;237 256"]]')

train.loc[14083, 'annotation'] = ast.literal_eval('[["headache generalized in her head"]]')
train.loc[14083, 'location'] = ast.literal_eval('[["56 64;156 179"]]')

In [None]:
# Number of entries in each row's annotation list; create_label uses it to skip
# rows that have none.
train['annotation_length'] = train['annotation'].apply(len)
display(train['annotation_length'].value_counts())

1    8185
0    4399
2    1292
3     287
4      99
5      27
6       9
7       1
8       1
Name: annotation_length, dtype: int64

## Merge patient_notes w/ features

In [None]:
# Pair every patient note with every feature of the same case (join on case_num
# only); the two prints show the row count before and after the join.
print(patient_notes.shape)
patient_notes = patient_notes.merge(features, on=['case_num'], how='left')
print(patient_notes.shape)
display(patient_notes.head())

(42146, 3)
(626902, 5)


Unnamed: 0,pn_num,case_num,pn_history,feature_num,feature_text
0,0,0,"17-year-old male, has come to the student heal...",0,Family-history-of-MI-OR-Family-history-of-myoc...
1,0,0,"17-year-old male, has come to the student heal...",1,Family-history-of-thyroid-disorder
2,0,0,"17-year-old male, has come to the student heal...",2,Chest-pressure
3,0,0,"17-year-old male, has come to the student heal...",3,Intermittent-symptoms
4,0,0,"17-year-old male, has come to the student heal...",4,Lightheaded


## ~~Remove pn_nums that appear in train from patient_notes~~

In [None]:
# print(patient_notes.shape)
# patient_notes = patient_notes[~patient_notes["pn_num"].isin(train["pn_num"].unique())].reset_index(drop=True)
# print(patient_notes.shape)

## Select one specific case_num

In [None]:
# The last "-"-separated token of the run name selects the case: "all" keeps
# everything, anything else must be an integer case_num.
case_token = CFG.wandbname.split("-")[-1]
if case_token != "all":
    selected_case_num = int(case_token)
    print(f"selected_case_num: {selected_case_num}")

    def _keep_selected_case(frame):
        """Print the shape, keep only rows of the selected case (fresh index), print the new shape."""
        print(frame.shape)
        kept = frame[frame["case_num"]==selected_case_num].reset_index(drop=True)
        print(kept.shape)
        return kept

    train = _keep_selected_case(train)
    print()
    patient_notes = _keep_selected_case(patient_notes)
    print()
    features = _keep_selected_case(features)

selected_case_num: 3
(14300, 9)
(1600, 9)

(626902, 5)
(156048, 5)

(143, 3)
(16, 3)


# CV split

In [None]:
# ====================================================
# CV split
# ====================================================
# Split by patient note: grouping on pn_num keeps all rows of one note in the
# same fold.
Fold = GroupKFold(n_splits=CFG.n_fold)
groups = train['pn_num'].values
for n, (train_index, val_index) in enumerate(Fold.split(train, train['location'], groups)):
    # val_index holds positions, used here as .loc labels, so `train` is
    # expected to carry a 0..n-1 index at this point.
    train.loc[val_index, 'fold'] = int(n)
# Cast the fold column to int once every row has been assigned.
train['fold'] = train['fold'].astype(int)
display(train.groupby('fold').size())

fold
0    320
1    320
2    320
3    320
4    320
dtype: int64

In [None]:
len(train)

1600

In [None]:
# Debug mode only: show the fold sizes and, when there are more than 2000 rows,
# keep a fixed random sample of 2000 (fold labels stay as already assigned).
if CFG.debug:
    display(train.groupby('fold').size())
    if len(train) > 2000:
        train = train.sample(n=2000, random_state=0).reset_index(drop=True)
        display(train.groupby('fold').size())

# tokenizer

In [None]:
# ====================================================
# tokenizer
# ====================================================
# Load the tokenizer for CFG.model and save a copy under OUTPUT_DIR/tokenizer/.
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_DIR+'tokenizer/')
# Expose it on CFG so prepare_input / create_label can reach it as cfg.tokenizer.
CFG.tokenizer = tokenizer

Downloading:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/474 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

# Dataset

In [None]:
# ====================================================
# Define max_len
# ====================================================
def _token_lengths(texts, text_col):
    """Tokenize each text without special tokens, log the maximum length, and return all lengths."""
    lengths = []
    for text in tqdm(texts, total=len(texts)):
        lengths.append(len(tokenizer(text, add_special_tokens=False)['input_ids']))
    LOGGER.info(f'{text_col} max(lengths): {max(lengths)}')
    return lengths


# patient_notes was merged with features above, so each note text is repeated
# once per feature. Only the maximum length is needed, so tokenize each distinct
# text once; the maximum is unchanged.
pn_history_lengths = _token_lengths(patient_notes['pn_history'].fillna("").unique(), 'pn_history')
features_lengths = _token_lengths(features['feature_text'].fillna("").values, 'feature_text')

CFG.max_len = max(pn_history_lengths) + max(features_lengths) + 3 # cls & sep & sep
LOGGER.info(f"max_len: {CFG.max_len}")

  0%|          | 0/156048 [00:00<?, ?it/s]

pn_history max(lengths): 393


  0%|          | 0/16 [00:00<?, ?it/s]

feature_text max(lengths): 25
max_len: 421


In [None]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text, feature_text):
    """
    Tokenize a (text, feature_text) pair, padded to ``cfg.max_len``.

    Args:
        cfg: Settings object; reads ``cfg.tokenizer`` and ``cfg.max_len``.
        text (str): First sequence passed to the tokenizer.
        feature_text (str): Second sequence passed to the tokenizer.

    Returns:
        The tokenizer's output mapping with every value converted to a
        ``torch.long`` tensor.
    """
    # Read max_len from the cfg argument (not the global CFG) so the function
    # honours whatever settings object the caller passes in.
    inputs = cfg.tokenizer(text, feature_text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           return_offsets_mapping=False)
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


def create_label(cfg, text, annotation_length, location_list):
    """
    Build the per-token target vector for one text.

    Positions whose sequence id is not 0 get -1, tokens matched to an annotated
    span get 1, and every other position gets 0.

    Args:
        cfg: Settings object; reads ``cfg.tokenizer`` and ``cfg.max_len``.
        text (str): Text to tokenize.
        annotation_length (int): Number of annotations; 0 skips span marking.
        location_list (list of str): Strings of "start end" character offsets,
            with ';' separating several segments inside one string.

    Returns:
        torch.FloatTensor: One value per token position.
    """
    # Read max_len from the cfg argument (not the global CFG) so the function
    # honours whatever settings object the caller passes in.
    encoded = cfg.tokenizer(text,
                            add_special_tokens=True,
                            max_length=cfg.max_len,
                            padding="max_length",
                            return_offsets_mapping=True)
    offset_mapping = encoded['offset_mapping']
    # Every position that does not belong to sequence 0 is marked -1.
    ignore_idxes = np.where(np.array(encoded.sequence_ids()) != 0)[0]
    label = np.zeros(len(offset_mapping))
    label[ignore_idxes] = -1
    if annotation_length != 0:
        for location in location_list:
            for loc in [s.split() for s in location.split(';')]:
                start_idx = -1
                end_idx = -1
                start, end = int(loc[0]), int(loc[1])
                for idx in range(len(offset_mapping)):
                    # First token starting strictly after `start`: the span
                    # begins at the token just before it.
                    if start_idx == -1 and start < offset_mapping[idx][0]:
                        start_idx = idx - 1
                    # First token ending at or after `end`: exclusive upper bound.
                    if end_idx == -1 and end <= offset_mapping[idx][1]:
                        end_idx = idx + 1
                # No token starts after `start`: fall back to end_idx, which
                # makes the slice below empty.
                if start_idx == -1:
                    start_idx = end_idx
                if start_idx != -1 and end_idx != -1:
                    label[start_idx:end_idx] = 1
    return torch.tensor(label, dtype=torch.float)


class TrainDataset(Dataset):
    """Dataset yielding (tokenized inputs, per-token label) for each row of a frame."""

    def __init__(self, cfg, df):
        # Keep plain arrays of the four columns read per item.
        self.cfg = cfg
        column_names = ('feature_text', 'pn_history', 'annotation_length', 'location')
        (self.feature_texts,
         self.pn_historys,
         self.annotation_lengths,
         self.locations) = (df[name].values for name in column_names)

    def __len__(self):
        return len(self.feature_texts)

    def __getitem__(self, item):
        note = self.pn_historys[item]
        inputs = prepare_input(self.cfg, note, self.feature_texts[item])
        label = create_label(self.cfg,
                             note,
                             self.annotation_lengths[item],
                             self.locations[item])
        return inputs, label

# Model

In [None]:
# ====================================================
# Model
# ====================================================
class CustomModel(nn.Module):
    """
    Token-level classifier: a transformer backbone followed by dropout and a
    linear head that emits one logit per token.
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        """
        Args:
            cfg: Settings object; reads ``cfg.model`` and ``cfg.fc_dropout``.
            config_path (str, optional): Path to a config object saved with
                ``torch.save``. When None, the config of ``cfg.model`` is fetched
                with ``output_hidden_states=True``.
            pretrained (bool): If True, load the pretrained backbone weights;
                otherwise build the backbone from the config alone.
        """
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
        else:
            # NOTE: torch.load unpickles the file; only load configs from a trusted source.
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            # AutoModel cannot be instantiated directly; from_config is the
            # supported way to build the architecture without pretrained weights.
            self.model = AutoModel.from_config(self.config)
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(self.config.hidden_size, 1)
        self._init_weights(self.fc)

    def _init_weights(self, module):
        """Initialise Linear / Embedding / LayerNorm modules using config.initializer_range."""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        """Run the backbone on ``inputs`` and return the first element of its output."""
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        return last_hidden_states

    def forward(self, inputs):
        """Return the head's logits, computed on the dropout-regularised backbone features."""
        feature = self.feature(inputs)
        output = self.fc(self.fc_dropout(feature))
        return output

# Helper functions

In [None]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter:
    """Keeps the most recent value together with a weighted running sum, count and mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        # Start every statistic from zero.
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        # `n` is the weight of `val` (e.g. the batch size it was averaged over).
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration given in seconds as '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """
    Report the time elapsed since `since` and the estimated time remaining.

    Args:
        since (float): Start timestamp, as returned by time.time().
        percent (float): Fraction of the work completed so far (must be non-zero).

    Returns:
        str: '<elapsed> (remain <remaining>)', both formatted by asMinutes.
    """
    elapsed = time.time() - since
    # Linear extrapolation: projected total duration at the current rate.
    projected_total = elapsed / (percent)
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(projected_total - elapsed))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """
    Train `model` for one epoch and return the running-average loss.

    Args:
        fold: Fold identifier, used only in the wandb metric names.
        train_loader: Iterable of (inputs, labels); `inputs` maps names to tensors.
        model: Module called as ``model(inputs)``.
        criterion: Loss whose output is masked and averaged here; assumed to
            return element-wise losses (reduction="none") - confirm where it is built.
        optimizer: Optimizer stepped every CFG.gradient_accumulation_steps batches.
        epoch (int): Zero-based epoch index, used for printing.
        scheduler: LR scheduler, stepped per optimizer step when CFG.batch_scheduler.
        device: Device the batch is moved to.

    Returns:
        float: Average of the per-batch losses, weighted by batch size.
    """
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = time.time()
    # Reported until the first optimizer step has produced a real gradient norm.
    grad_norm = float('nan')
    for step, (inputs, labels) in enumerate(train_loader):
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            y_preds = model(inputs)
        loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
        # Positions labelled -1 are excluded from the loss.
        loss = torch.masked_select(loss, labels.view(-1, 1) != -1).mean()
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            # Gradients are still multiplied by the loss scale here; unscale them
            # first so the clipping threshold applies to the true gradient norm.
            # This is done once per optimizer step, after accumulation is complete.
            scaler.unscale_(optimizer)
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if CFG.batch_scheduler:
                scheduler.step()
        # get_last_lr() is the supported accessor outside of scheduler.step().
        current_lr = scheduler.get_last_lr()[0]
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader),
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=current_lr))
        if CFG.wandb:
            wandb.log({f"[fold{fold}] loss": losses.val,
                       f"[fold{fold}] lr": current_lr})
    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    """Evaluate the model on a validation loader.

    Args:
        valid_loader: Iterable yielding ``(inputs, labels)`` where ``inputs`` is a
            mapping of tensors and ``labels`` is a tensor.
        model: Module invoked as ``model(inputs)``; switched to eval mode here.
        criterion: Element-wise loss, called on ``(N, 1)``-shaped predictions and labels.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(avg_loss, predictions)``: the running average of the per-batch
        masked loss and the sigmoid of the model outputs for all batches,
        concatenated into one NumPy array.
    """
    losses = AverageMeter()
    model.eval()
    preds = []
    start = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            y_preds = model(inputs)
        loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
        # Positions labelled -1 are excluded from the loss.
        loss = torch.masked_select(loss, labels.view(-1, 1) != -1).mean()
        # No division by CFG.gradient_accumulation_steps here: nothing is
        # accumulated during evaluation, so that would only distort the metric.
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    """Predict probabilities for every batch of an unlabeled loader.

    Args:
        test_loader: Iterable yielding mappings of tensors.
        model: Module invoked as ``model(inputs)``; put in eval mode and moved
            to ``device`` here.
        device: Device the model and the batch tensors are moved to.

    Returns:
        The sigmoid of the model outputs for all batches, concatenated into a
        single NumPy array.
    """
    model.eval()
    model.to(device)
    batch_probs = []
    for batch in tqdm(test_loader, total=len(test_loader)):
        # Move the batch tensors onto the target device in place.
        for key in list(batch.keys()):
            batch[key] = batch[key].to(device)
        with torch.no_grad():
            logits = model(batch)
        batch_probs.append(logits.sigmoid().to('cpu').numpy())
    return np.concatenate(batch_probs)

In [None]:
# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    """Train and validate the model for one cross-validation fold.

    Rows of ``folds`` whose ``'fold'`` column equals ``fold`` form the validation
    split; all other rows form the training split. After every epoch the
    validation predictions are scored, and whenever the score improves the model
    weights and predictions are saved under ``OUTPUT_DIR``.

    Args:
        folds: DataFrame with at least the ``'fold'`` and ``'pn_history'`` columns
            (plus whatever ``TrainDataset`` and ``create_labels_for_scoring`` read).
        fold: Identifier of the fold to hold out for validation.

    Returns:
        The validation DataFrame with ``CFG.max_len`` extra columns (named
        ``0 .. CFG.max_len - 1``) holding the predictions of the best-scoring epoch.

    Raises:
        ValueError: If ``CFG.scheduler`` is neither ``'linear'`` nor ``'cosine'``.
        RuntimeError: If no epoch produced a usable score (e.g. ``CFG.epochs`` is 0
            or every score is NaN), so there are no best predictions to return.

    Note:
        Uses the module-level ``device``.
    """
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['fold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['fold'] == fold].reset_index(drop=True)
    valid_texts = valid_folds['pn_history'].values
    valid_labels = create_labels_for_scoring(valid_folds)
    
    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = TrainDataset(CFG, valid_folds)

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_DIR+'config.pth')
    model.to(device)
    
    def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
        """Build optimizer parameter groups.

        Parameters under ``model.model`` use ``encoder_lr``, with weight decay
        disabled for names containing a ``no_decay`` substring; parameters whose
        name does not contain ``"model"`` use ``decoder_lr`` and no weight decay.
        """
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': weight_decay},
            {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': 0.0},
            {'params': [p for n, p in model.named_parameters() if "model" not in n],
             'lr': decoder_lr, 'weight_decay': 0.0}
        ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr, 
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)
    
    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        """Create the warmup LR schedule selected by ``cfg.scheduler``."""
        if cfg.scheduler=='linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler=='cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(f"Unsupported scheduler: {cfg.scheduler!r} (expected 'linear' or 'cosine')")
        return scheduler
    
    # The scheduler is stepped once per optimizer step, so the schedule length must
    # count the batches the loader really yields (drop_last=True) divided by the
    # gradient accumulation factor.
    num_train_steps = (len(train_loader) // CFG.gradient_accumulation_steps) * CFG.epochs
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss(reduction="none")
    
    best_path = OUTPUT_DIR+f"{CFG.model.replace('/', '-')}_fold{fold}_best.pth"
    # Start below any attainable score so the first valid epoch is always kept.
    best_score = float('-inf')
    best_predictions = None

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)
        predictions = predictions.reshape((len(valid_folds), CFG.max_len))
        
        # scoring
        char_probs = get_char_probs(valid_texts, predictions, CFG.tokenizer)
        results = get_results(char_probs, th=0.5)
        preds = get_predictions(results)
        score = get_score(valid_labels, preds)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')
        if CFG.wandb:
            wandb.log({f"[fold{fold}] epoch": epoch+1, 
                       f"[fold{fold}] avg_train_loss": avg_loss, 
                       f"[fold{fold}] avg_val_loss": avg_val_loss,
                       f"[fold{fold}] score": score})
        
        if best_score < score:
            best_score = score
            # Keep the best predictions in memory so the whole checkpoint does not
            # have to be read back from disk after training.
            best_predictions = predictions
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        best_path)

    if best_predictions is None:
        raise RuntimeError(f"fold {fold}: no epoch produced a valid score, nothing to return")
    valid_folds[[i for i in range(CFG.max_len)]] = best_predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [None]:
if __name__ == '__main__':
    
    def get_result(oof_df, cv_score=False, case_num=None):
        """Score out-of-fold predictions and log the result.

        Args:
            oof_df: DataFrame holding the prediction columns ``0 .. CFG.max_len - 1``
                together with ``'pn_history'`` (and ``'case_num'`` when filtering).
            cv_score: When True and wandb is enabled, also send the score to wandb.
            case_num: If given, only rows with this ``case_num`` are scored.
        """
        if case_num is not None:
            oof_df = oof_df[oof_df["case_num"]==case_num].reset_index(drop=True)
        labels = create_labels_for_scoring(oof_df)
        predictions = oof_df[[i for i in range(CFG.max_len)]].values
        char_probs = get_char_probs(oof_df['pn_history'].values, predictions, CFG.tokenizer)
        results = get_results(char_probs, th=0.5)
        preds = get_predictions(results)
        score = get_score(labels, preds)
        if case_num is not None:
            LOGGER.info(f'Score of case_num {case_num}: {score:<.4f}')
            metric_name = f'CV score of case_num {case_num}'
        else:
            LOGGER.info(f'Score: {score:<.4f}')
            metric_name = 'CV score'
        # wandb is only imported when CFG.wandb is set, so guard the call.
        if cv_score and CFG.wandb:
            wandb.log({metric_name: score})
    
    if CFG.train:
        # Collect per-fold frames and concatenate once after the loop.
        fold_oofs = []
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                fold_oofs.append(_oof_df)
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
        oof_df = pd.concat(fold_oofs) if fold_oofs else pd.DataFrame()
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        if CFG.cv_case_num:
            # Score each case present in the out-of-fold data. case_num must be
            # passed by keyword: a positional argument after cv_score=True is a
            # SyntaxError.
            for i in sorted(oof_df["case_num"].unique()):
                get_result(oof_df, cv_score=True, case_num=i)
        else:
            get_result(oof_df, cv_score=True)
        oof_df.to_pickle(OUTPUT_DIR+'oof_df.pkl')
        
    if CFG.wandb:
        wandb.finish()



Downloading:   0%|          | 0.00/533M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/106] Elapsed 0m 1s (remain 2m 20s) Loss: 0.6279(0.6279) Grad: inf  LR: 0.00002000  
Epoch: [1][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0210(0.0701) Grad: 4495.5669  LR: 0.00001828  
Epoch: [1][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0126(0.0674) Grad: 3121.7534  LR: 0.00001811  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 12s) Loss: 0.0087(0.0087) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0071(0.0129) 


Epoch 1 - avg_train_loss: 0.0674  avg_val_loss: 0.0129  time: 102s
Epoch 1 - Score: 0.8296
Epoch 1 - Save Best Score: 0.8296 Model


Epoch: [2][0/106] Elapsed 0m 2s (remain 4m 59s) Loss: 0.0097(0.0097) Grad: 21135.6660  LR: 0.00001808  
Epoch: [2][100/106] Elapsed 1m 30s (remain 0m 4s) Loss: 0.0092(0.0135) Grad: 29483.6875  LR: 0.00001344  
Epoch: [2][105/106] Elapsed 1m 34s (remain 0m 0s) Loss: 0.0078(0.0133) Grad: 23175.9531  LR: 0.00001316  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0132(0.0132) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0070(0.0118) 


Epoch 2 - avg_train_loss: 0.0133  avg_val_loss: 0.0118  time: 103s
Epoch 2 - Score: 0.8537
Epoch 2 - Save Best Score: 0.8537 Model


Epoch: [3][0/106] Elapsed 0m 1s (remain 1m 56s) Loss: 0.0147(0.0147) Grad: 13955.6924  LR: 0.00001310  
Epoch: [3][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0091(0.0107) Grad: 22963.4961  LR: 0.00000729  
Epoch: [3][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0045(0.0105) Grad: 18515.9590  LR: 0.00000701  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0116(0.0116) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0077(0.0109) 


Epoch 3 - avg_train_loss: 0.0105  avg_val_loss: 0.0109  time: 102s
Epoch 3 - Score: 0.8549
Epoch 3 - Save Best Score: 0.8549 Model


Epoch: [4][0/106] Elapsed 0m 1s (remain 1m 58s) Loss: 0.0016(0.0016) Grad: 17964.4004  LR: 0.00000695  
Epoch: [4][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0044(0.0093) Grad: 26159.1504  LR: 0.00000217  
Epoch: [4][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0117(0.0094) Grad: 25537.5254  LR: 0.00000199  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0119(0.0119) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0077(0.0108) 


Epoch 4 - avg_train_loss: 0.0094  avg_val_loss: 0.0108  time: 101s
Epoch 4 - Score: 0.8578
Epoch 4 - Save Best Score: 0.8578 Model


Epoch: [5][0/106] Elapsed 0m 1s (remain 1m 58s) Loss: 0.0031(0.0031) Grad: 14551.7500  LR: 0.00000196  
Epoch: [5][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0013(0.0088) Grad: 4390.5815  LR: 0.00000001  
Epoch: [5][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0026(0.0087) Grad: 15359.9463  LR: 0.00000000  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 14s) Loss: 0.0122(0.0122) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0082(0.0108) 


Epoch 5 - avg_train_loss: 0.0087  avg_val_loss: 0.0108  time: 101s
Epoch 5 - Score: 0.8568
Score: 0.8578
Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/106] Elapsed 0m 1s (remain 1m 54s) Loss: 0.9159(0.9159) Grad: inf  LR: 0.00002000  
Epoch: [1][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0070(0.0925) Grad: 1177.6395  LR: 0.00001828  
Epoch: [1][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0153(0.0886) Grad: 3961.7026  LR: 0.00001811  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0057(0.0057) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0253(0.0156) 


Epoch 1 - avg_train_loss: 0.0886  avg_val_loss: 0.0156  time: 101s
Epoch 1 - Score: 0.8381
Epoch 1 - Save Best Score: 0.8381 Model


Epoch: [2][0/106] Elapsed 0m 1s (remain 1m 59s) Loss: 0.0183(0.0183) Grad: 62141.4688  LR: 0.00001808  
Epoch: [2][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0166(0.0147) Grad: 55478.5586  LR: 0.00001344  
Epoch: [2][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0110(0.0146) Grad: 63271.3711  LR: 0.00001316  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0061(0.0061) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0232(0.0143) 


Epoch 2 - avg_train_loss: 0.0146  avg_val_loss: 0.0143  time: 101s
Epoch 2 - Score: 0.8548
Epoch 2 - Save Best Score: 0.8548 Model


Epoch: [3][0/106] Elapsed 0m 1s (remain 1m 58s) Loss: 0.0163(0.0163) Grad: 41543.2773  LR: 0.00001310  
Epoch: [3][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0098(0.0118) Grad: 23837.8008  LR: 0.00000729  
Epoch: [3][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0089(0.0123) Grad: 25388.0996  LR: 0.00000701  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0064(0.0064) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0226(0.0136) 


Epoch 3 - avg_train_loss: 0.0123  avg_val_loss: 0.0136  time: 101s
Epoch 3 - Score: 0.8669
Epoch 3 - Save Best Score: 0.8669 Model


Epoch: [4][0/106] Elapsed 0m 1s (remain 1m 58s) Loss: 0.0097(0.0097) Grad: 7877.4395  LR: 0.00000695  
Epoch: [4][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0235(0.0109) Grad: 43079.3867  LR: 0.00000217  
Epoch: [4][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0087(0.0108) Grad: 23467.3828  LR: 0.00000199  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0067(0.0067) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0222(0.0133) 


Epoch 4 - avg_train_loss: 0.0108  avg_val_loss: 0.0133  time: 101s
Epoch 4 - Score: 0.8687
Epoch 4 - Save Best Score: 0.8687 Model


Epoch: [5][0/106] Elapsed 0m 1s (remain 1m 58s) Loss: 0.0020(0.0020) Grad: 5786.6040  LR: 0.00000196  
Epoch: [5][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0088(0.0107) Grad: 19123.2363  LR: 0.00000001  
Epoch: [5][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0072(0.0105) Grad: 14926.1318  LR: 0.00000000  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0069(0.0069) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0222(0.0133) 


Epoch 5 - avg_train_loss: 0.0105  avg_val_loss: 0.0133  time: 101s
Epoch 5 - Score: 0.8687
Epoch 5 - Save Best Score: 0.8687 Model
Score: 0.8687
Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/106] Elapsed 0m 1s (remain 1m 54s) Loss: 1.0550(1.0550) Grad: inf  LR: 0.00002000  
Epoch: [1][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0169(0.1035) Grad: 2867.1924  LR: 0.00001828  
Epoch: [1][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0101(0.0994) Grad: 892.8994  LR: 0.00001811  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0293(0.0293) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0067(0.0160) 


Epoch 1 - avg_train_loss: 0.0994  avg_val_loss: 0.0160  time: 101s
Epoch 1 - Score: 0.7995
Epoch 1 - Save Best Score: 0.7995 Model


Epoch: [2][0/106] Elapsed 0m 1s (remain 1m 58s) Loss: 0.0113(0.0113) Grad: 23341.8652  LR: 0.00001808  
Epoch: [2][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0260(0.0182) Grad: 77014.2188  LR: 0.00001344  
Epoch: [2][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0098(0.0178) Grad: 38829.4023  LR: 0.00001316  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0331(0.0331) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0062(0.0145) 


Epoch 2 - avg_train_loss: 0.0178  avg_val_loss: 0.0145  time: 101s
Epoch 2 - Score: 0.8327
Epoch 2 - Save Best Score: 0.8327 Model


Epoch: [3][0/106] Elapsed 0m 1s (remain 1m 58s) Loss: 0.0084(0.0084) Grad: 31141.9707  LR: 0.00001310  
Epoch: [3][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0250(0.0136) Grad: 31175.3477  LR: 0.00000729  
Epoch: [3][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0164(0.0136) Grad: 57255.1758  LR: 0.00000701  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0310(0.0310) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0056(0.0136) 


Epoch 3 - avg_train_loss: 0.0136  avg_val_loss: 0.0136  time: 101s
Epoch 3 - Score: 0.8494
Epoch 3 - Save Best Score: 0.8494 Model


Epoch: [4][0/106] Elapsed 0m 1s (remain 2m 0s) Loss: 0.0225(0.0225) Grad: 16118.9707  LR: 0.00000695  
Epoch: [4][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0019(0.0117) Grad: 4685.7476  LR: 0.00000217  
Epoch: [4][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0126(0.0118) Grad: 37146.6680  LR: 0.00000199  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0344(0.0344) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0057(0.0135) 


Epoch 4 - avg_train_loss: 0.0118  avg_val_loss: 0.0135  time: 101s
Epoch 4 - Score: 0.8523
Epoch 4 - Save Best Score: 0.8523 Model


Epoch: [5][0/106] Elapsed 0m 1s (remain 2m 1s) Loss: 0.0093(0.0093) Grad: 24582.4609  LR: 0.00000196  
Epoch: [5][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0150(0.0113) Grad: 31469.4648  LR: 0.00000001  
Epoch: [5][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0080(0.0115) Grad: 51023.8320  LR: 0.00000000  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0350(0.0350) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0056(0.0135) 


Epoch 5 - avg_train_loss: 0.0115  avg_val_loss: 0.0135  time: 101s
Epoch 5 - Score: 0.8532
Epoch 5 - Save Best Score: 0.8532 Model
Score: 0.8532
Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/106] Elapsed 0m 1s (remain 1m 53s) Loss: 0.7359(0.7359) Grad: inf  LR: 0.00002000  
Epoch: [1][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0314(0.0843) Grad: 3911.1912  LR: 0.00001828  
Epoch: [1][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0082(0.0809) Grad: 1363.8464  LR: 0.00001811  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0039(0.0039) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0080(0.0158) 


Epoch 1 - avg_train_loss: 0.0809  avg_val_loss: 0.0158  time: 101s
Epoch 1 - Score: 0.8494
Epoch 1 - Save Best Score: 0.8494 Model


Epoch: [2][0/106] Elapsed 0m 1s (remain 1m 58s) Loss: 0.0137(0.0137) Grad: 28545.4688  LR: 0.00001808  
Epoch: [2][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0267(0.0144) Grad: 21199.9238  LR: 0.00001344  
Epoch: [2][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0082(0.0144) Grad: 25266.7461  LR: 0.00001316  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0026(0.0026) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0072(0.0150) 


Epoch 2 - avg_train_loss: 0.0144  avg_val_loss: 0.0150  time: 101s
Epoch 2 - Score: 0.8591
Epoch 2 - Save Best Score: 0.8591 Model


Epoch: [3][0/106] Elapsed 0m 1s (remain 1m 58s) Loss: 0.0325(0.0325) Grad: 37477.6406  LR: 0.00001310  
Epoch: [3][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0063(0.0124) Grad: 27401.1836  LR: 0.00000729  
Epoch: [3][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0200(0.0123) Grad: 40906.3711  LR: 0.00000701  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0023(0.0023) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0059(0.0143) 


Epoch 3 - avg_train_loss: 0.0123  avg_val_loss: 0.0143  time: 101s
Epoch 3 - Score: 0.8670
Epoch 3 - Save Best Score: 0.8670 Model


Epoch: [4][0/106] Elapsed 0m 1s (remain 1m 59s) Loss: 0.0097(0.0097) Grad: 13510.2939  LR: 0.00000695  
Epoch: [4][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0342(0.0116) Grad: 36580.0039  LR: 0.00000217  
Epoch: [4][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0202(0.0115) Grad: 29950.1660  LR: 0.00000199  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0023(0.0023) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0054(0.0142) 


Epoch 4 - avg_train_loss: 0.0115  avg_val_loss: 0.0142  time: 101s
Epoch 4 - Score: 0.8682
Epoch 4 - Save Best Score: 0.8682 Model


Epoch: [5][0/106] Elapsed 0m 1s (remain 1m 56s) Loss: 0.0018(0.0018) Grad: 4173.5464  LR: 0.00000196  
Epoch: [5][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0079(0.0110) Grad: 39231.8906  LR: 0.00000001  
Epoch: [5][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0193(0.0110) Grad: 24880.0254  LR: 0.00000000  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0022(0.0022) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0053(0.0142) 


Epoch 5 - avg_train_loss: 0.0110  avg_val_loss: 0.0142  time: 101s
Epoch 5 - Score: 0.8711
Epoch 5 - Save Best Score: 0.8711 Model
Score: 0.8711
Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/106] Elapsed 0m 1s (remain 1m 54s) Loss: 0.8446(0.8446) Grad: inf  LR: 0.00002000  
Epoch: [1][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0415(0.0908) Grad: 3020.6489  LR: 0.00001828  
Epoch: [1][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0172(0.0874) Grad: 1625.6667  LR: 0.00001811  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0175(0.0175) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0344(0.0192) 


Epoch 1 - avg_train_loss: 0.0874  avg_val_loss: 0.0192  time: 101s
Epoch 1 - Score: 0.8050
Epoch 1 - Save Best Score: 0.8050 Model


Epoch: [2][0/106] Elapsed 0m 1s (remain 1m 59s) Loss: 0.0258(0.0258) Grad: 63483.4453  LR: 0.00001808  
Epoch: [2][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0219(0.0148) Grad: 9961.6973  LR: 0.00001344  
Epoch: [2][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0103(0.0149) Grad: 19640.8984  LR: 0.00001316  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0139(0.0139) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0370(0.0165) 


Epoch 2 - avg_train_loss: 0.0149  avg_val_loss: 0.0165  time: 101s
Epoch 2 - Score: 0.8302
Epoch 2 - Save Best Score: 0.8302 Model


Epoch: [3][0/106] Elapsed 0m 1s (remain 2m 0s) Loss: 0.0204(0.0204) Grad: 21945.3203  LR: 0.00001310  
Epoch: [3][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0229(0.0126) Grad: 26060.7363  LR: 0.00000729  
Epoch: [3][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0065(0.0126) Grad: 7852.1333  LR: 0.00000701  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0116(0.0116) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0356(0.0152) 


Epoch 3 - avg_train_loss: 0.0126  avg_val_loss: 0.0152  time: 101s
Epoch 3 - Score: 0.8480
Epoch 3 - Save Best Score: 0.8480 Model


Epoch: [4][0/106] Elapsed 0m 1s (remain 1m 58s) Loss: 0.0139(0.0139) Grad: 37479.0820  LR: 0.00000695  
Epoch: [4][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0194(0.0116) Grad: 16289.8037  LR: 0.00000217  
Epoch: [4][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0072(0.0118) Grad: 17955.5879  LR: 0.00000199  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0107(0.0107) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0375(0.0149) 


Epoch 4 - avg_train_loss: 0.0118  avg_val_loss: 0.0149  time: 101s
Epoch 4 - Score: 0.8547
Epoch 4 - Save Best Score: 0.8547 Model


Epoch: [5][0/106] Elapsed 0m 1s (remain 1m 57s) Loss: 0.0094(0.0094) Grad: 50026.8711  LR: 0.00000196  
Epoch: [5][100/106] Elapsed 1m 28s (remain 0m 4s) Loss: 0.0036(0.0114) Grad: 8334.2441  LR: 0.00000001  
Epoch: [5][105/106] Elapsed 1m 33s (remain 0m 0s) Loss: 0.0136(0.0112) Grad: 20270.5762  LR: 0.00000000  
EVAL: [0/27] Elapsed 0m 0s (remain 0m 13s) Loss: 0.0104(0.0104) 
EVAL: [26/27] Elapsed 0m 7s (remain 0m 0s) Loss: 0.0375(0.0148) 


Epoch 5 - avg_train_loss: 0.0112  avg_val_loss: 0.0148  time: 101s
Epoch 5 - Score: 0.8558
Epoch 5 - Save Best Score: 0.8558 Model
Score: 0.8558
Score: 0.8615





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
[fold0] avg_train_loss,█▂▁▁▁
[fold0] avg_val_loss,█▄▂▁▁
[fold0] epoch,▁▃▅▆█
[fold0] loss,█▇▇▄▃▂▂▃▄▃▂▂▂▁▂▁▂▃▂▁▁▁▁▃▁▁▂▂▂▂▁▁▂▂▁▂▁▁▂▁
[fold0] lr,███████▇▇▇▇▇▆▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁
[fold0] score,▁▇▇██
[fold1] avg_train_loss,█▁▁▁▁
[fold1] avg_val_loss,█▄▂▁▁
[fold1] epoch,▁▃▅▆█
[fold1] loss,█▃▃▂▂▂▂▂▂▁▁▁▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▂▁▁▂▁▁▁▁▁▁▁▁

0,1
[fold0] avg_train_loss,0.00869
[fold0] avg_val_loss,0.01077
[fold0] epoch,5.0
[fold0] loss,0.00263
[fold0] lr,0.0
[fold0] score,0.8568
[fold1] avg_train_loss,0.01051
[fold1] avg_val_loss,0.01329
[fold1] epoch,5.0
[fold1] loss,0.00715


# Post-processing

In [None]:
# Reload the out-of-fold predictions pickle from OUTPUT_DIR and display it.
oof_df = pd.read_pickle(f"{OUTPUT_DIR}oof_df.pkl")
oof_df

Unnamed: 0,id,case_num,pn_num,feature_num,annotation,location,feature_text,pn_history,annotation_length,fold,location_for_create_labels,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420
0,30260_300,3,30260,300,[fathers brother had a stomach ulcer],[604 639],FHx-of-PUD-OR-Family-history-of-peptic-ulcer-d...,"35 yo M with CC of ""my stomach is bothering me...",1,0,[604 639],0.105702,0.000320,0.000471,0.000869,0.000278,0.000392,0.000152,0.000193,0.000367,0.000875,0.000460,0.000439,0.000352,0.000133,0.000159,0.000184,0.000149,0.000268,0.000387,0.000310,0.000181,0.000237,0.000220,0.000165,0.000169,0.000179,0.000267,0.000286,0.000204,0.000155,0.000195,0.000303,0.000290,0.000218,0.000255,0.000268,0.000350,0.000198,0.000424,0.000786,0.000231,0.000164,0.000120,0.000236,0.000178,0.000178,0.000225,0.000243,0.000193,0.000099,0.000101,0.000111,0.000118,0.000167,0.000103,0.000171,0.000212,0.000167,0.000093,0.000133,0.000181,0.000166,0.000202,0.000143,0.000135,0.000112,0.000420,0.000262,0.000425,0.000291,0.000564,0.000908,0.000683,0.000188,0.000251,0.000308,0.000376,0.000420,0.000239,0.000159,0.000121,0.000196,0.000176,0.000191,0.000295,0.000650,0.000437,0.000814,0.000194,0.000172,0.000124,0.000192,0.000266,0.000306,0.000389,0.000170,0.000157,0.000116,0.000159,0.000244,0.000172,0.000169,0.000195,0.000187,0.000117,0.000147,0.000173,0.000209,0.000235,0.000263,0.000201,0.000122,0.000133,0.000137,0.000106,0.000169,0.000221,0.000141,0.000146,0.000174,0.000162,0.000125,0.000196,0.000136,0.000128,0.000251,0.000240,0.000085,0.000137,0.000080,0.000138,0.000166,0.000083,0.000237,0.000272,0.000150,0.000335,0.000411,0.000239,0.000128,0.000131,0.000236,0.000213,0.000334,0.000121,0.000147,0.000163,0.000156,0.000140,0.000136,0.000202,0.000204,0.000137,0.000127,0.000159,0.000175,0.000156,0.000162,0.000170,0.000169,0.000214,0.000205,0.000168,0.000160,0.000278,0.000517,0.000348,0.000215,0.000341,0.000300,0.000193,0.000208,0.071365,0.381955,0.714655,0.940559,0.961489,0.976286,0.984565,0.995282,0.994030,0.000579,0.000363,0.000418,0.000346,0.000404,0.000380,0.000389,0.000495,0.000161,0.000205,0.000305,0.000323,0.000378,0.000291,0.000390,0.000373,0.000380,0.000404,0.000465,0.000439,0.
000231,0.000212,0.000258,0.000154,0.000274,0.000317,0.000514,0.000382,0.000236,0.000310,0.000182,0.000481,0.000389,0.000132,0.000182,0.000461,0.000420,0.000308,0.000187,0.000181,0.000437,0.009378,0.024919,0.013984,0.061834,0.067357,0.938723,0.921968,0.003829,0.044443,0.001944,0.074322,0.020300,0.448780,0.048047,0.643065,0.136743,0.890585,0.932516,0.623773,0.964318,0.909468,0.003712,0.068286,0.053860,0.085575,0.407254,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117
1,30260_301,3,30260,301,[Pain is above his umbilicus],[232 259],Epigastric-discomfort,"35 yo M with CC of ""my stomach is bothering me...",1,0,[232 259],0.064457,0.000329,0.000511,0.001012,0.000210,0.000330,0.000184,0.000386,0.001985,0.295651,0.283638,0.760343,0.013388,0.000207,0.000173,0.000183,0.000160,0.000418,0.001259,0.007346,0.000221,0.000247,0.000221,0.000170,0.000166,0.000171,0.000249,0.000289,0.000193,0.000153,0.000193,0.000319,0.000286,0.000213,0.000252,0.000263,0.000326,0.000187,0.000408,0.000766,0.000210,0.000148,0.000122,0.000928,0.000207,0.000184,0.000238,0.000267,0.000188,0.000097,0.000095,0.000103,0.000111,0.000155,0.000099,0.000165,0.000187,0.000150,0.000091,0.000126,0.000170,0.000169,0.000195,0.000134,0.000129,0.000169,0.956938,0.956069,0.978559,0.937322,0.986224,0.985724,0.985115,0.000554,0.000403,0.000473,0.000525,0.000580,0.000265,0.000153,0.000116,0.000163,0.000146,0.000155,0.000233,0.000416,0.000287,0.000512,0.000169,0.000147,0.000116,0.000168,0.000236,0.000264,0.000327,0.000156,0.000137,0.000108,0.000139,0.000199,0.000149,0.000149,0.000195,0.000377,0.000113,0.000136,0.000147,0.000165,0.000182,0.000200,0.000162,0.000108,0.000120,0.000130,0.000100,0.000142,0.000180,0.000126,0.000126,0.000146,0.000133,0.000104,0.000147,0.000116,0.000107,0.000170,0.000166,0.000077,0.000110,0.000074,0.000113,0.000130,0.000075,0.000170,0.000184,0.000126,0.000241,0.000294,0.000182,0.000112,0.000114,0.000192,0.000181,0.000268,0.000111,0.000134,0.000139,0.000126,0.000119,0.000122,0.000163,0.000166,0.000121,0.000111,0.000134,0.000145,0.000133,0.000139,0.000149,0.000150,0.000180,0.000169,0.000143,0.000136,0.000202,0.000362,0.000261,0.000165,0.000246,0.000188,0.000144,0.000141,0.000145,0.000154,0.000156,0.000174,0.000160,0.000200,0.000547,0.000533,0.000363,0.000175,0.000172,0.000189,0.000190,0.000222,0.000218,0.000205,0.000225,0.000114,0.000135,0.000207,0.000233,0.000288,0.000186,0.000245,0.000208,0.000203,0.000211,0.000264,0.000243,0.000144,0.000129,0.000145,0.000118,0.
000175,0.000221,0.000341,0.000257,0.000182,0.000240,0.000148,0.000637,0.000623,0.000129,0.000145,0.000322,0.000307,0.000242,0.000142,0.000150,0.000714,0.651495,0.679727,0.728392,0.687279,0.392831,0.433541,0.471241,0.407914,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117
2,30260_302,3,30260,302,[black stools],[310 322],Darker-bowel-movements,"35 yo M with CC of ""my stomach is bothering me...",1,0,[310 322],0.082862,0.000326,0.000498,0.000640,0.000159,0.000180,0.000132,0.000191,0.000294,0.000468,0.000338,0.000333,0.000332,0.000131,0.000165,0.000183,0.000147,0.000204,0.000253,0.000219,0.000170,0.000227,0.000209,0.000165,0.000162,0.000166,0.000222,0.000253,0.000189,0.000156,0.000200,0.000297,0.000285,0.000216,0.000250,0.000268,0.000325,0.000195,0.000402,0.000778,0.000231,0.000164,0.000119,0.000176,0.000158,0.000165,0.000215,0.000231,0.000171,0.000098,0.000100,0.000110,0.000120,0.000161,0.000105,0.000176,0.000217,0.000169,0.000096,0.000134,0.000185,0.000174,0.000212,0.000147,0.000139,0.000112,0.000259,0.000224,0.000366,0.000308,0.000540,0.000757,0.000611,0.000179,0.000228,0.000284,0.000310,0.000362,0.000227,0.000159,0.000133,0.000306,0.000317,0.000656,0.028602,0.986764,0.991719,0.988943,0.000346,0.000203,0.000133,0.000207,0.000287,0.000325,0.000398,0.000175,0.000162,0.000115,0.000148,0.000217,0.000159,0.000154,0.000163,0.000144,0.000113,0.000150,0.000163,0.000186,0.000206,0.000241,0.000193,0.000119,0.000125,0.000128,0.000105,0.000169,0.000219,0.000143,0.000147,0.000171,0.000152,0.000119,0.000174,0.000138,0.000132,0.000233,0.000236,0.000086,0.000128,0.000081,0.000133,0.000157,0.000084,0.000224,0.000238,0.000145,0.000327,0.000383,0.000220,0.000125,0.000131,0.000221,0.000205,0.000309,0.000119,0.000148,0.000160,0.000149,0.000136,0.000129,0.000187,0.000192,0.000136,0.000129,0.000151,0.000170,0.000155,0.000160,0.000174,0.000165,0.000208,0.000201,0.000163,0.000158,0.000231,0.000467,0.000301,0.000198,0.000277,0.000208,0.000160,0.000160,0.000170,0.000177,0.000181,0.000190,0.000173,0.000216,0.000443,0.000547,0.000395,0.000191,0.000197,0.000227,0.000228,0.000279,0.000275,0.000251,0.000277,0.000128,0.000162,0.000264,0.000278,0.000340,0.000248,0.000327,0.000290,0.000297,0.000279,0.000360,0.000328,0.000166,0.000142,0.000165,0.000126,0.000219,0.00025
7,0.000470,0.000351,0.000198,0.000244,0.000143,0.000261,0.000246,0.000126,0.000156,0.000376,0.000362,0.000275,0.000159,0.000171,0.000688,0.881473,0.888479,0.801257,0.851074,0.694631,0.004125,0.019111,0.011949,0.406247,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117
3,30260_303,3,30260,303,[takes ibuprofen],[386 401],NSAID-use-OR-Nonsteroidal-anti-inflammatory-dr...,"35 yo M with CC of ""my stomach is bothering me...",1,0,[386 401],0.130753,0.000516,0.000793,0.001133,0.000194,0.000247,0.000154,0.000233,0.000504,0.001478,0.000772,0.000889,0.000601,0.000151,0.000174,0.000193,0.000161,0.000332,0.000603,0.000407,0.000214,0.000283,0.000252,0.000200,0.000193,0.000204,0.000280,0.000326,0.000240,0.000184,0.000240,0.000365,0.000342,0.000256,0.000299,0.000322,0.000451,0.000235,0.000550,0.001027,0.000250,0.000174,0.000129,0.000236,0.000202,0.000205,0.000250,0.000266,0.000209,0.000101,0.000104,0.000112,0.000124,0.000189,0.000111,0.000186,0.000232,0.000181,0.000101,0.000155,0.000203,0.000185,0.000223,0.000150,0.000140,0.000116,0.000280,0.000192,0.000324,0.000267,0.000439,0.000715,0.000543,0.000187,0.000249,0.000324,0.000373,0.000423,0.000238,0.000162,0.000129,0.000217,0.000195,0.000210,0.000341,0.000695,0.000474,0.000881,0.000204,0.000180,0.000135,0.000221,0.000329,0.000358,0.000441,0.000185,0.000170,0.000130,0.000205,0.000310,0.000261,0.000242,0.000281,0.000260,0.000164,0.000277,0.070420,0.981613,0.976449,0.976692,0.975361,0.000418,0.000454,0.000351,0.000144,0.000286,0.000334,0.000160,0.000160,0.000189,0.000174,0.000140,0.000232,0.000157,0.000151,0.000298,0.000270,0.000092,0.000159,0.000086,0.000170,0.000197,0.000090,0.000289,0.000332,0.000161,0.000346,0.000426,0.000273,0.000140,0.000152,0.000280,0.000236,0.000363,0.000131,0.000158,0.000178,0.000170,0.000149,0.000156,0.000232,0.000229,0.000146,0.000137,0.000177,0.000194,0.000173,0.000173,0.000185,0.000187,0.000238,0.000223,0.000177,0.000170,0.000386,0.000883,0.000586,0.000390,0.000741,0.000443,0.000195,0.000181,0.000175,0.000179,0.000198,0.000217,0.000195,0.000258,0.000678,0.000889,0.000541,0.000202,0.000205,0.000245,0.000252,0.000301,0.000300,0.000263,0.000297,0.000139,0.000193,0.000323,0.000325,0.000389,0.000289,0.000386,0.000341,0.000357,0.000321,0.000424,0.000400,0.000201,0.000162,0.00
0191,0.000140,0.000274,0.000310,0.000492,0.000372,0.000248,0.000356,0.000189,0.000499,0.000419,0.000142,0.000260,0.000900,0.000747,0.000584,0.000590,0.000793,0.000676,0.760469,0.816892,0.104301,0.433330,0.005296,0.197741,0.002485,0.355373,0.299579,0.616162,0.007720,0.017603,0.003158,0.009247,0.002318,0.007585,0.001725,0.052257,0.404960,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117
4,30260_304,3,30260,304,[burning and knawing],[197 216],burning-OR-gnawing-OR-burning-and-gnawing,"35 yo M with CC of ""my stomach is bothering me...",1,0,[197 216],0.077032,0.000436,0.000677,0.000853,0.000166,0.000193,0.000139,0.000205,0.000391,0.001086,0.000552,0.000553,0.000422,0.000132,0.000165,0.000182,0.000147,0.000254,0.000448,0.000322,0.000177,0.000226,0.000215,0.000173,0.000162,0.000173,0.000243,0.000279,0.000205,0.000156,0.000198,0.000321,0.000295,0.000231,0.000284,0.000294,0.000393,0.000208,0.000457,0.000906,0.000231,0.000163,0.000129,0.000488,0.000275,0.000274,0.000358,0.000388,0.000299,0.000134,0.000161,0.000288,0.007826,0.991339,0.974408,0.984535,0.983028,0.987873,0.000189,0.000165,0.000218,0.000198,0.000242,0.000148,0.000134,0.000108,0.000270,0.000175,0.000307,0.000243,0.000417,0.000662,0.000519,0.000166,0.000220,0.000273,0.000329,0.000370,0.000218,0.000146,0.000112,0.000181,0.000165,0.000171,0.000262,0.000508,0.000347,0.000655,0.000182,0.000159,0.000117,0.000179,0.000260,0.000289,0.000366,0.000161,0.000146,0.000109,0.000142,0.000220,0.000156,0.000151,0.000174,0.000168,0.000110,0.000138,0.000157,0.000182,0.000203,0.000230,0.000181,0.000112,0.000123,0.000124,0.000100,0.000156,0.000201,0.000131,0.000134,0.000159,0.000143,0.000112,0.000161,0.000124,0.000115,0.000203,0.000200,0.000080,0.000120,0.000076,0.000124,0.000143,0.000077,0.000199,0.000212,0.000133,0.000288,0.000337,0.000201,0.000117,0.000119,0.000211,0.000193,0.000299,0.000114,0.000136,0.000150,0.000142,0.000128,0.000127,0.000180,0.000180,0.000126,0.000120,0.000142,0.000157,0.000142,0.000151,0.000161,0.000155,0.000191,0.000180,0.000153,0.000146,0.000216,0.000390,0.000281,0.000175,0.000250,0.000189,0.000149,0.000148,0.000163,0.000167,0.000177,0.000190,0.000176,0.000226,0.000597,0.000702,0.000455,0.000184,0.000185,0.000211,0.000221,0.000262,0.000260,0.000243,0.000264,0.000119,0.000148,0.000243,0.000253,0.000310,0.000228,0.000302,0.000279,0.000286,0.000282,0.000339,0.000320,0.000161,0.000139,0.000161
,0.000118,0.000211,0.000266,0.000442,0.000334,0.000197,0.000249,0.000150,0.000378,0.000330,0.000114,0.000150,0.000349,0.000341,0.000252,0.000154,0.000160,0.000898,0.922012,0.675951,0.946287,0.790967,0.925678,0.929606,0.929268,0.015607,0.717985,0.100048,0.899790,0.595082,0.858672,0.519502,0.909624,0.886469,0.926957,0.410507,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117,0.154117
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,39338_311,3,39338,311,[],[],No-blood-in-stool,"Chad Hamilton, a 35-year-old male, has come to...",0,4,[],0.419145,0.000561,0.000991,0.000946,0.000196,0.000233,0.000447,0.000646,0.000644,0.000791,0.000636,0.000957,0.000542,0.000216,0.000295,0.000325,0.000328,0.000746,0.000449,0.000455,0.000563,0.000171,0.000166,0.000288,0.000330,0.000171,0.000510,0.000340,0.000296,0.000215,0.000156,0.000302,0.000230,0.000368,0.000296,0.000456,0.000491,0.000402,0.000305,0.000195,0.001645,0.003034,0.001232,0.002047,0.000270,0.000873,0.001092,0.000405,0.001281,0.002770,0.003236,0.002601,0.004866,0.004136,0.000836,0.016669,0.041854,0.088825,0.113575,0.000242,0.000384,0.000407,0.000725,0.001002,0.000288,0.000364,0.000452,0.000388,0.000371,0.000223,0.000165,0.000361,0.000402,0.000644,0.000394,0.000413,0.000152,0.000221,0.000186,0.000237,0.000231,0.000199,0.000112,0.000224,0.000295,0.000229,0.000195,0.000223,0.000274,0.000293,0.000299,0.000706,0.000763,0.000148,0.000194,0.000165,0.000497,0.001925,0.002562,0.001223,0.000161,0.000228,0.000228,0.000392,0.000264,0.000260,0.000135,0.000276,0.000269,0.000176,0.000211,0.000221,0.000211,0.000126,0.000275,0.000116,0.000203,0.000126,0.000250,0.000246,0.000119,0.000198,0.000218,0.000135,0.000431,0.000343,0.000309,0.000278,0.000334,0.000261,0.000378,0.000337,0.000283,0.000501,0.000247,0.000226,0.000240,0.000235,0.000111,0.000144,0.000178,0.000224,0.000267,0.000412,0.000224,0.000216,0.000190,0.000250,0.000205,0.000741,0.000925,0.000276,0.000233,0.000349,0.000378,0.000162,0.000498,0.000266,0.000293,0.000257,0.000171,0.000454,0.000377,0.000288,0.000356,0.000466,0.000354,0.000308,0.000246,0.000230,0.000225,0.000281,0.000140,0.000397,0.000225,0.000261,0.000285,0.000155,0.000236,0.000176,0.000241,0.000217,0.000291,0.000238,0.000270,0.000242,0.000212,0.000231,0.000288,0.000140,0.000214,0.000183,0.000144,0.000207,0.000215,0.000172,0.000264,0.000345,0.000129,0.000241,0.000305,0.000256,0.000278,0.000474,0.000263,0.000513,0.000620,0.000436,0.000346,0.000290,
0.000294,0.000132,0.000194,0.000282,0.000228,0.000145,0.000310,0.000318,0.000111,0.000226,0.000273,0.000254,0.000268,0.000178,0.000149,0.000228,0.000296,0.000127,0.000181,0.000184,0.000162,0.000217,0.000258,0.000266,0.000273,0.004043,0.246317,0.164540,0.252304,0.274067,0.225609,0.236770,0.287187,0.129279,0.636385,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873
1596,39338_312,3,39338,312,[],[],Intermittent,"Chad Hamilton, a 35-year-old male, has come to...",0,4,[],0.425850,0.000554,0.000981,0.000924,0.000198,0.000247,0.000509,0.000691,0.000693,0.000805,0.000711,0.001097,0.000534,0.000221,0.000319,0.000350,0.000336,0.000784,0.000448,0.000455,0.000494,0.000182,0.000181,0.000331,0.000376,0.000173,0.000503,0.000343,0.000303,0.000253,0.000215,0.000862,0.000418,0.001204,0.001756,0.009936,0.004342,0.003236,0.001526,0.000150,0.000262,0.000426,0.000350,0.000481,0.000169,0.000768,0.000794,0.000251,0.000960,0.001786,0.002259,0.001849,0.003229,0.002201,0.000250,0.000634,0.000941,0.001566,0.001543,0.000137,0.000684,0.000694,0.001141,0.001507,0.000327,0.000446,0.000592,0.000475,0.000448,0.000260,0.000175,0.000440,0.000459,0.000708,0.000432,0.000455,0.000163,0.000259,0.000212,0.000270,0.000255,0.000213,0.000114,0.000226,0.000315,0.000248,0.000219,0.000317,0.000349,0.000325,0.000246,0.000482,0.000514,0.000141,0.000170,0.000131,0.000195,0.000325,0.000328,0.000258,0.000154,0.000233,0.000227,0.000397,0.000293,0.000302,0.000136,0.000250,0.000249,0.000191,0.000235,0.000253,0.000243,0.000129,0.000278,0.000112,0.000191,0.000125,0.000247,0.000235,0.000121,0.000211,0.000229,0.000135,0.000412,0.000335,0.000304,0.000269,0.000317,0.000238,0.000286,0.000303,0.000270,0.000473,0.000242,0.000226,0.000242,0.000239,0.000115,0.000161,0.000200,0.000252,0.000284,0.000431,0.000229,0.000216,0.000199,0.000270,0.000222,0.000815,0.000972,0.000280,0.000236,0.000353,0.000381,0.000159,0.000438,0.000256,0.000289,0.000260,0.000169,0.000426,0.000362,0.000293,0.000355,0.000429,0.000328,0.000304,0.000245,0.000227,0.000219,0.000277,0.000134,0.000349,0.000217,0.000259,0.000307,0.000161,0.000264,0.000190,0.000253,0.000238,0.000302,0.000261,0.000323,0.000297,0.000248,0.000270,0.000340,0.000144,0.000228,0.000192,0.000154,0.000218,0.000234,0.000204,0.000322,0.000396,0.000121,0.000217,0.000290,0.000254,0.000276,0.000453,0.000245,0.000448,0.000564,0.000409,0.000341,0.000307,0.000
325,0.000141,0.000213,0.000321,0.000261,0.000158,0.000362,0.000373,0.000120,0.000259,0.000320,0.000289,0.000303,0.000191,0.000159,0.000268,0.000355,0.000130,0.000190,0.000209,0.000188,0.000249,0.000290,0.000299,0.000322,0.004939,0.875463,0.923652,0.905030,0.922798,0.646821,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873
1597,39338_313,3,39338,313,[],[],Minimal-to-no-change-with-Tums,"Chad Hamilton, a 35-year-old male, has come to...",0,4,[],0.425739,0.000520,0.000830,0.000757,0.000191,0.000231,0.000451,0.000654,0.000623,0.000806,0.000660,0.001011,0.000580,0.000247,0.000397,0.000479,0.000373,0.000677,0.000467,0.000541,0.000861,0.000200,0.000190,0.000353,0.000393,0.000167,0.000490,0.000334,0.000289,0.000198,0.000153,0.000397,0.000277,0.000620,0.000380,0.000687,0.000605,0.000491,0.000362,0.000135,0.000324,0.000424,0.000315,0.000426,0.000160,0.000602,0.000651,0.000209,0.000720,0.001533,0.001825,0.001385,0.002380,0.001558,0.000394,0.003212,0.005416,0.005523,0.005070,0.000150,0.000428,0.000449,0.000765,0.001061,0.000282,0.000354,0.000468,0.000387,0.000373,0.000243,0.000190,0.000487,0.000479,0.000725,0.000444,0.000478,0.000167,0.000287,0.000233,0.000283,0.000261,0.000221,0.000109,0.000211,0.000285,0.000243,0.000216,0.000275,0.000362,0.000359,0.000347,0.000750,0.000721,0.000145,0.000172,0.000128,0.000194,0.000313,0.000322,0.000248,0.000147,0.000215,0.000220,0.000354,0.000264,0.000253,0.000134,0.000261,0.000263,0.000202,0.000263,0.000288,0.000258,0.000126,0.000268,0.000110,0.000182,0.000121,0.000242,0.000237,0.000119,0.000210,0.000231,0.000134,0.000423,0.000342,0.000309,0.000291,0.000389,0.000360,0.001897,0.000456,0.000331,0.000551,0.000257,0.000230,0.000247,0.000230,0.000114,0.000152,0.000193,0.000243,0.000280,0.000432,0.000233,0.000219,0.000203,0.000273,0.000253,0.001027,0.001091,0.000315,0.000235,0.000313,0.000330,0.000160,0.000513,0.000259,0.000283,0.000231,0.000155,0.000413,0.000380,0.000272,0.000323,0.000385,0.000295,0.000288,0.000245,0.000227,0.000218,0.000268,0.000142,0.000423,0.000227,0.000259,0.000278,0.000149,0.000226,0.000178,0.000244,0.000219,0.000288,0.000242,0.000285,0.000256,0.000222,0.000238,0.000300,0.000145,0.000254,0.000223,0.000150,0.000204,0.000211,0.000201,0.000320,0.000383,0.000126,0.000233,0.000306,0.000273,0.000278,0.000441,0.000243,0.000486,0.000614,0.000432,0.000
344,0.000289,0.000294,0.000133,0.000204,0.000290,0.000233,0.000154,0.000363,0.000367,0.000117,0.000247,0.000299,0.000289,0.000372,0.000211,0.000176,0.000304,0.000389,0.000139,0.000208,0.000225,0.000198,0.000269,0.000323,0.000298,0.000304,0.007714,0.577516,0.586567,0.367304,0.607102,0.461934,0.576680,0.339069,0.724900,0.304203,0.503555,0.360263,0.816871,0.743994,0.639187,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873
1598,39338_314,3,39338,314,[nausea],[360 366],Nausea,"Chad Hamilton, a 35-year-old male, has come to...",1,4,[360 366],0.409272,0.000483,0.000837,0.000759,0.000180,0.000232,0.000502,0.000695,0.000699,0.000798,0.000705,0.001044,0.000509,0.000206,0.000292,0.000328,0.000321,0.000731,0.000441,0.000429,0.000465,0.000168,0.000171,0.000326,0.000340,0.000167,0.000494,0.000327,0.000285,0.000203,0.000150,0.000376,0.000254,0.000518,0.000330,0.000519,0.000534,0.000431,0.000312,0.000124,0.000238,0.000409,0.000338,0.000436,0.000161,0.000600,0.000633,0.000215,0.000879,0.001660,0.002170,0.001839,0.003287,0.002158,0.000237,0.000567,0.000835,0.001486,0.001435,0.000128,0.000375,0.000452,0.000833,0.001161,0.000287,0.000385,0.000504,0.000416,0.000395,0.000237,0.000169,0.000438,0.000471,0.000711,0.000443,0.000461,0.000163,0.000257,0.000213,0.000279,0.000265,0.000227,0.000169,0.002718,0.978476,0.001637,0.000856,0.000604,0.000637,0.000611,0.000416,0.000769,0.000877,0.000173,0.000200,0.000143,0.000215,0.000369,0.000365,0.000277,0.000169,0.000270,0.000351,0.001155,0.000436,0.000374,0.000140,0.000260,0.000260,0.000207,0.000251,0.000266,0.000252,0.000128,0.000284,0.000112,0.000190,0.000124,0.000255,0.000240,0.000123,0.000216,0.000236,0.000138,0.000437,0.000340,0.000310,0.000280,0.000336,0.000253,0.000334,0.000317,0.000277,0.000484,0.000243,0.000229,0.000245,0.000240,0.000114,0.000157,0.000195,0.000247,0.000285,0.000435,0.000229,0.000218,0.000198,0.000272,0.000221,0.000811,0.000968,0.000281,0.000236,0.000382,0.000420,0.000160,0.000454,0.000260,0.000292,0.000271,0.000172,0.000453,0.000383,0.000331,0.000416,0.000466,0.000358,0.000323,0.000248,0.000230,0.000225,0.000284,0.000136,0.000358,0.000220,0.000260,0.000316,0.000162,0.000269,0.000191,0.000257,0.000239,0.000314,0.000270,0.000338,0.000314,0.000242,0.000259,0.000327,0.000142,0.000224,0.000188,0.000152,0.000220,0.000235,0.000199,0.000319,0.000392,0.000122,0.000215,0.000287,0.000254,0.000273,0.000460,0.000244,0.000426,0.000554,0.000409,0.000339,
0.000299,0.000319,0.000138,0.000213,0.000327,0.000267,0.000156,0.000346,0.000362,0.000118,0.000247,0.000291,0.000264,0.000284,0.000185,0.000156,0.000252,0.000334,0.000131,0.000192,0.000205,0.000192,0.000262,0.000313,0.000301,0.000336,0.003332,0.747503,0.855924,0.750060,0.636294,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873,0.128873


# Push to LINE

In [None]:
import requests

def send_line_notification(message, token_path="../../line.json", timeout=10):
    """Send ``message`` as a notification through the LINE Notify HTTP API.

    The bearer token is read from the ``"kagglePush"`` key of the JSON file
    at ``token_path``, so the credential is never written into the notebook.

    Args:
        message: Text to send. A newline is prepended before it is posted.
        token_path: Path to the JSON file holding the token. Defaults to the
            relative path this notebook has always used.
        timeout: Seconds to wait for the HTTP request, so that a stalled
            connection cannot block the end of a training run forever.

    Returns:
        None.

    Raises:
        OSError: If ``token_path`` cannot be opened.
        ValueError: If the file does not contain valid JSON.
        KeyError: If the JSON has no ``"kagglePush"`` entry.
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    import json

    # The context manager closes the file handle even when parsing fails.
    with open(token_path, "r") as f:
        line_token = json.load(f)["kagglePush"]

    # NOTE(review): LINE has announced the end of the Notify service
    # (reported as 2025-03-31) — confirm this endpoint is still reachable.
    endpoint = 'https://notify-api.line.me/api/notify'
    payload = {'message': "\n{}".format(message)}
    headers = {'Authorization': 'Bearer {}'.format(line_token)}

    response = requests.post(endpoint, data=payload, headers=headers, timeout=timeout)
    # Report a rejected request instead of dropping it silently; do not raise,
    # because the notification is a best-effort step after training.
    if not response.ok:
        print("LINE notification failed: HTTP {}".format(response.status_code))

# Build the completion message once: include the wandb run URL only when
# wandb logging is enabled, then send it as a single LINE notification.
done_message = (
    f"Training of {CFG.wandbgroup} has been done. See {run.url}"
    if CFG.wandb
    else f"Training of {CFG.wandbgroup} has been done."
)
send_line_notification(done_message)