# Directory settings

In [None]:
# ====================================================
# Directory settings
# ====================================================

import sys
if "google.colab" in sys.modules:
    from google.colab import drive
    drive.mount("/content/drive")
    base = "/content/drive/MyDrive/colab_notebooks/kaggle/nbme-score-clinical-patient-notes/notebooks"
    %cd {base}


import os
if 'kaggle_web_client' in sys.modules:
    OUTPUT_DIR = './'
else:
    OUTPUT_DIR = './nb001t-token-classifier/'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

Mounted at /content/drive
/content/drive/MyDrive/colab_notebooks/kaggle/nbme-score-clinical-patient-notes/notebooks


# CFG

In [None]:
# ====================================================
# CFG
# ====================================================
class CFG:
    """Notebook-wide experiment configuration, read as class attributes."""

    # --- experiment tracking -------------------------------------------
    wandb = True
    wandbgroup = "nb001t-token-classifier"
    # The token after the last "-" is parsed in the "Select one specific
    # case_num" cell: an integer picks that case, "all" keeps every case.
    wandbname = "case-num-1"
    cv_case_num = False
    competition = 'NBME'
    _wandb_kernel = 'riow1983'

    # --- run mode ------------------------------------------------------
    debug = False
    train = True
    seed = 42
    n_fold = 5
    trn_fold = [0, 1, 2, 3, 4]

    # --- runtime -------------------------------------------------------
    apex = True  # toggles torch.cuda.amp autocast / GradScaler in train_fn
    print_freq = 100
    num_workers = 4

    # --- model ---------------------------------------------------------
    model = "microsoft/deberta-base"
    fc_dropout = 0.2
    max_len = 512  # overwritten from tokenized lengths in the "Define max_len" cell

    # --- optimisation --------------------------------------------------
    scheduler = 'cosine'  # ['linear', 'cosine']
    batch_scheduler = True
    num_cycles = 0.5
    num_warmup_steps = 0
    epochs = 5
    encoder_lr = 2e-5
    decoder_lr = 2e-5
    min_lr = 1e-6
    eps = 1e-6
    betas = (0.9, 0.999)
    batch_size = 12
    weight_decay = 0.01
    gradient_accumulation_steps = 1
    max_grad_norm = 1000


# Debug runs: fewer epochs, a single fold, and a distinguishable W&B run name.
if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0]
    CFG.wandbname = "debug-" + CFG.wandbname

In [None]:
# ====================================================
# wandb
# ====================================================
if CFG.wandb:
    if 'google.colab' in sys.modules:
        !pip install wandb
    import wandb

    try:
        if 'kaggle_web_client' in sys.modules:
            from kaggle_secrets import UserSecretsClient
            user_secrets = UserSecretsClient()
            secret_value_0 = user_secrets.get_secret("wandb_api")
        else:
            import json
            f = open("../../wandb.json", "r")
            json_data = json.load(f)
            secret_value_0 = json_data["wandb_api"]
        wandb.login(key=secret_value_0)
        anony = None
    except:
        anony = "must"
        print('If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token. Use the Label name as wandb_api. \nGet your W&B access token from here: https://wandb.ai/authorize')


    def class2dict(f):
        return dict((name, getattr(f, name)) for name in dir(f) if not name.startswith('__'))

    run = wandb.init(dir=OUTPUT_DIR,
                     project='NBME-Public', 
                     name=CFG.wandbname,
                     config=class2dict(CFG),
                     group=CFG.wandbgroup,
                     job_type="train",
                     anonymous=anony)
    print(f"wandb run id: {run.id}")

Collecting wandb
  Downloading wandb-0.12.10-py2.py3-none-any.whl (1.7 MB)
[?25l[K     |▏                               | 10 kB 39.6 MB/s eta 0:00:01[K     |▍                               | 20 kB 33.9 MB/s eta 0:00:01[K     |▋                               | 30 kB 20.5 MB/s eta 0:00:01[K     |▊                               | 40 kB 17.1 MB/s eta 0:00:01[K     |█                               | 51 kB 12.8 MB/s eta 0:00:01[K     |█▏                              | 61 kB 15.0 MB/s eta 0:00:01[K     |█▍                              | 71 kB 15.4 MB/s eta 0:00:01[K     |█▌                              | 81 kB 15.4 MB/s eta 0:00:01[K     |█▊                              | 92 kB 17.0 MB/s eta 0:00:01[K     |██                              | 102 kB 14.2 MB/s eta 0:00:01[K     |██                              | 112 kB 14.2 MB/s eta 0:00:01[K     |██▎                             | 122 kB 14.2 MB/s eta 0:00:01[K     |██▌                             | 133 kB 14.2 MB/s eta

[34m[1mwandb[0m: W&B API key is configured (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mriow1983[0m (use `wandb login --relogin` to force relogin)


wandb run id: hmqsb83x


# Library

In [None]:
# ====================================================
# Library
# ====================================================
import os
import gc
import re
import ast
import sys
import copy
import json
import time
import math
import string
import pickle
import random
import joblib
import itertools
import warnings
warnings.filterwarnings("ignore")

import scipy as sp
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import DataLoader, Dataset

# Swap the preinstalled transformers for the build found under
# ../input/nbme-pip-wheels (offline install: --no-index).
# NOTE(review): the os.system return codes are ignored, so a failed uninstall or
# install is not detected here.
os.system('pip uninstall -y transformers')
os.system('python -m pip install --no-index --find-links=../input/nbme-pip-wheels transformers')
import tokenizers
import transformers
print(f"tokenizers.__version__: {tokenizers.__version__}")
print(f"transformers.__version__: {transformers.__version__}")
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
%env TOKENIZERS_PARALLELISM=true

# Device handle passed to the train / valid / inference helpers: CUDA when available, else CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizers.__version__: 0.11.0
transformers.__version__: 4.16.2
env: TOKENIZERS_PARALLELISM=true


# Helper functions for scoring

In [None]:
# From https://www.kaggle.com/theoviel/evaluation-metric-folds-baseline

def micro_f1(preds, truths):
    """
    Binary F1 computed over every element of every instance at once.

    Args:
        preds (list of lists of ints): Per-instance binary predictions.
        truths (list of lists of ints): Per-instance binary ground truths.

    Returns:
        float: f1 score.
    """
    # Flatten all instances into one long vector each ("micro" aggregation).
    flat_preds = np.concatenate(preds)
    flat_truths = np.concatenate(truths)
    score = f1_score(flat_truths, flat_preds)
    return score


def spans_to_binary(spans, length=None):
    """
    Converts spans to a binary array indicating whether each character is in the span.

    Args:
        spans (list of lists of two ints): [start, end) spans; may be empty.
        length (int, optional): Size of the output array. When None, the largest
            value found in ``spans`` is used (0 if ``spans`` is empty).

    Returns:
        np array [length]: Binarized spans.
    """
    if length is None:
        # np.max raises on an empty sequence, so an empty span list maps to length 0.
        length = np.max(spans) if len(spans) else 0
    binary = np.zeros(length)
    for start, end in spans:
        binary[start:end] = 1
    return binary


def span_micro_f1(preds, truths):
    """
    Character-level micro F1 between predicted and ground-truth spans.

    Args:
        preds (list of lists of two ints): Prediction spans, one list per instance.
        truths (list of lists of two ints): Ground truth spans, one list per instance.

    Returns:
        float: f1 score.
    """
    bin_preds, bin_truths = [], []
    for pred, truth in zip(preds, truths):
        # Instances with neither predictions nor labels contribute nothing.
        if len(pred) == 0 and len(truth) == 0:
            continue
        # Both binary vectors must be the same size: use the furthest span end.
        pred_end = np.max(pred) if len(pred) else 0
        truth_end = np.max(truth) if len(truth) else 0
        length = max(pred_end, truth_end)
        bin_preds.append(spans_to_binary(pred, length))
        bin_truths.append(spans_to_binary(truth, length))
    return micro_f1(bin_preds, bin_truths)

In [None]:
def create_labels_for_scoring(df):
    """Turn each row's ``location`` strings into a list of ``[start, end]`` int pairs.

    Side effect: adds (or overwrites) the column ``location_for_create_labels`` on ``df``.
    Rows are addressed with ``df.loc[i, ...]`` for ``i in range(len(df))``, so the
    index labels must be ``0..len(df)-1``.

    Args:
        df: DataFrame with a ``location`` column whose cells are lists of strings
            such as ``'0 1'`` or ``'0 1;3 4'``.

    Returns:
        list: one entry per row; each entry is a (possibly empty) list of ``[start, end]``.
    """
    # example: ['0 1', '3 4'] -> ['0 1;3 4']
    # Every row starts out referencing the same empty-list object.
    df['location_for_create_labels'] = [ast.literal_eval(f'[]')] * len(df)
    for i in range(len(df)):
        lst = df.loc[i, 'location']
        if lst:
            new_lst = ';'.join(lst)
            # NOTE(review): presumably pandas stores the inner one-element list in the
            # cell; the parsing loop below reads element 0 of it - confirm.
            df.loc[i, 'location_for_create_labels'] = ast.literal_eval(f'[["{new_lst}"]]')
    # create labels
    truths = []
    for location_list in df['location_for_create_labels'].values:
        truth = []
        if len(location_list) > 0:
            location = location_list[0]
            # ';' separates spans, whitespace separates start from end.
            for loc in [s.split() for s in location.split(';')]:
                start, end = int(loc[0]), int(loc[1])
                truth.append([start, end])
        truths.append(truth)
    return truths


def get_char_probs(texts, predictions, tokenizer):
    """Spread per-token predictions over the characters each token covers.

    Args:
        texts: sequence of strings.
        predictions: per-text sequence of per-token values, aligned with the
            tokenizer's ``offset_mapping`` for that text.
        tokenizer: callable returning a mapping with an ``'offset_mapping'`` entry
            when called with ``return_offsets_mapping=True``.

    Returns:
        list of np arrays, one per text, of length ``len(text)``; characters not
        covered by any scored token stay 0.
    """
    char_probs = [np.zeros(len(text)) for text in texts]
    for probs, text, token_preds in zip(char_probs, texts, predictions):
        offsets = tokenizer(text,
                            add_special_tokens=True,
                            return_offsets_mapping=True)['offset_mapping']
        for (char_start, char_end), token_pred in zip(offsets, token_preds):
            probs[char_start:char_end] = token_pred
    return char_probs


def get_results(char_probs, th=0.5):
    """Threshold character probabilities and encode the hits as span strings.

    For each array, the indices with probability >= ``th`` are shifted by +1,
    grouped into runs of consecutive values, and written as ``"first last"``
    pairs joined by ``";"``. An array with no hits yields ``""``.
    """
    results = []
    for char_prob in char_probs:
        positions = np.where(char_prob >= th)[0] + 1
        spans = []
        run_first = run_last = None
        for pos in positions:
            if run_first is None:
                run_first = run_last = pos
            elif pos == run_last + 1:
                run_last = pos
            else:
                # Gap found: close the current run and start a new one.
                spans.append(f"{run_first} {run_last}")
                run_first = run_last = pos
        if run_first is not None:
            spans.append(f"{run_first} {run_last}")
        results.append(";".join(spans))
    return results


def get_predictions(results):
    """Parse span strings such as ``"3 5;7 7"`` into lists of ``[start, end]`` ints.

    An empty string maps to an empty list.
    """
    predictions = []
    for result in results:
        if result == "":
            predictions.append([])
            continue
        spans = []
        for chunk in result.split(';'):
            bounds = chunk.split()
            spans.append([int(bounds[0]), int(bounds[1])])
        predictions.append(spans)
    return predictions

# Utils

In [None]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    """Span-level micro F1 between ``y_true`` and ``y_pred``.

    The two arguments are forwarded positionally to ``span_micro_f1``, whose
    parameters are named ``(preds, truths)``.
    NOTE(review): binary F1 should be symmetric in its two inputs, so the order
    ought not to change the score - confirm.
    """
    return span_micro_f1(y_true, y_pred)


def get_logger(filename=OUTPUT_DIR+'train'):
    """Return this module's logger, writing INFO+ messages to stderr and ``<filename>.log``.

    Args:
        filename: log file path without the ``.log`` suffix.

    Returns:
        logging.Logger: logger named ``__name__`` with exactly one stream handler
        and one file handler attached.
    """
    from logging import getLogger, INFO, StreamHandler, FileHandler, Formatter
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger returns the same object on every call, so re-running this cell
    # used to stack handlers and repeat every log line. Drop the old ones first.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=42):
    """Seed Python, NumPy and PyTorch (CPU and current CUDA device) with ``seed``.

    Also exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    torch.backends.cudnn.deterministic = True
    
# Seed from the config so that changing CFG.seed actually changes the run.
seed_everything(seed=CFG.seed)

# Data Loading

In [None]:
# ====================================================
# Data Loading
# ====================================================
# Load the three competition tables. `annotation` and `location` are parsed with
# ast.literal_eval so that each cell becomes a Python object rather than a string.
train = pd.read_csv('../input/nbme-score-clinical-patient-notes/train.csv')
train['annotation'] = train['annotation'].apply(ast.literal_eval)
train['location'] = train['location'].apply(ast.literal_eval)
features = pd.read_csv('../input/nbme-score-clinical-patient-notes/features.csv')
def preprocess_features(features):
    """Overwrite ``feature_text`` at index label 27 and return the same frame.

    The frame is modified in place.
    NOTE(review): presumably this repairs a malformed entry in the raw CSV; the
    original value is not visible here - confirm.
    """
    features.loc[27, 'feature_text'] = "Last-Pap-smear-1-year-ago"
    return features
features = preprocess_features(features)
patient_notes = pd.read_csv('../input/nbme-score-clinical-patient-notes/patient_notes.csv')

# Quick sanity check: shape and first rows of each table.
print(f"train.shape: {train.shape}")
display(train.head())
print(f"features.shape: {features.shape}")
display(features.head())
print(f"patient_notes.shape: {patient_notes.shape}")
display(patient_notes.head())

train.shape: (14300, 6)


Unnamed: 0,id,case_num,pn_num,feature_num,annotation,location
0,00016_000,0,16,0,[dad with recent heart attcak],[696 724]
1,00016_001,0,16,1,"[mom with ""thyroid disease]",[668 693]
2,00016_002,0,16,2,[chest pressure],[203 217]
3,00016_003,0,16,3,"[intermittent episodes, episode]","[70 91, 176 183]"
4,00016_004,0,16,4,[felt as if he were going to pass out],[222 258]


features.shape: (143, 3)


Unnamed: 0,feature_num,case_num,feature_text
0,0,0,Family-history-of-MI-OR-Family-history-of-myoc...
1,1,0,Family-history-of-thyroid-disorder
2,2,0,Chest-pressure
3,3,0,Intermittent-symptoms
4,4,0,Lightheaded


patient_notes.shape: (42146, 3)


Unnamed: 0,pn_num,case_num,pn_history
0,0,0,"17-year-old male, has come to the student heal..."
1,1,0,17 yo male with recurrent palpitations for the...
2,2,0,Dillon Cleveland is a 17 y.o. male patient wit...
3,3,0,a 17 yo m c/o palpitation started 3 mos ago; \...
4,4,0,17yo male with no pmh here for evaluation of p...


In [None]:
# Attach the feature description and the note text to every training row.
# Left joins keep every row of `train` even when no match is found.
train = train.merge(features, on=['feature_num', 'case_num'], how='left')
train = train.merge(patient_notes, on=['pn_num', 'case_num'], how='left')
display(train.head())

Unnamed: 0,id,case_num,pn_num,feature_num,annotation,location,feature_text,pn_history
0,00016_000,0,16,0,[dad with recent heart attcak],[696 724],Family-history-of-MI-OR-Family-history-of-myoc...,HPI: 17yo M presents with palpitations. Patien...
1,00016_001,0,16,1,"[mom with ""thyroid disease]",[668 693],Family-history-of-thyroid-disorder,HPI: 17yo M presents with palpitations. Patien...
2,00016_002,0,16,2,[chest pressure],[203 217],Chest-pressure,HPI: 17yo M presents with palpitations. Patien...
3,00016_003,0,16,3,"[intermittent episodes, episode]","[70 91, 176 183]",Intermittent-symptoms,HPI: 17yo M presents with palpitations. Patien...
4,00016_004,0,16,4,[felt as if he were going to pass out],[222 258],Lightheaded,HPI: 17yo M presents with palpitations. Patien...


In [None]:
# incorrect annotation
# Manual corrections keyed by row label of `train`:
#   row label -> (replacement `annotation`, replacement `location`).
# Values are passed to `train.loc[...] = ...` exactly as written here.
_ANNOTATION_FIXES = {
    338: ([["father heart attack"]], [["764 783"]]),
    621: ([["for the last 2-3 months"]], [["77 100"]]),
    655: ([["no heat intolerance"], ["no cold intolerance"]],
          [["285 292;301 312"], ["285 287;296 312"]]),
    1262: ([["mother thyroid problem"]], [["551 557;565 580"]]),
    1265: ([['felt like he was going to "pass out"']], [["131 135;181 212"]]),
    1396: ([["stool , with no blood"]], [["259 280"]]),
    1591: ([["diarrhoe non blooody"]], [["176 184;201 212"]]),
    1615: ([["diarrhea for last 2-3 days"]], [["249 257;271 288"]]),
    1664: ([["no vaginal discharge"]], [["822 824;907 924"]]),
    1714: ([["started about 8-10 hours ago"]], [["101 129"]]),
    1929: ([["no blood in the stool"]], [["531 539;549 561"]]),
    2134: ([["last sexually active 9 months ago"]], [["540 560;581 593"]]),
    2191: ([["right lower quadrant pain"]], [["32 57"]]),
    2553: ([["diarrhoea no blood"]], [["308 317;376 384"]]),
    3124: ([["sweating"]], [["549 557"]]),
    3858: ([["previously as regular"], ["previously eveyr 28-29 days"], ["previously lasting 5 days"], ["previously regular flow"]],
           [["102 123"], ["102 112;125 141"], ["102 112;143 157"], ["102 112;159 171"]]),
    4373: ([["for 2 months"]], [["33 45"]]),
    4763: ([["35 year old"]], [["5 16"]]),
    4782: ([["darker brown stools"]], [["175 194"]]),
    4908: ([["uncle with peptic ulcer"]], [["700 723"]]),
    6016: ([["difficulty falling asleep"]], [["225 250"]]),
    6192: ([["helps to take care of aging mother and in-laws"]], [["197 218;236 260"]]),
    6380: ([["No hair changes"], ["No skin changes"], ["No GI changes"], ["No palpitations"], ["No excessive sweating"]],
           [["480 482;507 519"], ["480 482;499 503;512 519"], ["480 482;521 531"], ["480 482;533 545"], ["480 482;564 582"]]),
    6562: ([["stressed due to taking care of her mother"], ["stressed due to taking care of husbands parents"]],
           [["290 320;327 337"], ["290 320;342 358"]]),
    6862: ([["stressor taking care of many sick family members"]], [["288 296;324 363"]]),
    7022: ([["heart started racing and felt numbness for the 1st time in her finger tips"]], [["108 182"]]),
    7422: ([["first started 5 yrs"]], [["102 121"]]),
    8876: ([["No shortness of breath"]], [["481 483;533 552"]]),
    9027: ([["recent URI"], ["nasal stuffines, rhinorrhea, for 3-4 days"]],
           [["92 102"], ["123 164"]]),
    9938: ([["irregularity with her cycles"], ["heavier bleeding"], ["changes her pad every couple hours"]],
           [["89 117"], ["122 138"], ["368 402"]]),
    9973: ([["gaining 10-15 lbs"]], [["344 361"]]),
    10513: ([["weight gain"], ["gain of 10-16lbs"]], [["600 611"], ["607 623"]]),
    11551: ([["seeing her son knows are not real"]], [["386 400;443 461"]]),
    11677: ([["saw him once in the kitchen after he died"]], [["160 201"]]),
    12124: ([["tried Ambien but it didnt work"]], [["325 337;349 366"]]),
    12279: ([["heard what she described as a party later than evening these things did not actually happen"]],
            [["405 459;488 524"]]),
    12289: ([["experienced seeing her son at the kitchen table these things did not actually happen"]],
            [["353 400;488 524"]]),
    13238: ([["SCRACHY THROAT"], ["RUNNY NOSE"]], [["293 307"], ["321 331"]]),
    13297: ([["without improvement when taking tylenol"], ["without improvement when taking ibuprofen"]],
            [["182 221"], ["182 213;225 234"]]),
    13299: ([["yesterday"], ["yesterday"]], [["79 88"], ["409 418"]]),
    13845: ([["headache global"], ["headache throughout her head"]],
            [["86 94;230 236"], ["86 94;237 256"]]),
    14083: ([["headache generalized in her head"]], [["56 64;156 179"]]),
}

for _row_label, (_fixed_annotation, _fixed_location) in _ANNOTATION_FIXES.items():
    train.loc[_row_label, 'annotation'] = _fixed_annotation
    train.loc[_row_label, 'location'] = _fixed_location

In [None]:
# Number of annotation entries per row; create_label treats 0 as "nothing to label".
train['annotation_length'] = train['annotation'].apply(len)
display(train['annotation_length'].value_counts())

1    8185
0    4399
2    1292
3     287
4      99
5      27
6       9
7       1
8       1
Name: annotation_length, dtype: int64

## Merge patient_notes w/ features

In [None]:
# Pair every patient note with every feature of the same case. The join key is
# `case_num` only, so each note is repeated once per feature of its case
# (the printed shapes show the row count before and after).
print(patient_notes.shape)
patient_notes = patient_notes.merge(features, on=['case_num'], how='left')
print(patient_notes.shape)
display(patient_notes.head())

(42146, 3)
(626902, 5)


Unnamed: 0,pn_num,case_num,pn_history,feature_num,feature_text
0,0,0,"17-year-old male, has come to the student heal...",0,Family-history-of-MI-OR-Family-history-of-myoc...
1,0,0,"17-year-old male, has come to the student heal...",1,Family-history-of-thyroid-disorder
2,0,0,"17-year-old male, has come to the student heal...",2,Chest-pressure
3,0,0,"17-year-old male, has come to the student heal...",3,Intermittent-symptoms
4,0,0,"17-year-old male, has come to the student heal...",4,Lightheaded


## ~~Remove pn_nums that appear in train from patient_notes~~

In [None]:
# print(patient_notes.shape)
# patient_notes = patient_notes[~patient_notes["pn_num"].isin(train["pn_num"].unique())].reset_index(drop=True)
# print(patient_notes.shape)

## Select one specific case_num

In [None]:
# The run name's last "-"-separated token is either "all" (keep every case) or
# the integer case_num to restrict train / patient_notes / features to.
if CFG.wandbname.rsplit("-", 1)[-1] != "all":
    selected_case_num = int(CFG.wandbname.rsplit("-", 1)[-1])
    print(f"selected_case_num: {selected_case_num}")

    # train
    print(train.shape)
    train = train.loc[train["case_num"].eq(selected_case_num)].reset_index(drop=True)
    print(train.shape)

    print()

    # patient_notes
    print(patient_notes.shape)
    patient_notes = patient_notes.loc[patient_notes["case_num"].eq(selected_case_num)].reset_index(drop=True)
    print(patient_notes.shape)

    print()

    # features
    print(features.shape)
    features = features.loc[features["case_num"].eq(selected_case_num)].reset_index(drop=True)
    print(features.shape)

selected_case_num: 1
(14300, 9)
(1300, 9)

(626902, 5)
(10504, 5)

(143, 3)
(13, 3)


# CV split

In [None]:
# ====================================================
# CV split
# ====================================================
# Group by patient note so that all rows of one note land in the same fold.
Fold = GroupKFold(n_splits=CFG.n_fold)
groups = train['pn_num'].values
# NOTE(review): `train['location']` is passed as y; GroupKFold presumably splits on
# `groups` only - confirm.
for n, (train_index, val_index) in enumerate(Fold.split(train, train['location'], groups)):
    # val_index holds positions; using them with .loc relies on `train` having
    # index labels 0..len-1.
    train.loc[val_index, 'fold'] = int(n)
train['fold'] = train['fold'].astype(int)
display(train.groupby('fold').size())

fold
0    260
1    260
2    260
3    260
4    260
dtype: int64

In [None]:
len(train)

1300

In [None]:
# Debug runs only: cap the training frame at 2000 randomly sampled rows
# (fixed random_state) and show the fold sizes before and after.
if CFG.debug:
    display(train.groupby('fold').size())
    if len(train) > 2000:
        train = train.sample(n=2000, random_state=0).reset_index(drop=True)
        display(train.groupby('fold').size())

# tokenizer

In [None]:
# ====================================================
# tokenizer
# ====================================================
# Load the tokenizer matching CFG.model, save a copy under OUTPUT_DIR/tokenizer/,
# and expose it as CFG.tokenizer (prepare_input / create_label read cfg.tokenizer).
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

Downloading:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/474 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

# Dataset

In [None]:
# ====================================================
# Define max_len
# ====================================================
# Longest tokenized patient note (special tokens excluded).
# NOTE(review): `patient_notes` was merged with `features` earlier, so each note
# text appears once per feature and is tokenized repeatedly here; tokenizing the
# unique texts would give the same maximum - confirm before changing, since
# `pn_history_lengths` may be used further down.
for text_col in ['pn_history']:
    pn_history_lengths = []
    tk0 = tqdm(patient_notes[text_col].fillna("").values, total=len(patient_notes))
    for text in tk0:
        length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
        pn_history_lengths.append(length)
    LOGGER.info(f'{text_col} max(lengths): {max(pn_history_lengths)}')

# Longest tokenized feature description (special tokens excluded).
for text_col in ['feature_text']:
    features_lengths = []
    tk0 = tqdm(features[text_col].fillna("").values, total=len(features))
    for text in tk0:
        length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
        features_lengths.append(length)
    LOGGER.info(f'{text_col} max(lengths): {max(features_lengths)}')

# Replaces the default CFG.max_len with the longest possible (note, feature) pair
# plus three special tokens.
CFG.max_len = max(pn_history_lengths) + max(features_lengths) + 3 # cls & sep & sep
LOGGER.info(f"max_len: {CFG.max_len}")

  0%|          | 0/10504 [00:00<?, ?it/s]

pn_history max(lengths): 382


  0%|          | 0/13 [00:00<?, ?it/s]

feature_text max(lengths): 30
max_len: 415


In [None]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text, feature_text):
    """Tokenize a (patient note, feature text) pair into model inputs.

    Args:
        cfg: config object providing ``tokenizer`` and ``max_len``.
        text: patient note (first segment).
        feature_text: feature description (second segment).

    Returns:
        The tokenizer's output mapping with every value converted to a
        ``torch.long`` tensor, padded to ``cfg.max_len``.
    """
    # Use the cfg that was passed in, not the global CFG, so the function honours
    # whatever configuration the caller supplies.
    inputs = cfg.tokenizer(text, feature_text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           return_offsets_mapping=False)
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


def create_label(cfg, text, annotation_length, location_list):
    """Build the per-token target vector for one patient note.

    Only ``text`` is tokenized here (padded to ``cfg.max_len``).

    Args:
        cfg: config object providing ``tokenizer`` and ``max_len``.
        text: patient note.
        annotation_length: number of annotations for this row; 0 means no token
            is marked positive.
        location_list: list of strings, each holding one or more ``"start end"``
            character spans separated by ``";"``.

    Returns:
        torch.FloatTensor with one value per token position: -1 where the
        tokenizer's ``sequence_ids()`` is not 0 (the training loss masks these
        out), 1 for tokens selected by a span, 0 otherwise.
    """
    # Use the cfg that was passed in, not the global CFG, so the padding length
    # always matches the configuration the caller supplies.
    encoded = cfg.tokenizer(text,
                            add_special_tokens=True,
                            max_length=cfg.max_len,
                            padding="max_length",
                            return_offsets_mapping=True)
    offset_mapping = encoded['offset_mapping']
    ignore_idxes = np.where(np.array(encoded.sequence_ids()) != 0)[0]
    label = np.zeros(len(offset_mapping))
    label[ignore_idxes] = -1
    if annotation_length != 0:
        for location in location_list:
            for loc in [s.split() for s in location.split(';')]:
                start_idx = -1
                end_idx = -1
                start, end = int(loc[0]), int(loc[1])
                for idx in range(len(offset_mapping)):
                    # First token that begins after `start`: the span starts one token earlier.
                    if start_idx == -1 and start < offset_mapping[idx][0]:
                        start_idx = idx - 1
                    # First token that ends at or after `end`: exclusive upper bound.
                    if end_idx == -1 and end <= offset_mapping[idx][1]:
                        end_idx = idx + 1
                if start_idx == -1:
                    # No token begins after `start`; the slice below is then empty.
                    start_idx = end_idx
                if start_idx != -1 and end_idx != -1:
                    label[start_idx:end_idx] = 1
    return torch.tensor(label, dtype=torch.float)


class TrainDataset(Dataset):
    """Dataset yielding ``(inputs, label)`` for each (patient note, feature) row."""

    def __init__(self, cfg, df):
        """Keep ``cfg`` and the four columns of ``df`` that items are built from."""
        self.cfg = cfg
        (self.feature_texts,
         self.pn_historys,
         self.annotation_lengths,
         self.locations) = (
            df[column].values
            for column in ('feature_text', 'pn_history', 'annotation_length', 'location')
        )

    def __len__(self):
        return len(self.feature_texts)

    def __getitem__(self, item):
        """Tokenize row ``item`` and build its token-level label."""
        note = self.pn_historys[item]
        inputs = prepare_input(self.cfg, note, self.feature_texts[item])
        label = create_label(self.cfg,
                             note,
                             self.annotation_lengths[item],
                             self.locations[item])
        return inputs, label

# Model

In [None]:
# ====================================================
# Model
# ====================================================
class CustomModel(nn.Module):
    """Transformer backbone followed by dropout and a one-logit-per-token linear head."""

    def __init__(self, cfg, config_path=None, pretrained=False):
        """
        Args:
            cfg: config object providing ``model`` (backbone name) and ``fc_dropout``.
            config_path: optional path to a config object saved with ``torch.save``;
                when None the config is fetched for ``cfg.model``.
            pretrained: load pretrained backbone weights when True; otherwise build
                the architecture from the config with freshly initialised weights.
        """
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
        else:
            # NOTE(review): torch.load unpickles the file - only use trusted paths.
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            # AutoModel cannot be instantiated directly; building an architecture
            # from a config goes through the from_config factory.
            self.model = AutoModel.from_config(self.config)
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(self.config.hidden_size, 1)
        self._init_weights(self.fc)
        
    def _init_weights(self, module):
        """Initialise ``module`` in place using ``config.initializer_range`` as the std."""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        
    def feature(self, inputs):
        """Run the backbone on ``inputs`` and return the first element of its output."""
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        return last_hidden_states

    def forward(self, inputs):
        """Return the linear head's output for every position of the backbone features."""
        feature = self.feature(inputs)
        output = self.fc(self.fc_dropout(feature))
        return output

# Helper functions

In [None]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Keeps the most recent value together with a running weighted mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``."""
    minutes = math.floor(s / 60)
    seconds = s - minutes * 60
    return '%dm %ds' % (minutes, seconds)


def timeSince(since, percent):
    """Return ``'<elapsed> (remain <estimate>)'`` for work started at ``since``.

    ``percent`` is the completed fraction of the work; the remaining time is
    extrapolated linearly from the time elapsed so far.
    """
    elapsed = time.time() - since
    remaining = elapsed / (percent) - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Train ``model`` for one epoch and return the average loss.

    Args:
        fold: fold number, used only to label the W&B metrics.
        train_loader: iterable of ``(inputs, labels)`` batches; ``inputs`` is a
            mapping of tensors.
        model: module called as ``model(inputs)``.
        criterion: loss callable; presumably returns an unreduced per-element loss,
            since positions whose label is -1 are masked out before averaging.
        optimizer: optimizer stepped through the AMP ``GradScaler``.
        epoch: zero-based epoch index, used for progress printing.
        scheduler: LR scheduler, stepped after every optimizer step when
            ``CFG.batch_scheduler`` is set.
        device: device the batch is moved to.

    Returns:
        float: batch-size-weighted average of the logged loss.
    """
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = time.time()
    global_step = 0
    # Value reported until the first optimizer step (only relevant when accumulating).
    grad_norm = 0.0
    for step, (inputs, labels) in enumerate(train_loader):
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            y_preds = model(inputs)
        loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
        # Positions labelled -1 do not contribute to the loss.
        loss = torch.masked_select(loss, labels.view(-1, 1) != -1).mean()
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            # Gradients are still multiplied by the loss scale after backward();
            # unscale them first so max_grad_norm applies to the true gradient
            # norm, and clip once per optimizer step rather than per micro-batch.
            scaler.unscale_(optimizer)
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            global_step += 1
            if CFG.batch_scheduler:
                scheduler.step()
        # get_last_lr() is the supported way to read the LR outside scheduler.step().
        current_lr = scheduler.get_last_lr()[0]
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=current_lr))
        if CFG.wandb:
            wandb.log({f"[fold{fold}] loss": losses.val,
                       f"[fold{fold}] lr": current_lr})
    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    """Evaluate ``model`` on ``valid_loader`` and collect sigmoid predictions.

    Args:
        valid_loader: Iterable yielding ``(inputs, labels)`` batches, where
            ``inputs`` is a mapping of tensors and ``labels`` is a tensor.
        model: Module called as ``model(inputs)``; switched to eval mode here.
        criterion: Loss that returns one value per element (the caller in this
            file passes ``BCEWithLogitsLoss(reduction="none")``). Positions
            whose label equals ``-1`` are masked out before averaging.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(losses.avg, predictions)`` where ``predictions`` is the
        ``np.concatenate`` of every batch's ``sigmoid`` output moved to CPU.
    """
    losses = AverageMeter()
    model.eval()
    preds = []
    start = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            y_preds = model(inputs)
        loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
        # Drop positions labelled -1 before reducing to a scalar.
        loss = torch.masked_select(loss, labels.view(-1, 1) != -1).mean()
        # No division by CFG.gradient_accumulation_steps: nothing is accumulated
        # during evaluation, so scaling would only distort the reported loss.
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    """Run ``model`` over ``test_loader`` and return the stacked sigmoid outputs.

    The model is put in eval mode and moved to ``device``; each batch (a mapping
    of tensors) is moved to the same device, evaluated without gradient
    tracking, passed through ``sigmoid`` and copied to CPU as a NumPy array.
    The per-batch arrays are joined with ``np.concatenate``.
    """
    model.eval()
    model.to(device)
    batch_outputs = []
    progress = tqdm(test_loader, total=len(test_loader))
    for inputs in progress:
        for name, tensor in inputs.items():
            inputs[name] = tensor.to(device)
        with torch.no_grad():
            logits = model(inputs)
        probabilities = logits.sigmoid().to('cpu').numpy()
        batch_outputs.append(probabilities)
    return np.concatenate(batch_outputs)

In [None]:
# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    """Train and validate one cross-validation fold and return its OOF frame.

    Rows of ``folds`` whose ``'fold'`` column equals ``fold`` form the
    validation split; all other rows are used for training. After every epoch
    the validation predictions are scored, and whenever the score improves the
    model weights and predictions are written to
    ``OUTPUT_DIR + "<model>_fold<fold>_best.pth"``.

    Args:
        folds: DataFrame with at least the ``'fold'`` and ``'pn_history'``
            columns (plus whatever ``TrainDataset`` and
            ``create_labels_for_scoring`` read).
        fold: Value of the ``'fold'`` column to hold out for validation.

    Returns:
        The validation rows with ``CFG.max_len`` extra columns
        (``0 .. CFG.max_len-1``) holding the best epoch's predictions.

    Raises:
        ValueError: If ``CFG.scheduler`` is neither ``'linear'`` nor ``'cosine'``.
        RuntimeError: If no epoch produced a usable score (e.g. ``CFG.epochs == 0``).
    """
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['fold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['fold'] == fold].reset_index(drop=True)
    valid_texts = valid_folds['pn_history'].values
    valid_labels = create_labels_for_scoring(valid_folds)
    
    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = TrainDataset(CFG, valid_folds)

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_DIR+'config.pth')
    model.to(device)
    
    def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
        """Build three parameter groups: backbone with decay, backbone without
        decay (biases / LayerNorm), and every parameter outside ``model.model``."""
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': weight_decay},
            {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': 0.0},
            {'params': [p for n, p in model.named_parameters() if "model" not in n],
             'lr': decoder_lr, 'weight_decay': 0.0}
        ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr, 
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)
    
    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        """Return the warmup scheduler selected by ``cfg.scheduler``."""
        if cfg.scheduler=='linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler=='cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(f"Unsupported CFG.scheduler: {cfg.scheduler!r} (expected 'linear' or 'cosine')")
        return scheduler
    
    # The scheduler is stepped once per optimizer step. Count those exactly:
    # len(train_loader) already reflects drop_last=True, and only every
    # gradient_accumulation_steps-th batch triggers an optimizer step.
    steps_per_epoch = len(train_loader) // CFG.gradient_accumulation_steps
    num_train_steps = steps_per_epoch * CFG.epochs
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss(reduction="none")
    
    # Start below any reachable score so the first scored epoch is always
    # checkpointed; otherwise a fold stuck at 0 would leave no (or a stale) file.
    best_score = float("-inf")
    best_predictions = None
    best_path = OUTPUT_DIR+f"{CFG.model.replace('/', '-')}_fold{fold}_best.pth"

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)
        predictions = predictions.reshape((len(valid_folds), CFG.max_len))
        
        # scoring
        char_probs = get_char_probs(valid_texts, predictions, CFG.tokenizer)
        results = get_results(char_probs, th=0.5)
        preds = get_predictions(results)
        score = get_score(valid_labels, preds)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')
        if CFG.wandb:
            wandb.log({f"[fold{fold}] epoch": epoch+1, 
                       f"[fold{fold}] avg_train_loss": avg_loss, 
                       f"[fold{fold}] avg_val_loss": avg_val_loss,
                       f"[fold{fold}] score": score})
        
        if best_score < score:
            best_score = score
            # `predictions` is rebound to a fresh array every epoch, so keeping
            # this reference is safe and avoids re-reading the checkpoint later.
            best_predictions = predictions
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        best_path)

    if best_predictions is None:
        raise RuntimeError(f"fold {fold}: no epoch produced a score, so there are no predictions to return")
    valid_folds[list(range(CFG.max_len))] = best_predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [None]:
if __name__ == '__main__':
    
    def get_result(oof_df, case_num=None):
        """Score out-of-fold predictions and log the result.

        When ``case_num`` is given, only rows with that ``case_num`` are scored.
        Labels come from ``create_labels_for_scoring``; the prediction columns
        ``0 .. CFG.max_len-1`` are passed through ``get_char_probs``,
        ``get_results`` (threshold 0.5) and ``get_predictions`` before
        ``get_score`` is applied.
        """
        if case_num is not None:
            oof_df = oof_df[oof_df["case_num"]==case_num].reset_index(drop=True)
        labels = create_labels_for_scoring(oof_df)
        predictions = oof_df[list(range(CFG.max_len))].values
        char_probs = get_char_probs(oof_df['pn_history'].values, predictions, CFG.tokenizer)
        results = get_results(char_probs, th=0.5)
        preds = get_predictions(results)
        score = get_score(labels, preds)
        if case_num is not None:
            LOGGER.info(f'Score of case_num {case_num}: {score:<.4f}')
        else:
            LOGGER.info(f'Score: {score:<.4f}')
    
    if CFG.train:
        # Collect the per-fold frames and concatenate once, rather than growing
        # a DataFrame seeded with an empty frame inside the loop.
        fold_frames = []
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                fold_frames.append(_oof_df)
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
        if fold_frames:
            oof_df = pd.concat(fold_frames).reset_index(drop=True)
        else:
            oof_df = pd.DataFrame()
        LOGGER.info(f"========== CV ==========")
        if CFG.cv_case_num:
            # Score each case that is actually present instead of assuming the
            # case numbers are exactly 0..9.
            for case_num in sorted(oof_df["case_num"].unique()):
                get_result(oof_df, case_num)
        else:
            get_result(oof_df)
        oof_df.to_pickle(OUTPUT_DIR+'oof_df.pkl')
        
    if CFG.wandb:
        wandb.finish()



Downloading:   0%|          | 0.00/533M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/86] Elapsed 0m 0s (remain 1m 22s) Loss: 0.5984(0.5984) Grad: inf  LR: 0.00002000  
Epoch: [1][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0079(0.0756) Grad: 2303.8201  LR: 0.00001812  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 11s) Loss: 0.0142(0.0142) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0133(0.0181) 


Epoch 1 - avg_train_loss: 0.0756  avg_val_loss: 0.0181  time: 31s
Epoch 1 - Score: 0.7880
Epoch 1 - Save Best Score: 0.7880 Model


Epoch: [2][0/86] Elapsed 0m 0s (remain 0m 46s) Loss: 0.0099(0.0099) Grad: 12487.3574  LR: 0.00001807  
Epoch: [2][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0096(0.0158) Grad: 17386.6953  LR: 0.00001317  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0119(0.0119) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0165(0.0164) 


Epoch 2 - avg_train_loss: 0.0158  avg_val_loss: 0.0164  time: 30s
Epoch 2 - Score: 0.8037
Epoch 2 - Save Best Score: 0.8037 Model


Epoch: [3][0/86] Elapsed 0m 0s (remain 0m 45s) Loss: 0.0359(0.0359) Grad: 57845.2852  LR: 0.00001310  
Epoch: [3][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0074(0.0134) Grad: 18701.3516  LR: 0.00000703  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0137(0.0137) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0170(0.0160) 


Epoch 3 - avg_train_loss: 0.0134  avg_val_loss: 0.0160  time: 30s
Epoch 3 - Score: 0.8098
Epoch 3 - Save Best Score: 0.8098 Model


Epoch: [4][0/86] Elapsed 0m 0s (remain 1m 0s) Loss: 0.0081(0.0081) Grad: 77115.4766  LR: 0.00000697  
Epoch: [4][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0108(0.0118) Grad: 20060.7227  LR: 0.00000201  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0112(0.0112) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0186(0.0158) 


Epoch 4 - avg_train_loss: 0.0118  avg_val_loss: 0.0158  time: 30s
Epoch 4 - Score: 0.8088


Epoch: [5][0/86] Elapsed 0m 0s (remain 0m 44s) Loss: 0.0055(0.0055) Grad: 10285.8350  LR: 0.00000197  
Epoch: [5][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0086(0.0106) Grad: 33868.2383  LR: 0.00000000  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0109(0.0109) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0190(0.0158) 


Epoch 5 - avg_train_loss: 0.0106  avg_val_loss: 0.0158  time: 30s
Epoch 5 - Score: 0.8109
Epoch 5 - Save Best Score: 0.8109 Model
Score: 0.8109
Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/86] Elapsed 0m 0s (remain 0m 44s) Loss: 0.9286(0.9286) Grad: inf  LR: 0.00002000  
Epoch: [1][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0256(0.1056) Grad: 2626.9197  LR: 0.00001812  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0133(0.0133) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0256(0.0172) 


Epoch 1 - avg_train_loss: 0.1056  avg_val_loss: 0.0172  time: 30s
Epoch 1 - Score: 0.8300
Epoch 1 - Save Best Score: 0.8300 Model


Epoch: [2][0/86] Elapsed 0m 0s (remain 0m 44s) Loss: 0.0079(0.0079) Grad: 14543.4355  LR: 0.00001807  
Epoch: [2][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0282(0.0164) Grad: 47169.4570  LR: 0.00001317  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0123(0.0123) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0274(0.0145) 


Epoch 2 - avg_train_loss: 0.0164  avg_val_loss: 0.0145  time: 30s
Epoch 2 - Score: 0.8583
Epoch 2 - Save Best Score: 0.8583 Model


Epoch: [3][0/86] Elapsed 0m 0s (remain 0m 47s) Loss: 0.0295(0.0295) Grad: 31769.0137  LR: 0.00001310  
Epoch: [3][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0028(0.0140) Grad: 12316.5400  LR: 0.00000703  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0115(0.0115) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0253(0.0132) 


Epoch 3 - avg_train_loss: 0.0140  avg_val_loss: 0.0132  time: 30s
Epoch 3 - Score: 0.8664
Epoch 3 - Save Best Score: 0.8664 Model


Epoch: [4][0/86] Elapsed 0m 0s (remain 0m 45s) Loss: 0.0232(0.0232) Grad: 22138.5996  LR: 0.00000697  
Epoch: [4][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0325(0.0125) Grad: 54356.8516  LR: 0.00000201  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0117(0.0117) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0247(0.0128) 


Epoch 4 - avg_train_loss: 0.0125  avg_val_loss: 0.0128  time: 30s
Epoch 4 - Score: 0.8715
Epoch 4 - Save Best Score: 0.8715 Model


Epoch: [5][0/86] Elapsed 0m 0s (remain 0m 44s) Loss: 0.0135(0.0135) Grad: 20373.7637  LR: 0.00000197  
Epoch: [5][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0150(0.0117) Grad: 21848.4980  LR: 0.00000000  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0117(0.0117) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0244(0.0128) 


Epoch 5 - avg_train_loss: 0.0117  avg_val_loss: 0.0128  time: 30s
Epoch 5 - Score: 0.8708
Score: 0.8715
Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/86] Elapsed 0m 0s (remain 0m 49s) Loss: 1.0728(1.0728) Grad: inf  LR: 0.00002000  
Epoch: [1][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0358(0.1160) Grad: 5113.4741  LR: 0.00001812  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0483(0.0483) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0191(0.0203) 


Epoch 1 - avg_train_loss: 0.1160  avg_val_loss: 0.0203  time: 30s
Epoch 1 - Score: 0.7894
Epoch 1 - Save Best Score: 0.7894 Model


Epoch: [2][0/86] Elapsed 0m 0s (remain 0m 45s) Loss: 0.0154(0.0154) Grad: 20906.1445  LR: 0.00001807  
Epoch: [2][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0525(0.0180) Grad: 50267.7930  LR: 0.00001317  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0477(0.0477) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0191(0.0181) 


Epoch 2 - avg_train_loss: 0.0180  avg_val_loss: 0.0181  time: 30s
Epoch 2 - Score: 0.8122
Epoch 2 - Save Best Score: 0.8122 Model


Epoch: [3][0/86] Elapsed 0m 0s (remain 0m 44s) Loss: 0.0131(0.0131) Grad: 22759.9395  LR: 0.00001310  
Epoch: [3][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0078(0.0151) Grad: 15484.3574  LR: 0.00000703  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0469(0.0469) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0177(0.0174) 


Epoch 3 - avg_train_loss: 0.0151  avg_val_loss: 0.0174  time: 30s
Epoch 3 - Score: 0.8258
Epoch 3 - Save Best Score: 0.8258 Model


Epoch: [4][0/86] Elapsed 0m 0s (remain 0m 47s) Loss: 0.0114(0.0114) Grad: 32375.8027  LR: 0.00000697  
Epoch: [4][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0177(0.0144) Grad: 36264.9023  LR: 0.00000201  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0455(0.0455) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0174(0.0170) 


Epoch 4 - avg_train_loss: 0.0144  avg_val_loss: 0.0170  time: 30s
Epoch 4 - Score: 0.8276
Epoch 4 - Save Best Score: 0.8276 Model


Epoch: [5][0/86] Elapsed 0m 0s (remain 0m 44s) Loss: 0.0021(0.0021) Grad: 5485.1011  LR: 0.00000197  
Epoch: [5][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0090(0.0137) Grad: 20327.3672  LR: 0.00000000  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0452(0.0452) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0171(0.0170) 


Epoch 5 - avg_train_loss: 0.0137  avg_val_loss: 0.0170  time: 30s
Epoch 5 - Score: 0.8281
Epoch 5 - Save Best Score: 0.8281 Model
Score: 0.8281
Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/86] Elapsed 0m 0s (remain 0m 44s) Loss: 0.7691(0.7691) Grad: inf  LR: 0.00002000  
Epoch: [1][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0338(0.0925) Grad: 4951.8760  LR: 0.00001812  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0268(0.0268) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0094(0.0203) 


Epoch 1 - avg_train_loss: 0.0925  avg_val_loss: 0.0203  time: 30s
Epoch 1 - Score: 0.7759
Epoch 1 - Save Best Score: 0.7759 Model


Epoch: [2][0/86] Elapsed 0m 0s (remain 0m 45s) Loss: 0.0076(0.0076) Grad: 9238.1826  LR: 0.00001807  
Epoch: [2][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0156(0.0158) Grad: 37532.4531  LR: 0.00001317  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0224(0.0224) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0082(0.0180) 


Epoch 2 - avg_train_loss: 0.0158  avg_val_loss: 0.0180  time: 30s
Epoch 2 - Score: 0.8172
Epoch 2 - Save Best Score: 0.8172 Model


Epoch: [3][0/86] Elapsed 0m 0s (remain 0m 43s) Loss: 0.0150(0.0150) Grad: 11354.6592  LR: 0.00001310  
Epoch: [3][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0040(0.0148) Grad: 8752.9180  LR: 0.00000703  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0212(0.0212) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0070(0.0170) 


Epoch 3 - avg_train_loss: 0.0148  avg_val_loss: 0.0170  time: 30s
Epoch 3 - Score: 0.8239
Epoch 3 - Save Best Score: 0.8239 Model


Epoch: [4][0/86] Elapsed 0m 0s (remain 0m 43s) Loss: 0.0371(0.0371) Grad: 23201.9668  LR: 0.00000697  
Epoch: [4][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0115(0.0128) Grad: 14923.3564  LR: 0.00000201  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0211(0.0211) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0064(0.0168) 


Epoch 4 - avg_train_loss: 0.0128  avg_val_loss: 0.0168  time: 30s
Epoch 4 - Score: 0.8265
Epoch 4 - Save Best Score: 0.8265 Model


Epoch: [5][0/86] Elapsed 0m 0s (remain 0m 44s) Loss: 0.0071(0.0071) Grad: 20773.1406  LR: 0.00000197  
Epoch: [5][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0087(0.0125) Grad: 25845.6133  LR: 0.00000000  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0211(0.0211) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0064(0.0168) 


Epoch 5 - avg_train_loss: 0.0125  avg_val_loss: 0.0168  time: 30s
Epoch 5 - Score: 0.8254
Score: 0.8265
Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/86] Elapsed 0m 0s (remain 0m 57s) Loss: 0.8566(0.8566) Grad: inf  LR: 0.00002000  
Epoch: [1][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0395(0.1026) Grad: 5009.6279  LR: 0.00001812  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0195(0.0195) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0112(0.0209) 


Epoch 1 - avg_train_loss: 0.1026  avg_val_loss: 0.0209  time: 30s
Epoch 1 - Score: 0.7736
Epoch 1 - Save Best Score: 0.7736 Model


Epoch: [2][0/86] Elapsed 0m 0s (remain 0m 47s) Loss: 0.0293(0.0293) Grad: 56049.9453  LR: 0.00001807  
Epoch: [2][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0116(0.0179) Grad: 30282.5645  LR: 0.00001317  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0153(0.0153) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0070(0.0176) 


Epoch 2 - avg_train_loss: 0.0179  avg_val_loss: 0.0176  time: 30s
Epoch 2 - Score: 0.8257
Epoch 2 - Save Best Score: 0.8257 Model


Epoch: [3][0/86] Elapsed 0m 0s (remain 0m 44s) Loss: 0.0097(0.0097) Grad: 21001.1504  LR: 0.00001310  
Epoch: [3][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0093(0.0142) Grad: 13851.3027  LR: 0.00000703  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0150(0.0150) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0066(0.0177) 


Epoch 3 - avg_train_loss: 0.0142  avg_val_loss: 0.0177  time: 30s
Epoch 3 - Score: 0.8239


Epoch: [4][0/86] Elapsed 0m 0s (remain 0m 43s) Loss: 0.0171(0.0171) Grad: 20743.9043  LR: 0.00000697  
Epoch: [4][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0077(0.0139) Grad: 8573.3418  LR: 0.00000201  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0143(0.0143) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0060(0.0172) 


Epoch 4 - avg_train_loss: 0.0139  avg_val_loss: 0.0172  time: 30s
Epoch 4 - Score: 0.8306
Epoch 4 - Save Best Score: 0.8306 Model


Epoch: [5][0/86] Elapsed 0m 0s (remain 0m 45s) Loss: 0.0084(0.0084) Grad: 17604.2422  LR: 0.00000197  
Epoch: [5][85/86] Elapsed 0m 26s (remain 0m 0s) Loss: 0.0249(0.0133) Grad: 64173.3711  LR: 0.00000000  
EVAL: [0/22] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0141(0.0141) 
EVAL: [21/22] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0058(0.0170) 


Epoch 5 - avg_train_loss: 0.0133  avg_val_loss: 0.0170  time: 30s
Epoch 5 - Score: 0.8357
Epoch 5 - Save Best Score: 0.8357 Model
Score: 0.8357
Score: 0.8359





VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
[fold0] avg_train_loss,█▂▁▁▁
[fold0] avg_val_loss,█▃▂▁▁
[fold0] epoch,▁▃▅▆█
[fold0] loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[fold0] lr,███████▇▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁
[fold0] score,▁▆█▇█
[fold1] avg_train_loss,█▁▁▁▁
[fold1] avg_val_loss,█▄▂▁▁
[fold1] epoch,▁▃▅▆█
[fold1] loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
[fold0] avg_train_loss,0.01062
[fold0] avg_val_loss,0.0158
[fold0] epoch,5.0
[fold0] loss,0.00863
[fold0] lr,0.0
[fold0] score,0.81092
[fold1] avg_train_loss,0.01171
[fold1] avg_val_loss,0.0128
[fold1] epoch,5.0
[fold1] loss,0.015


# Post-processing

In [None]:
# Reload the out-of-fold frame that the training cell pickled to OUTPUT_DIR,
# so post-processing can start from disk without re-running training.
# NOTE(review): read_pickle executes arbitrary pickle payloads; only load files
# produced by this notebook.
oof_df = pd.read_pickle(OUTPUT_DIR+'oof_df.pkl')
# Bare expression: rendered as the cell's output.
oof_df

Unnamed: 0,id,case_num,pn_num,feature_num,annotation,location,feature_text,pn_history,annotation_length,fold,location_for_create_labels,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414
0,10019_100,1,10019,100,[],[],No-vaginal-discharge,Suzanne Poweltor is a 20 yo F with no signific...,0,0,[],0.210069,0.000488,0.000566,0.000555,0.000398,0.000255,0.000137,0.000234,0.001611,0.002468,0.007514,0.001155,0.087924,0.107638,0.118176,0.161759,0.093812,0.000150,0.000152,0.000122,0.000130,0.000150,0.000183,0.000438,0.000835,0.000110,0.000130,0.000133,0.000127,0.000155,0.000383,0.000162,0.000135,0.000145,0.000170,0.000168,0.000098,0.000106,0.000128,0.000261,0.000167,0.000265,0.000349,0.000097,0.000116,0.000260,0.000142,0.000164,0.000085,0.000094,0.000163,0.000192,0.000172,0.000111,0.000087,0.000131,0.000139,0.000123,0.000090,0.000097,0.000110,0.000127,0.000098,0.000124,0.000144,0.000237,0.000101,0.000131,0.000113,0.000093,0.000099,0.000084,0.000182,0.000246,0.000110,0.000116,0.000142,0.000113,0.000114,0.000083,0.000121,0.000135,0.000125,0.000108,0.000103,0.000121,0.000134,0.000123,0.000133,0.000104,0.000081,0.000087,0.000081,0.000128,0.000196,0.000148,0.000352,0.000215,0.000267,0.000462,0.000158,0.000081,0.000101,0.000189,0.000164,0.000089,0.000144,0.000478,0.000217,0.000189,0.000215,0.000104,0.000148,0.000258,0.000100,0.000225,0.000241,0.000222,0.000103,0.000173,0.000127,0.000150,0.000165,0.000156,0.000094,0.000395,0.000343,0.000115,0.000244,0.000272,0.000230,0.000130,0.000109,0.000166,0.000122,0.000212,0.000213,0.000090,0.000145,0.000116,0.000122,0.000126,0.000129,0.000106,0.000114,0.000134,0.000081,0.000113,0.000121,0.000093,0.000094,0.000104,0.000100,0.000113,0.000168,0.000077,0.000203,0.000169,0.000162,0.000178,0.000207,0.002197,0.012136,0.036978,0.001093,0.001268,0.000174,0.000174,0.000160,0.000283,0.000316,0.000328,0.000135,0.000182,0.000119,0.000124,0.000098,0.000119,0.000095,0.000111,0.000094,0.000098,0.000110,0.000125,0.000151,0.000120,0.000108,0.000120,0.000184,0.000117,0.000101,0.000128,0.000116,0.000113,0.000119,0.000325,0.000201,0.000463,0.000489,0.000155,0.000207,0.000124,0.000132,0.000144,0.000195,0.000216,0.000101,0.000117,0.000116,0.000147,0.
000097,0.000094,0.000095,0.000080,0.000068,0.000167,0.000134,0.000081,0.000087,0.000106,0.000085,0.000064,0.000115,0.000125,0.000121,0.000121,0.000179,0.000310,0.000250,0.000398,0.000409,0.000151,0.000314,0.000213,0.000171,0.000173,0.000183,0.000189,0.000074,0.000117,0.000107,0.000074,0.000275,0.000188,0.000227,0.000178,0.000175,0.000187,0.000085,0.000403,0.000235,0.000249,0.000205,0.000175,0.000208,0.000212,0.000193,0.000220,0.001043,0.556271,0.115173,0.768450,0.549066,0.109755,0.628564,0.629869,0.392933,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429
1,10019_101,1,10019,101,[],[],Weight-loss,Suzanne Poweltor is a 20 yo F with no signific...,0,0,[],0.181153,0.000361,0.000440,0.000450,0.000340,0.000214,0.000119,0.000182,0.001010,0.001595,0.002477,0.000558,0.003766,0.010076,0.011684,0.011547,0.009555,0.000117,0.000125,0.000106,0.000112,0.000118,0.000134,0.000187,0.000277,0.000096,0.000121,0.000125,0.000121,0.000148,0.000251,0.000149,0.000126,0.000131,0.000153,0.000170,0.000095,0.000104,0.000122,0.000237,0.000153,0.000239,0.000318,0.000094,0.000110,0.000189,0.000132,0.000149,0.000083,0.000090,0.000150,0.000174,0.000155,0.000104,0.000085,0.000126,0.000137,0.000121,0.000091,0.000097,0.000106,0.000122,0.000097,0.000120,0.000135,0.000200,0.000101,0.000135,0.000116,0.000096,0.000102,0.000084,0.000160,0.000183,0.000104,0.000111,0.000134,0.000111,0.000107,0.000081,0.000116,0.000130,0.000122,0.000105,0.000100,0.000119,0.000132,0.000122,0.000129,0.000101,0.000079,0.000085,0.000080,0.000121,0.000193,0.000155,0.000271,0.000172,0.000222,0.000390,0.000155,0.000092,0.000197,0.001379,0.000393,0.000106,0.000131,0.000290,0.000178,0.000158,0.000174,0.000097,0.000132,0.000199,0.000097,0.000231,0.000228,0.000197,0.000098,0.000150,0.000121,0.000135,0.000147,0.000142,0.000095,0.000789,0.000591,0.000137,0.000345,0.000390,0.000341,0.000136,0.000105,0.000149,0.000109,0.000165,0.000170,0.000086,0.000137,0.000114,0.000119,0.000125,0.000120,0.000102,0.000111,0.000132,0.000079,0.000108,0.000118,0.000091,0.000093,0.000103,0.000097,0.000112,0.000149,0.000073,0.000192,0.000162,0.000162,0.000177,0.000196,0.001385,0.005100,0.014811,0.000764,0.000821,0.000165,0.000163,0.000153,0.000268,0.000289,0.000298,0.000129,0.000162,0.000115,0.000120,0.000096,0.000114,0.000095,0.000111,0.000092,0.000098,0.000108,0.000124,0.000149,0.000118,0.000106,0.000120,0.000186,0.000118,0.000101,0.000126,0.000116,0.000112,0.000117,0.000303,0.000190,0.000397,0.000436,0.000143,0.000176,0.000119,0.000128,0.000140,0.000185,0.000206,0.000100,0.000114,0.000116,0.000146,0.000097,0.
000092,0.000095,0.000077,0.000066,0.000131,0.000118,0.000076,0.000083,0.000098,0.000079,0.000062,0.000110,0.000121,0.000118,0.000118,0.000168,0.000286,0.000236,0.000370,0.000396,0.000143,0.000302,0.000196,0.000163,0.000165,0.000172,0.000183,0.000071,0.000110,0.000098,0.000071,0.000285,0.000175,0.000206,0.000174,0.000164,0.000182,0.000083,0.000406,0.000235,0.000251,0.000204,0.000179,0.000221,0.000218,0.000199,0.000175,0.001777,0.714338,0.673201,0.784592,0.394899,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429
2,10019_102,1,10019,102,[Last sexually active 9 months ago],[854 887],Not-sexually-active,Suzanne Poweltor is a 20 yo F with no signific...,1,0,[854 887],0.241407,0.000447,0.000501,0.000482,0.000350,0.000225,0.000127,0.000191,0.000801,0.001106,0.003482,0.000560,0.011553,0.017414,0.021979,0.014762,0.011130,0.000125,0.000128,0.000107,0.000109,0.000119,0.000137,0.000198,0.000336,0.000099,0.000124,0.000128,0.000121,0.000134,0.000283,0.000148,0.000125,0.000131,0.000149,0.000156,0.000095,0.000100,0.000118,0.000217,0.000149,0.000238,0.000313,0.000096,0.000109,0.000214,0.000130,0.000138,0.000083,0.000091,0.000155,0.000181,0.000155,0.000106,0.000086,0.000127,0.000135,0.000112,0.000086,0.000091,0.000104,0.000121,0.000096,0.000117,0.000135,0.000211,0.000099,0.000126,0.000110,0.000091,0.000096,0.000084,0.000171,0.000222,0.000109,0.000114,0.000131,0.000111,0.000112,0.000084,0.000120,0.000134,0.000125,0.000104,0.000102,0.000119,0.000132,0.000123,0.000132,0.000103,0.000079,0.000086,0.000081,0.000125,0.000190,0.000142,0.000292,0.000207,0.000251,0.000458,0.000157,0.000081,0.000100,0.000198,0.000176,0.000092,0.000149,0.000555,0.000218,0.000196,0.000219,0.000106,0.000153,0.000273,0.000106,0.000234,0.000245,0.000218,0.000105,0.000184,0.000128,0.000155,0.000171,0.000162,0.000099,0.000488,0.000415,0.000130,0.000289,0.000335,0.000269,0.000145,0.000119,0.000228,0.000157,0.000341,0.000327,0.000099,0.000143,0.000117,0.000126,0.000131,0.000134,0.000110,0.000119,0.000142,0.000082,0.000113,0.000122,0.000093,0.000096,0.000107,0.000102,0.000112,0.000173,0.000075,0.000199,0.000165,0.000161,0.000177,0.000201,0.001385,0.004630,0.014155,0.000798,0.000926,0.000171,0.000171,0.000147,0.000253,0.000297,0.000314,0.000132,0.000177,0.000117,0.000122,0.000099,0.000120,0.000096,0.000115,0.000095,0.000099,0.000110,0.000125,0.000149,0.000119,0.000108,0.000120,0.000186,0.000117,0.000101,0.000127,0.000116,0.000113,0.000119,0.000273,0.000180,0.000399,0.000409,0.000147,0.000200,0.000122,0.000130,0.000140,0.000193,0
.000213,0.000100,0.000119,0.000117,0.000153,0.000100,0.000095,0.000094,0.000080,0.000067,0.000160,0.000138,0.000084,0.000090,0.000110,0.000087,0.000064,0.000117,0.000128,0.000125,0.000124,0.000214,0.000375,0.000326,0.000485,0.000473,0.000164,0.000423,0.000312,0.000236,0.000218,0.000215,0.000220,0.000084,0.000325,0.000290,0.000205,0.956319,0.975397,0.975905,0.966634,0.974129,0.972445,0.000969,0.001216,0.001809,0.001857,0.001240,0.001082,0.002024,0.003402,0.002868,0.000292,0.001133,0.723678,0.077465,0.470851,0.400322,0.353247,0.602959,0.390771,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429
3,10019_103,1,10019,103,[diarrhea],[416 424],Prior-episodes-of-diarrhea,Suzanne Poweltor is a 20 yo F with no signific...,1,0,[416 424],0.177359,0.000422,0.000506,0.000527,0.000384,0.000225,0.000126,0.000194,0.001302,0.002066,0.003910,0.000504,0.003131,0.010401,0.026139,0.028096,0.015224,0.000127,0.000186,0.000161,0.000171,0.000204,0.000195,0.000404,0.000556,0.000110,0.000126,0.000135,0.000133,0.000156,0.000322,0.000181,0.000145,0.000149,0.000171,0.000181,0.000094,0.000096,0.000113,0.000197,0.000144,0.000229,0.000300,0.000096,0.000117,0.000242,0.000148,0.000144,0.000084,0.000099,0.000191,0.000215,0.000165,0.000115,0.000089,0.000131,0.000149,0.000135,0.000094,0.000103,0.000119,0.000130,0.000098,0.000123,0.000140,0.000241,0.000105,0.000142,0.000117,0.000093,0.000098,0.000086,0.000197,0.000303,0.000135,0.000163,0.000282,0.000189,0.000158,0.000094,0.000145,0.000159,0.000158,0.000172,0.000175,0.000350,0.000258,0.000245,0.000355,0.000152,0.000123,0.000123,0.000106,0.003426,0.033629,0.068840,0.224409,0.163580,0.287797,0.953964,0.236998,0.006398,0.005587,0.031482,0.061033,0.000483,0.000317,0.002922,0.000912,0.003072,0.003845,0.000385,0.001239,0.003280,0.000236,0.010083,0.006380,0.014039,0.000368,0.002019,0.000308,0.001802,0.003179,0.003723,0.000263,0.009424,0.009146,0.000332,0.000706,0.000881,0.001015,0.000183,0.000163,0.000451,0.000254,0.000373,0.000333,0.000104,0.000146,0.000121,0.000142,0.000167,0.000239,0.000188,0.000173,0.000243,0.000086,0.000116,0.000130,0.000099,0.000099,0.000114,0.000106,0.000136,0.000179,0.000077,0.000204,0.000167,0.000162,0.000178,0.000204,0.001582,0.006578,0.015524,0.000837,0.000850,0.000163,0.000167,0.000147,0.000256,0.000267,0.000282,0.000126,0.000159,0.000115,0.000121,0.000095,0.000110,0.000092,0.000111,0.000091,0.000095,0.000107,0.000124,0.000148,0.000118,0.000106,0.000114,0.000173,0.000109,0.000096,0.000117,0.000108,0.000109,0.000117,0.000270,0.000178,0.000357,0.000401,0.000139,0.000173,0.000123,0.000133,0.000145,0.000185,0.000204,0.000097,0
.000112,0.000112,0.000146,0.000096,0.000095,0.000094,0.000076,0.000067,0.000126,0.000123,0.000077,0.000081,0.000098,0.000079,0.000063,0.000115,0.000123,0.000120,0.000120,0.000173,0.000276,0.000235,0.000336,0.000361,0.000141,0.000288,0.000187,0.000160,0.000157,0.000167,0.000182,0.000073,0.000109,0.000094,0.000072,0.000228,0.000162,0.000198,0.000167,0.000167,0.000178,0.000083,0.000293,0.000193,0.000206,0.000169,0.000148,0.000201,0.000207,0.000186,0.000217,0.000997,0.004600,0.007876,0.031705,0.013022,0.013120,0.018620,0.011094,0.202425,0.246592,0.181981,0.388164,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429
4,10019_104,1,10019,104,[20 yo],[22 27],20-year,Suzanne Poweltor is a 20 yo F with no signific...,1,0,[22 27],0.223543,0.001199,0.001192,0.000915,0.000613,0.000333,0.000205,0.001462,0.983961,0.976039,0.075194,0.001626,0.017598,0.045029,0.084973,0.095788,0.054858,0.000151,0.000154,0.000121,0.000127,0.000139,0.000168,0.000329,0.000830,0.000110,0.000130,0.000132,0.000124,0.000144,0.000386,0.000163,0.000133,0.000145,0.000165,0.000163,0.000097,0.000105,0.000125,0.000238,0.000161,0.000256,0.000339,0.000095,0.000113,0.000263,0.000137,0.000147,0.000084,0.000092,0.000157,0.000183,0.000159,0.000107,0.000085,0.000130,0.000138,0.000120,0.000090,0.000095,0.000108,0.000126,0.000098,0.000124,0.000142,0.000231,0.000102,0.000134,0.000114,0.000093,0.000098,0.000083,0.000165,0.000196,0.000108,0.000114,0.000134,0.000109,0.000111,0.000082,0.000122,0.000134,0.000124,0.000107,0.000103,0.000122,0.000133,0.000123,0.000132,0.000102,0.000079,0.000085,0.000080,0.000128,0.000198,0.000147,0.000283,0.000205,0.000253,0.000474,0.000158,0.000082,0.000101,0.000189,0.000167,0.000089,0.000143,0.000379,0.000204,0.000196,0.000217,0.000108,0.000155,0.000275,0.000103,0.000235,0.000246,0.000218,0.000103,0.000178,0.000127,0.000152,0.000167,0.000155,0.000097,0.000476,0.000391,0.000123,0.000273,0.000321,0.000253,0.000135,0.000116,0.000200,0.000138,0.000253,0.000248,0.000093,0.000147,0.000118,0.000127,0.000134,0.000135,0.000109,0.000119,0.000145,0.000083,0.000114,0.000123,0.000094,0.000097,0.000109,0.000105,0.000118,0.000181,0.000077,0.000205,0.000167,0.000162,0.000179,0.000208,0.002610,0.012841,0.031024,0.001388,0.001324,0.000175,0.000172,0.000162,0.000271,0.000290,0.000303,0.000130,0.000164,0.000116,0.000122,0.000103,0.000122,0.000097,0.000118,0.000098,0.000102,0.000113,0.000128,0.000153,0.000120,0.000108,0.000126,0.000193,0.000120,0.000102,0.000130,0.000117,0.000113,0.000119,0.000329,0.000189,0.000406,0.000463,0.000150,0.000180,0.000121,0.000128,0.000139,0.000183,0.000203,0.000100,0.000121,0.000120,0.000153,
0.000098,0.000095,0.000096,0.000081,0.000066,0.000137,0.000130,0.000080,0.000086,0.000104,0.000084,0.000063,0.000114,0.000124,0.000121,0.000121,0.000178,0.000303,0.000252,0.000330,0.000382,0.000146,0.000312,0.000217,0.000170,0.000174,0.000188,0.000191,0.000073,0.000117,0.000106,0.000073,0.000292,0.000186,0.000229,0.000178,0.000178,0.000187,0.000086,0.000450,0.000262,0.000275,0.000218,0.000194,0.000265,0.000276,0.000231,0.000206,0.003776,0.944152,0.888769,0.887349,0.394224,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429,0.130429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1295,10926_108,1,10926,108,[],[],No-urinary-symptoms,20 yo F without significant PMH presenting wit...,0,4,[],0.206599,0.000925,0.002284,0.002987,0.000347,0.000383,0.000783,0.000607,0.000183,0.000161,0.000655,0.000487,0.000756,0.000488,0.000211,0.000119,0.000226,0.000226,0.000263,0.000265,0.000244,0.000178,0.000106,0.000143,0.000141,0.000165,0.000109,0.000149,0.000145,0.000153,0.000088,0.000141,0.000122,0.000128,0.000123,0.000147,0.000132,0.000166,0.000092,0.000205,0.000199,0.000086,0.000201,0.000110,0.000110,0.000171,0.000227,0.000514,0.000399,0.000109,0.000119,0.000123,0.000111,0.000098,0.000120,0.000188,0.000195,0.000172,0.000167,0.000136,0.000144,0.000137,0.000104,0.000123,0.000116,0.000107,0.000149,0.000120,0.000162,0.000167,0.000094,0.000117,0.000144,0.000291,0.000367,0.000390,0.000424,0.000488,0.000296,0.000223,0.000420,0.000372,0.000311,0.000102,0.000111,0.000116,0.000117,0.000113,0.000216,0.001408,0.000232,0.000222,0.000234,0.000201,0.000242,0.000199,0.000184,0.000116,0.000160,0.000189,0.000204,0.000275,0.000144,0.000180,0.000188,0.000330,0.000493,0.000277,0.000265,0.000211,0.000153,0.089216,0.060525,0.032747,0.006057,0.007799,0.012721,0.040781,0.085509,0.101141,0.000225,0.000241,0.000268,0.000253,0.000246,0.000149,0.003984,0.005327,0.000212,0.000282,0.000218,0.000254,0.000152,0.000432,0.000663,0.000162,0.000357,0.000598,0.000254,0.000199,0.000237,0.000258,0.000138,0.000333,0.000136,0.000198,0.000233,0.000216,0.000209,0.000164,0.000119,0.000162,0.000110,0.000323,0.000161,0.000161,0.000137,0.000158,0.000103,0.000121,0.000116,0.000107,0.000139,0.000140,0.000187,0.000173,0.000158,0.000160,0.000111,0.000270,0.000169,0.000188,0.000285,0.000134,0.000150,0.000157,0.000165,0.000176,0.000119,0.000213,0.000379,0.000242,0.000173,0.000143,0.000097,0.000139,0.000097,0.000129,0.000167,0.000086,0.000116,0.000134,0.000153,0.000177,0.000082,0.000125,0.000153,0.000128,0.000162,0.000140,0.000119,0.000084,0.000193,0.000347,0.000286,0.000118,0.000234,0.000251,0.000267,0.000396,
0.000211,0.000107,0.000162,0.000179,0.000158,0.000102,0.000310,0.000389,0.000322,0.000165,0.000274,0.000346,0.002137,0.167541,0.108512,0.192612,0.115074,0.043919,0.026603,0.036759,0.599384,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550
1296,10926_109,1,10926,109,[decreased appetite],[437 455],Diminished-appetite,20 yo F without significant PMH presenting wit...,1,4,[437 455],0.233889,0.000955,0.002513,0.003515,0.000292,0.000332,0.000768,0.000543,0.000152,0.000134,0.000520,0.000380,0.000447,0.000388,0.000191,0.000109,0.000203,0.000211,0.000249,0.000243,0.000226,0.000163,0.000099,0.000128,0.000111,0.000121,0.000096,0.000140,0.000132,0.000140,0.000083,0.000135,0.000118,0.000125,0.000118,0.000140,0.000125,0.000161,0.000086,0.000160,0.000170,0.000079,0.000169,0.000096,0.000096,0.000149,0.000193,0.000430,0.000333,0.000101,0.000110,0.000115,0.000105,0.000093,0.000112,0.000170,0.000184,0.000157,0.000156,0.000127,0.000129,0.000126,0.000099,0.000120,0.000114,0.000105,0.000149,0.000118,0.000162,0.000166,0.000090,0.000108,0.000131,0.000243,0.000313,0.000316,0.000314,0.000349,0.000236,0.000185,0.000342,0.000311,0.000265,0.000096,0.000101,0.000107,0.000103,0.000100,0.000189,0.001171,0.000221,0.000210,0.000221,0.000188,0.000217,0.000184,0.000175,0.000118,0.000299,0.001277,0.001219,0.003547,0.000184,0.000498,0.002632,0.901161,0.965813,0.461605,0.394611,0.265619,0.000202,0.000641,0.000675,0.000916,0.000311,0.001104,0.001777,0.005753,0.011181,0.007431,0.000151,0.000178,0.000195,0.000176,0.000153,0.000095,0.000220,0.000265,0.000089,0.000123,0.000119,0.000122,0.000090,0.000243,0.000326,0.000115,0.000231,0.000415,0.000203,0.000170,0.000197,0.000212,0.000111,0.000239,0.000113,0.000174,0.000209,0.000197,0.000193,0.000154,0.000108,0.000148,0.000100,0.000266,0.000151,0.000156,0.000131,0.000151,0.000100,0.000117,0.000113,0.000104,0.000137,0.000136,0.000185,0.000166,0.000150,0.000143,0.000099,0.000225,0.000154,0.000179,0.000264,0.000125,0.000140,0.000147,0.000158,0.000166,0.000114,0.000203,0.000354,0.000232,0.000169,0.000140,0.000094,0.000135,0.000095,0.000126,0.000166,0.000082,0.000109,0.000127,0.000145,0.000168,0.000079,0.000121,0.000152,0.000124,0.000159,0.000134,0.000112,0.000078,0.000124,0.000245,0.000216,0.000105,0.00
0205,0.000222,0.000232,0.000347,0.000180,0.000097,0.000146,0.000156,0.000142,0.000088,0.000140,0.000245,0.000211,0.000120,0.000179,0.000230,0.001767,0.292721,0.419331,0.154680,0.551275,0.292835,0.341830,0.605821,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550
1297,10926_110,1,10926,110,[],[],Normal-LMP-2-weeks-ago-OR-Normal-last-menstrua...,20 yo F without significant PMH presenting wit...,0,4,[],0.087373,0.000641,0.001438,0.001819,0.000253,0.000319,0.000654,0.000510,0.000143,0.000132,0.000534,0.000385,0.000425,0.000379,0.000193,0.000110,0.000200,0.000206,0.000240,0.000238,0.000223,0.000162,0.000103,0.000125,0.000107,0.000112,0.000094,0.000132,0.000129,0.000134,0.000083,0.000138,0.000119,0.000123,0.000118,0.000139,0.000125,0.000167,0.000087,0.000158,0.000160,0.000080,0.000215,0.000107,0.000110,0.000168,0.000225,0.000541,0.000410,0.000106,0.000114,0.000116,0.000108,0.000096,0.000118,0.000188,0.000192,0.000174,0.000164,0.000126,0.000126,0.000128,0.000100,0.000125,0.000118,0.000108,0.000153,0.000119,0.000159,0.000165,0.000091,0.000109,0.000124,0.000233,0.000295,0.000296,0.000294,0.000322,0.000217,0.000172,0.000306,0.000277,0.000232,0.000093,0.000099,0.000101,0.000100,0.000096,0.000174,0.001169,0.000215,0.000212,0.000223,0.000184,0.000219,0.000179,0.000168,0.000104,0.000117,0.000150,0.000162,0.000230,0.000122,0.000152,0.000149,0.000253,0.000394,0.000244,0.000245,0.000192,0.000124,0.000274,0.000329,0.000386,0.000220,0.000460,0.000733,0.001059,0.001752,0.001639,0.000137,0.000168,0.000187,0.000172,0.000147,0.000091,0.000192,0.000236,0.000086,0.000119,0.000114,0.000118,0.000085,0.000199,0.000258,0.000110,0.000221,0.000398,0.000198,0.000168,0.000195,0.000208,0.000106,0.000220,0.000107,0.000173,0.000214,0.000200,0.000194,0.000151,0.000105,0.000141,0.000097,0.000256,0.000146,0.000158,0.000143,0.000164,0.000101,0.000117,0.000112,0.000102,0.000135,0.000136,0.000190,0.000168,0.000153,0.000154,0.000102,0.000233,0.000157,0.000188,0.000308,0.000134,0.000147,0.000152,0.000160,0.000172,0.000117,0.000214,0.000379,0.000241,0.000172,0.000143,0.000094,0.000136,0.000095,0.000128,0.000167,0.000084,0.000112,0.000130,0.000150,0.000174,0.000082,0.000145,0.000172,0.000137,0.000169,0.000145,0.000123,0.000079,0.000134,0.000238,0.000218,0.000110,0.0002
50,0.000248,0.000246,0.000386,0.000211,0.000101,0.000158,0.000161,0.000150,0.000091,0.000136,0.000236,0.000200,0.000122,0.000192,0.000245,0.079901,0.921088,0.551255,0.944108,0.951466,0.902193,0.969379,0.946946,0.980003,0.980129,0.860135,0.957318,0.127449,0.461644,0.117866,0.904018,0.498974,0.952750,0.812058,0.952835,0.877335,0.830413,0.735533,0.946845,0.776216,0.953020,0.879527,0.978608,0.978922,0.499949,0.962925,0.609220,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550
1298,10926_111,1,10926,111,[started 8-10 hours ago],[74 96],8-to-10-hours-of-acute-pain,20 yo F without significant PMH presenting wit...,1,4,[74 96],0.228441,0.000956,0.002454,0.003244,0.000299,0.000353,0.000818,0.000808,0.000268,0.000561,0.018387,0.007905,0.039245,0.215772,0.001669,0.083958,0.944694,0.989372,0.987801,0.990330,0.987835,0.980130,0.000183,0.000210,0.000277,0.001394,0.000165,0.000201,0.000240,0.000342,0.000103,0.000170,0.000145,0.000170,0.000154,0.000184,0.000184,0.000278,0.000097,0.000178,0.000194,0.000087,0.000359,0.000133,0.000133,0.000224,0.000310,0.000885,0.000675,0.000117,0.000119,0.000131,0.000143,0.000118,0.000165,0.000329,0.000316,0.000193,0.000180,0.000142,0.000169,0.000161,0.000101,0.000124,0.000118,0.000112,0.000160,0.000120,0.000161,0.000171,0.000091,0.000106,0.000118,0.000220,0.000281,0.000292,0.000299,0.000319,0.000193,0.000159,0.000285,0.000275,0.000228,0.000092,0.000094,0.000097,0.000092,0.000089,0.000142,0.000621,0.000178,0.000163,0.000187,0.000149,0.000162,0.000146,0.000138,0.000100,0.000109,0.000129,0.000138,0.000189,0.000113,0.000147,0.000141,0.000232,0.000382,0.000252,0.000252,0.000192,0.000113,0.000176,0.000188,0.000213,0.000138,0.000245,0.000338,0.000373,0.000458,0.000466,0.000127,0.000160,0.000176,0.000161,0.000141,0.000089,0.000176,0.000225,0.000084,0.000115,0.000113,0.000116,0.000085,0.000197,0.000251,0.000108,0.000211,0.000378,0.000192,0.000162,0.000186,0.000203,0.000104,0.000208,0.000106,0.000168,0.000199,0.000187,0.000186,0.000149,0.000108,0.000145,0.000100,0.000255,0.000148,0.000153,0.000133,0.000150,0.000099,0.000117,0.000113,0.000105,0.000141,0.000146,0.000190,0.000164,0.000147,0.000142,0.000098,0.000217,0.000151,0.000175,0.000263,0.000124,0.000137,0.000142,0.000152,0.000160,0.000113,0.000202,0.000348,0.000226,0.000167,0.000140,0.000094,0.000136,0.000096,0.000127,0.000164,0.000083,0.000111,0.000126,0.000146,0.000168,0.000079,0.000118,0.000140,0.000121,0.000149,0.000124,0.000103,0.000080,0.000126,0.000207,0.000192,0.000
110,0.000220,0.000226,0.000257,0.000362,0.000192,0.000102,0.000151,0.000156,0.000144,0.000089,0.000132,0.000216,0.000184,0.000115,0.000164,0.000198,0.008282,0.895197,0.747009,0.894240,0.768473,0.918492,0.706468,0.840145,0.241821,0.170987,0.074514,0.092318,0.054881,0.019789,0.097159,0.600544,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550,0.118550
