In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Put "../src" at the front of the module search path so it takes precedence
# over installed packages of the same name.
# Presumably this is where the project-local modules imported in the next cell
# (vectorize, helpers, utils, loss) live -- confirm. The path is relative, so it
# resolves against the kernel's current working directory.
import sys
sys.path.insert(0,"../src")

In [3]:
import pandas as pd
import numpy as np

from fastai2 import *
from fastai2.text.all import *
import vectorize
import helpers
import transformers
from utils import *
from loss.loss import *

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  '{0}.{1}.{2}'.format(*version.hdf5_built_version_tuple)


# Load the data

In [4]:
# Load the pickled DataFrame from the data directory (path is relative to the
# kernel's working directory).
# NOTE: unpickling can execute arbitrary code -- only load pickle files that
# were produced by a trusted source.
full_df = pd.read_pickle("../data/full_df.pkl")

In [5]:
# NOTE(review): MAX_VOCAB and MAX_SEQ_LENGTH are not referenced by any other
# cell visible in this notebook -- confirm they are still needed.
MAX_VOCAB = None
MAX_SEQ_LENGTH = 5000
# Store the result of vectorize.clean_notes (called with the frame and the
# 'TEXT' column name) as a new 'TEXT_PROCESSED' column.
# This assigns into full_df in place, so the frame loaded in the previous cell
# is modified rather than copied.
full_df['TEXT_PROCESSED'] = vectorize.clean_notes(full_df, 'TEXT')

In [6]:
full_df.head()

Unnamed: 0_level_0,TEXT,ICD9_GRP,LABELS,TEXT_PROCESSED
HADM_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100001,"{Admission Date: [**2117-9-11**] Discharge Date: [**2117-9-17**]\n\nDate of Birth: [**2082-3-21**] Sex: F\n\nService: MEDICINE\n\nAllergies:\nLevaquin\n\nAttending:[**First Name3 (LF) 2195**]\nChief Complaint:\nnausea, vomiting\n\n\nMajor Surgical or Invasive Procedure:\nnone\n\nHistory of Present Illness:\n35F w/ poorly controlled Type 1 diabetes mellitus w/ neuropathy,\nnephropathy, HTN, gastroparesis, CKD and retinopathy, recently\nhospitalized for orthostatic hypotension [**2-3**] autonomic\nneuropathy [**Date range (1) 25088**]; DKA hospitalizations in [...","[240-279, 320-389, 580-629, 520-579, , 240-279, 520-579, 390-459, 240-279, 390-459, 580-629, 240-279, 320-389, 240-279, 680-709, ]","[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",admission date discharge date date of birth sex f service medicine allergies levaquin attending chief complaint nausea vomiting major surgical or invasive procedure none history of present illness DGf w poorly controlled type DG diabetes mellitus w neuropathy nephropathy htn gastroparesis ckd and retinopathy recently hospitalized for orthostatic hypotension autonomic neuropathy dka hospitalizations in and now returning w DGd history of worsening nausea vomiting with coffee ground emesis chills and dyspnea on exertion last week she had a fall and hit her right face she also had DG day of di...
100003,"{Admission Date: [**2150-4-17**] Discharge Date: [**2150-4-21**]\n\nDate of Birth: [**2090-5-19**] Sex: M\n\nService: MEDICINE\n\nAllergies:\nPatient recorded as having No Known Allergies to Drugs\n\nAttending:[**First Name3 (LF) 12174**]\nChief Complaint:\ncoffee ground emesis\n\nMajor Surgical or Invasive Procedure:\nEGD\nRight IJ CVL\n\n\nHistory of Present Illness:\nMr. [**Known lastname 52368**] is a 59M w HepC cirrhosis c/b grade I/II esophageal\nvarices and portal gastropathy (last EGD [**3-/2150**]), who p/w\ncoffee-ground emesis and melena x2 days.\n...","[520-579, 290-319, 001-139, 520-579, 390-459, 520-579, 390-459, 520-579, 780-789]","[0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]",admission date discharge date date of birth sex m service medicine allergies patient recorded as having no known allergies to drugs attending chief complaint coffee ground emesis major surgical or invasive procedure egd right ij cvl history of present illness mr is a DGm w hepc cirrhosis c b grade i ii esophageal varices and portal gastropathy last egd who p w coffee ground emesis and melena xDG days pt was in his usoh until about DG DG days pta when he began experiencing intermittent nausea he had DG DG episodes of coffee ground emesis and DG episode of tarry black stool in the morning of...
100006,"{Admission Date: [**2108-4-6**] Discharge Date: [**2081-4-7**]\n\nDate of Birth: [**2059-5-7**] Sex: F\n\nService: O MED\n\nCHIEF COMPLAINT: Dyspnea.\n\nHISTORY OF PRESENT ILLNESS: This is a 48 year old African\nAmerican female with a history of multiple myelomas being\nadmitted for respiratory distress. The patient has been\nrecently discharged one week ago from outside hospital ([**Hospital3 7900**]) for respiratory distress. Back at [**Hospital3 7362**],\nshe was given nebulizer, antibiotics and steroids. She also\nhad elevated INR and was given medication to lowe...","[460-519, 460-519, 460-519, 140-239, 240-279, 780-789, 290-319, , ]","[1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]",admission date discharge date date of birth sex f service o med chief complaint dyspnea history of present illness this is a DG year old african american female with a history of multiple myelomas being admitted for respiratory distress the patient has been recently discharged one week ago from outside hospital for respiratory distress back at she was given nebulizer antibiotics and steroids she also had elevated inr and was given medication to lower inr although there was no evidence of bleeding last night she reports having increased difficulty with breathing she has also had a cough she...
100007,{Admission Date: [**2145-3-31**] Discharge Date: [**2145-4-7**]\n\nDate of Birth: [**2071-6-4**] Sex: F\n\nService: SURGERY\n\nAllergies:\nPenicillins / Dilantin\n\nAttending:[**First Name3 (LF) 301**]\nChief Complaint:\nSevere abdominal and back pain\nUnable to take oral intake.\nNo flatus or bowel movement.\nAbdominal distention.\n\n\nMajor Surgical or Invasive Procedure:\nExploratory Laparotomy\nLysis of adhesions\nSmall Bowel Resection\nJejunosotomy\n\n\nHistory of Present Illness:\nMs [**Known lastname **] is a 73 year old female with a history of multip...,"[520-579, 520-579, 800-999, 460-519, 390-459]","[0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0]",admission date discharge date date of birth sex f service surgery allergies penicillins dilantin attending chief complaint severe abdominal and back pain unable to take oral intake no flatus or bowel movement abdominal distention major surgical or invasive procedure exploratory laparotomy lysis of adhesions small bowel resection jejunosotomy history of present illness ms is a DG year old female with a history of multiple abdominal surgeries pancreatitis and previous sbo she presented to the emergency department on with complaints of abdominal pain radiating to her back that began in the mo...
100009,"{Admission Date: [**2162-5-16**] Discharge Date: [**2162-5-21**]\n\nDate of Birth: [**2101-7-30**] Sex: M\n\nService: CARDIOTHORACIC\n\nAllergies:\nNo Known Allergies / Adverse Drug Reactions\n\nAttending:[**First Name3 (LF) 1505**]\nChief Complaint:\nAngina\n\nMajor Surgical or Invasive Procedure:\n[**2162-5-17**]: CABGx4 LIMA-> LAD, RSVG-> Diagonal, Posterior\nDescending Artery, Obtuse marginal\n[**2162-5-19**]: Right Atrial lead placement\n\n\nHistory of Present Illness:\n60yo man with known coronary disease (AMI in [**2143**] and [**Name Prefix (Prefixes)...","[390-459, 800-999, 390-459, 240-279, 240-279, , 390-459, 390-459, , , 290-319, 390-459, 240-279, 390-459, 390-459, , , ]","[1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]",admission date discharge date date of birth sex m service cardiothoracic allergies no known allergies adverse drug reactions attending chief complaint angina major surgical or invasive procedure cabgxDG lima lad rsvg diagonal posterior descending artery obtuse marginal right atrial lead placement history of present illness DGyo man with known coronary disease ami in and lcx doing well until last week when he developed angina initially with exertion then progressed to rest angina each episode was releived with sl ntg no episode lasting more than DG minutes he presented to cardiologist for t...


# Training

Create Item Transformations

In [16]:

# One transform pipeline per element of each dataset item:
#   1. read the cleaned note text ('TEXT_PROCESSED') and pass it through the
#      BERT tokenizer wrapper;
#   2. read the label list ('ICD9_GRP') and pass it through MultiCategorize and
#      OneHotEncode to build the multi-label target.
# NOTE(review): `FastAIBertTokenizer` and `bert_tok` are not defined in any cell
# visible above this one (execution counts jump from 6 to 16) -- confirm they
# are available on a fresh kernel.
item_tfms = [
    [ColReader('TEXT_PROCESSED'), FastAIBertTokenizer(tokenizer=bert_tok)],
    [ColReader('ICD9_GRP'), MultiCategorize, OneHotEncode],
]

# Create datasource & dataloaders

# Seed the splitter so the train/validation partition is the same on every
# "Restart & Run All"; an unseeded RandomSplitter draws a new split each run,
# which makes validation numbers incomparable between sessions.
splits = RandomSplitter(seed=42)(range_of(full_df))
dsrc = DataSource(full_df, tfms=item_tfms, splits=splits)
# Batch size 2 and num_workers=0 (batches are loaded in the main process).
dls = dsrc.databunch(bs=2, num_workers=0)

Testing dataloaders

In [17]:
# Pull a single batch to check that the data pipeline runs end to end.
# NOTE(review): the saved output of this cell is
# "RuntimeError: CUDA error: device-side assert triggered", so this step was
# failing when the notebook was last executed. Re-running on CPU (or with
# CUDA_LAUNCH_BLOCKING=1) usually surfaces the actual failing operation.
b = dls.one_batch()

RuntimeError: CUDA error: device-side assert triggered

Showing batch

In [None]:
@typedispatch
def show_batch(x, y, samples, ctxs=None, max_n=9, **kwargs):
    """Render decoded samples as a two-column table of text excerpts and labels.

    For every entry in `samples`, element 0 is sliced to characters 300-499 for
    the 'Text' column and element 1 is filtered to drop empty-string labels for
    the 'Label' column. The table is shown with `display_df`.

    `x`, `y`, `max_n` and `**kwargs` are accepted but not used by this
    implementation. Returns `ctxs`, or `Inf.nones` when `ctxs` is None.

    NOTE(review): `x` and `y` carry no type annotations, so this is presumably
    registered as the catch-all dispatch target -- confirm it does not shadow
    other `show_batch` implementations.
    """
    ctxs = Inf.nones if ctxs is None else ctxs

    positions = range_of(samples)
    # Fixed character window [300:500] of each sample's text.
    excerpts = [samples[pos][0][300:500] for pos in positions]
    # Keep only non-empty label strings for each sample.
    labels = [[tag for tag in samples[pos][1] if tag != ""] for pos in positions]

    display_df(pd.DataFrame({'Text': excerpts, 'Label': labels}))
    return ctxs

In [None]:
dls.show_batch(b)

# Modelling

In [None]:
from models.lstm_attn import *
from models.lstm import *
from loss.loss import *
from fastai2.metrics import *
from transformers import BertModel, BertConfig


In [None]:
# Sizes passed to the model constructor below as V, E and C.
# V: number of entries in `fastai_bert_vocab`.
# NOTE(review): `fastai_bert_vocab` is not defined in any visible cell -- confirm
# where it comes from.
V = len(fastai_bert_vocab)
# E: presumably the embedding dimension -- confirm against the model definition.
E = 200
# C: length of the vocab held by the second transform pipeline (the 'ICD9_GRP'
# label pipeline); presumably the number of label classes -- confirm.
C = len(dsrc.tfms[1].vocab)

In [None]:
# Loss function
# NOTE(review): ICD_Loss comes from the project's loss module; what index 0
# stands for in `ignore_index=0` is not visible here -- confirm.
loss_func = ICD_Loss(ignore_index=0)

# Metrics
# The targets are one-hot multi-label vectors (OneHotEncode in the label
# pipeline), so the thresholded metric is `accuracy_multi`. fastai2's plain
# `accuracy(inp, targ, axis=-1)` has no `thresh` parameter, so
# `partial(accuracy, thresh=0.2)` raises TypeError the first time the metric is
# evaluated.
# NOTE(review): `accuracy_multi` applies a sigmoid to the predictions by default
# (`sigmoid=True`); pass `sigmoid=False` if the model already outputs
# probabilities -- confirm against the model's forward().
acc_02 = partial(accuracy_multi, thresh=0.2)
#f_score = partial(fbeta, thresh=0.2)

# Configuration
config = BertConfig.from_pretrained('bert-base-uncased')

# Model
model = BiLSTMWithBertEmbedding(config, V=V, E=E, C=C)


In [None]:
# Bundle the dataloaders, model, loss function and the thresholded accuracy
# metric into a single Learner used for the LR search and training below.
learn = Learner(dls, model, loss_func = loss_func, metrics=[acc_02])

In [None]:
learn.lr_find()

In [None]:
# Train for 3 epochs with the one-cycle schedule.
# NOTE(review): `4*10e-3` evaluates to 0.04, because `10e-3` is 1e-2, not 1e-3.
# If a peak learning rate of 0.004 was intended, this should be `4e-3` --
# confirm against the lr_find plot above before changing it.
learn.fit_one_cycle(3, lr_max=4*10e-3)