In [None]:
# Shell escape: print the `nvidia-smi` report before any training cell runs.
!nvidia-smi

In [None]:
import numpy as np
import pandas as pd
from fastai import *
from fastai.text import *

### Load the data

In [None]:
# Root data directory handed to TextList.from_folder and load_data below.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
path = "/home/littlefield/MIMIC-NLP/readmission-prediction/data/"
# Batch size shared by the language-model and classifier DataBunches.
bs = 64

In [None]:
# Language-model DataBunch: collect the texts under `path`, keep only the
# 'train' and 'valid' folders, split by folder, label for language modelling.
lm_texts = TextList.from_folder(path).filter_by_folder(include=['train', 'valid'])
lm_labelled = lm_texts.split_by_folder().label_for_lm()
clinical_lm = lm_labelled.databunch(bs=bs)

In [None]:
# Classifier DataBunch: same folder layout, but reusing the language model's
# vocab (vocab=clinical_lm.vocab) and labelling from the 'neg'/'pos' folders.
clas_texts = TextList.from_folder(path, vocab=clinical_lm.vocab)
clas_labelled = clas_texts.split_by_folder().label_from_folder(classes=['neg', 'pos'])
clinical_data = clas_labelled.databunch(bs=bs, num_workers=1)

In [None]:
# Persist both DataBunches under the file names that the load_data cell
# reads back, so they can be restored without rebuilding them.
clinical_lm.save('lm_export-class.pkl')
clinical_data.save('clas_export-class.pkl')

In [None]:
# Restore the two saved DataBunches from `path`, using the shared batch size.
# NOTE(review): the classifier DataBunch was originally built with
# num_workers=1, which is not passed again here — confirm whether the reload
# is meant to keep that setting.
clinical_lm = load_data(path, 'lm_export-class.pkl', bs=bs)
clinical_data = load_data(path, 'clas_export-class.pkl', bs=bs)

### 1. Build a language model using the clinical notes

In [None]:
# Build language model

In [None]:
# AWD_LSTM language-model learner on the clinical-notes DataBunch, created
# with drop_mult=0.3 and converted with to_fp16().
learn = language_model_learner(clinical_lm, AWD_LSTM, drop_mult=0.3).to_fp16()

In [None]:
# Learning-rate finder; its recorded curve is plotted in the next cell.
learn.lr_find()

In [None]:
learn.recorder.plot()

In [None]:
# Fit and save, to only run once and save time
# (first one-cycle fit, called with 1 and a learning rate of 1e-3).
learn.fit_one_cycle(1, 1e-3)

In [None]:
# Checkpoint the learner after the first fit.
learn.save("clinical_lm-class-step1")

In [None]:
# Unfreeze the learner before the second language-model fit.
learn.unfreeze()

In [None]:
# Re-run the learning-rate finder on the unfrozen learner and plot the curve.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Fit and save, to only run once and save time
# (second one-cycle fit, with the learning rate given as slice(1e-4, 1e-2)).
learn.fit_one_cycle(1, slice(1e-4,1e-2))

In [None]:
# Checkpoint the full learner after the second fit.
learn.save('clinical_lm-class-step2')

In [None]:
# Save the encoder separately; the classifier section loads it by this name
# through load_encoder.
learn.save_encoder("clinical_lm-class-step2_encoder")

### 2a. Build Class Weights to handle Imbalance

In [None]:
# Inspect the training labels; this is the array the class weights are computed from.
clinical_data.train_ds.y.items

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# "balanced" class weights for the two label ids found in the training labels
# (presumably 0 = 'neg', 1 = 'pos', following the `classes` order used when the
# classifier DataBunch was labelled — confirm).
# `classes` is passed as an ndarray rather than a plain list: scikit-learn
# documents the parameter as an ndarray, and recent releases reject a list
# during parameter validation.
train_labels = clinical_data.train_ds.y.items
train_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.array([0, 1]),
    y=train_labels,
)

In [None]:
# Display the computed per-class weights.
train_weights

### 2. Build a classifier to predict readmission

In [None]:
# Classifier learner on the labelled DataBunch (AWD_LSTM, drop_mult=1, fp16),
# reporting accuracy, AUROC, precision and recall. The encoder saved at the end
# of the language-model section is then loaded into it.
learn = text_classifier_learner(clinical_data, AWD_LSTM, drop_mult=1, metrics=[accuracy, AUROC(), Precision(), Recall()]).to_fp16()
learn.load_encoder('clinical_lm-class-step2_encoder')

In [None]:
# Class weights as a float tensor, moved to the GPU with .cuda().
# NOTE(review): .cuda() requires a CUDA device — confirm that is always the case here.
class_weights=torch.FloatTensor(train_weights).cuda()

In [None]:
# Replace the loss wrapped by the learner with a class-weighted cross-entropy.
# NOTE(review): this assigns to `loss_func.func`, so it assumes the learner's
# loss object exposes a `func` attribute — confirm for the installed fastai version.
learn.loss_func.func = nn.CrossEntropyLoss(weight=class_weights)

In [None]:
# Learning-rate finder for the classifier; the plot is drawn with suggestion=True.
learn.lr_find()
learn.recorder.plot(suggestion=True)

In [None]:
# Base learning rate for the first classifier fit
# (presumably read off the finder plot above — confirm).
lr = 7e-02

In [None]:
# First classifier fit: one-cycle called with 5 and learning rates given as
# slice(lr / 2.6**4, lr).
learn.fit_one_cycle(5, slice(lr/(2.6**4), lr))

In [None]:
# Checkpoint after the first classifier fit.
learn.save("c_learner-1-4")

In [None]:
# Reload the checkpoint saved in the previous cell.
learn.load("c_learner-1-4")

In [None]:
# Second classifier stage: call freeze_to(-2), then re-run the learning-rate
# finder and plot its curve.
learn.freeze_to(-2)
learn.lr_find()
learn.recorder.plot()

In [None]:
# One-cycle fit called with 5, learning rates slice(5e-3 / 2.6**4, 5e-3) and
# moms=(0.8, 0.7).
learn.fit_one_cycle(5, slice(5e-3/(2.6**4),5e-3), moms=(0.8,0.7))

In [None]:
# Checkpoint after the second classifier fit.
learn.save("c_learner-2-4")

In [None]:
# Reload the checkpoint saved in the previous cell.
learn.load("c_learner-2-4")

In [None]:
# Third classifier stage: call freeze_to(-3) before the next fit.
learn.freeze_to(-3)

In [None]:
# Re-run the learning-rate finder and plot its curve.
learn.lr_find()
learn.recorder.plot()

In [None]:
# One-cycle fit called with 5, learning rates slice(1e-04 / 2.6**4, 1e-04) and
# moms=(0.8, 0.7).
learn.fit_one_cycle(5, slice(1e-04/(2.6**4),1e-04), moms=(0.8,0.7))

In [None]:
# Checkpoint after the third classifier fit.
learn.save("c_learner-3-4")

In [None]:
# Reload the checkpoint saved in the previous cell.
learn.load("c_learner-3-4")

In [None]:
# Final classifier stage: unfreeze the learner before the last fit.
learn.unfreeze()

In [None]:
# Re-run the learning-rate finder and plot its curve.
learn.lr_find()
learn.recorder.plot()

In [None]:
# One-cycle fit called with 4 and learning rates slice(1e-5 / 2.6**4, 1e-5).
learn.fit_one_cycle(4, slice(1e-5/(2.6**4),1e-5))

In [None]:
# Save the final classifier checkpoint.
learn.save("c_learner-unfreeze-4")