# BERT Classifier for First ICU Prediction

## Imports & Inits

In [65]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import sys
sys.path.append('../')

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
%matplotlib inline

import numpy as np
np.set_printoptions(precision=2)

import pandas as pd
import pickle
import torch
from pathlib import Path

from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data.sampler import RandomSampler

In [3]:
from pytorch_pretrained_bert import BertTokenizer, BertAdam
from pytorch_pretrained_bert.modeling import BertForSequenceClassification

In [95]:
from bert_classifier.data_processor import read_df, convert_examples_to_features
from utils.splits import set_two_splits
from args import args
vars(args)

{'workdir': PosixPath('../data/work_dir/bert'),
 'dataset_csv': PosixPath('../data/processed_dataset.csv'),
 'bert_model': 'bert-base-mimic-cased',
 'bert_dir': PosixPath('../pretrained/pytorch-bert/bert-base-mimic-cased'),
 'max_seq_len': 512,
 'do_lower_case': False,
 'bs': 128,
 'device': 'cuda:3',
 'start_seed': 127,
 'cols': ['class_label', 'note'],
 'labels': [0, 1],
 'lr': 5e-05,
 'n_epochs': 1,
 'wd': 0.1,
 'warmup_prop': 0.1,
 'schedule': 'warmup_linear'}

In [None]:
# Load only the columns listed in args.cols from the processed dataset CSV.
ori_df = pd.read_csv(args.dataset_csv, usecols=args.cols)
# Hand a copy to the split helper so ori_df stays exactly as read from disk.
# NOTE(review): set_two_splits is assumed to add the 'split' column that the next cell filters on — confirm.
df = set_two_splits(ori_df.copy(), 'val')

In [None]:
# Load the tokenizer from the local BERT directory; casing follows args.do_lower_case.
tokenizer = BertTokenizer.from_pretrained(args.bert_dir, do_lower_case=args.do_lower_case)
# Convert the rows marked 'train' into examples ('note' and 'class_label' are passed as the
# column names — presumably text and label), then into model input features.
train_ex = read_df(df.loc[(df['split'] == 'train')], 'note', 'class_label')
train_feats = convert_examples_to_features(train_ex, args.labels, args.max_seq_len, tokenizer)
# Validation features are currently disabled.
# NOTE(review): the disabled read_df call below still filters on split == 'train' while passing
# set_type='val'; presumably it should select the 'val' rows — confirm before re-enabling.
# val_ex = read_df(df.loc[(df['split'] == 'train')], 'note', 'class_label', set_type='val')
# val_feats = convert_examples_to_features(val_ex, args.labels, args.max_seq_len, tokenizer)

In [27]:
# Load the pretrained sequence-classification weights and place the model on the configured device.
# NOTE(review): num_labels=1 is requested here, while args.labels lists two classes and the
# label tensors are built with dtype long — confirm this matches the loss used for training.
model = BertForSequenceClassification.from_pretrained(args.bert_dir, num_labels=1).to(args.device)

In [31]:
# Stack each per-example feature field into one long tensor (one row per example).
input_ids, input_mask, segment_ids, label_ids = (
  torch.tensor([getattr(feat, field) for feat in train_feats], dtype=torch.long)
  for field in ('input_ids', 'input_mask', 'segment_ids', 'label_id')
)

# Wrap the tensors in a dataset and draw batches of args.bs in random order.
train_ds = TensorDataset(input_ids, input_mask, segment_ids, label_ids)
train_dl = DataLoader(
  train_ds,
  sampler=RandomSampler(train_ds),
  batch_size=args.bs,
)
# Keep a live iterator so later cells can pull single batches for inspection.
itr = iter(train_dl)

In [52]:
# Pull one batch from the iterator and display the shape of each tensor as a sanity check.
iids, masks, sids, labels = next(itr)
iids.shape, masks.shape, sids.shape, labels.shape

(torch.Size([128, 512]),
 torch.Size([128, 512]),
 torch.Size([128, 512]),
 torch.Size([128]))

In [101]:
from typing import List, Tuple

In [110]:
# Scratch check that the annotation used for build_optimizer's first argument is a valid typing expression.
# NOTE(review): the first tuple element is annotated int, but build_optimizer runs substring
# tests (`nd in n`) against it, so it is presumably the parameter name (str) — confirm.
List[Tuple[int, torch.nn.parameter.Parameter]]

typing.List[typing.Tuple[int, torch.nn.parameter.Parameter]]

In [109]:
# param_optim is not defined in any earlier cell; build it here so this check runs on a fresh kernel.
param_optim = list(model.named_parameters())
# Inspect the type of the parameter object in the first (name, parameter) pair.
type(param_optim[0][1])

torch.nn.parameter.Parameter

In [117]:
def build_optimizer(named_params: List[Tuple[str, torch.nn.parameter.Parameter]],
                    n_steps: int, lr: float, warmup_prop: float, wd: float,
                    schedule: str = 'warmup_linear') -> 'BertAdam':
  """Create a BertAdam optimizer with weight decay switched off for bias and LayerNorm parameters.

  Args:
    named_params: (name, parameter) pairs, e.g. `model.named_parameters()`. Any iterable
      is accepted; it is materialized once because it is scanned twice below.
    n_steps: total number of optimization steps, forwarded to BertAdam as `t_total`.
    lr: learning rate, forwarded as `lr`.
    warmup_prop: forwarded to BertAdam as `warmup`.
    wd: weight decay for every parameter whose name matches none of the no-decay
      patterns; also forwarded as BertAdam's `weight_decay`.
    schedule: forwarded to BertAdam as `schedule`.

  Returns:
    The BertAdam optimizer built over the two parameter groups.
  """
  # Materialize so a generator (such as model.named_parameters()) is not exhausted
  # by the first comprehension, which would leave the second group empty.
  named_params = list(named_params)
  no_decay = ('bias', 'LayerNorm.bias', 'LayerNorm.weight')

  def _skips_decay(name: str) -> bool:
    # A parameter is exempt from decay when any no-decay pattern occurs in its name.
    return any(nd in name for nd in no_decay)

  # Group from the argument itself, not from a notebook-level global.
  grouped_params = [
    {'params': [p for n, p in named_params if not _skips_decay(n)], 'weight_decay': wd},
    {'params': [p for n, p in named_params if _skips_decay(n)], 'weight_decay': 0.0}
  ]

  return BertAdam(grouped_params, lr=lr, warmup=warmup_prop, t_total=n_steps,
                  schedule=schedule, weight_decay=wd)

In [100]:
# Direct BertAdam construction (the same parameter grouping build_optimizer performs).
# optim_grouped_params and t_total are not defined in any earlier cell, so they are
# derived here to let this cell run on a fresh kernel.
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optim_grouped_params = [
  {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
   'weight_decay': args.wd},
  {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
   'weight_decay': 0.0}
]
# One optimizer step per batch; len(train_dl) includes the final partial batch.
t_total = len(train_dl) * args.n_epochs
optimizer = BertAdam(optim_grouped_params, lr=args.lr, schedule=args.schedule, warmup=args.warmup_prop,
                     t_total=t_total)

In [118]:
# Total optimizer steps = batches per epoch x epochs. len(train_dl) counts the final partial
# batch that the loader still yields; len(train_ds)//args.bs dropped it and gave 0 steps
# whenever the dataset was smaller than one batch.
n_steps = len(train_dl) * args.n_epochs

In [119]:
# Build the optimizer from the model's named parameters and the hyperparameters held in args.
optimizer = build_optimizer(
  named_params=list(model.named_parameters()),
  n_steps=n_steps,
  lr=args.lr,
  warmup_prop=args.warmup_prop,
  wd=args.wd,
  schedule=args.schedule,
)