## Data Reformatting (brat to ConLL)

In [1]:
# from google.colab import drive

# drive.mount('/content/drive')

In [2]:
path = '/scratch/js12684/final/'

In [3]:
#retrieve paths for text and ann data

import glob
notes_orig =glob.glob(path + 'data/*.txt')
ann_orig = glob.glob(path + 'data/*.ann' )

print(len(notes_orig))
print(len(ann_orig))

400
400


In [4]:
!mkdir '/scratch/js12684/final/ann_ref_disp'

mkdir: cannot create directory '/scratch/js12684/final/ann_ref_disp': File exists


In [5]:
#reformat BRAT annotation to CoNLL for drug mentions:

for file in ann_orig:
    with open(file, 'r') as fin:
        outfile = path + 'ann_ref_disp/' + file.split('/')[-1]
        with open(outfile, 'w') as fileout:
            for line in fin:
                if line.startswith('T'):
                    # newline = line.replace('NoDisposition', 'Drug').replace('Disposition', 'Drug').replace('Temporality', 'Drug').replace('Undetermined', "Drug")
                    fileout.write(line)

In [6]:
ann_disp = glob.glob(path + 'ann_ref_disp/*.ann')
print(len(ann_disp))

400


In [7]:
#convert reformatted ann file into tab separated ConLL format
import numpy as np
from urllib.parse import ParseResult
from urllib.parse import ParseResultBytes

outfile = path + '/output_disp.txt'
with open(outfile, 'w') as output_file:
  out_list = []
  for ann_file in ann_disp:
    with open(ann_file, 'r') as ann_in:
      txt_file = path + f'data/{ann_file[-10:-4]}.txt'
      with open(txt_file, 'r') as txt_in:
        start_end = []
        count = 0
        disp = {}
        
        #reading each line of .ann file and extracting start and end indices 
        for line in ann_in:
            entry = line.split()
            disp[count] = entry[1]
            count += 1
            start_end.append([int(entry[2]), int(entry[3])])
            
        #reading each line of .txt file and extracting drug names using list of indices
        text_tokens = txt_in.read().split()
        drug_names = []
        for indicies in start_end:
          start = indicies[0]
          end = indicies[1]

          with open(txt_file, 'r') as txt_in:
            drug_names.append(txt_in.read()[indicies[0]:indicies[1]])
        
        #appending "O" or Disp label to each token in list of txt tokens

        for token in text_tokens:
          token_alnum = ''.join(ch for ch in token if ch.isalnum())
          if token_alnum in drug_names:
            idx = drug_names.index(token_alnum)
            if token == text_tokens[0]:
              out_list.append(f'{ann_file[-10:-4]}\t')
            else:
              out_list.append(f'{np.nan}\t')
            out_list.append(f'{token}\t{disp[idx]}\n')
          else:
            if token == text_tokens[0]:
              out_list.append(f'{ann_file[-10:-4]}\t')
            else:
              out_list.append(f'{np.nan}\t')
            out_list.append(f'{token}\tO\n')
        out_list.append('\n')
  out_string = ''.join(out_list)
  output_file.write(out_string)

## Data Processing for SimpleTransformers

In [8]:
#read in ConLL annotation file as Csv
import pandas as pd

data = pd.read_csv(path + 'output_disp.txt', sep='\t', lineterminator='\n', names = ['Note', 'Token', 'Label'])

#fill missing note numbers
data = data.fillna(method = 'ffill')

In [9]:
token_count = []
for note in data['Note'].unique():
  df = data.loc[data['Note']== note]
  count = 0
  for token in df['Token']:
    token_count.append(count)
    count += 1

data['Token Count'] = token_count
data.sort_values(by = ['Note', 'Token Count'], inplace=True)
data.reset_index(drop = True, inplace = True)

In [10]:
#encode note number
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
note_en = le.fit_transform(data['Note'])

data['Note'] = note_en

In [11]:
#Convert column names to be compatible with model
data.rename(columns = {'Note':'sentence_id', 'Token':'words', 'Label':'labels'}, inplace=True)

In [12]:
#train test split
from sklearn.model_selection import train_test_split

# X = data[['sentence_id','words']]
# y = data['labels'] 

train, test = train_test_split(data, test_size = 0.2, shuffle=False, random_state = 42 )
train, val = train_test_split(train, test_size = 0.125, shuffle=False, random_state = 42 )

print(train.shape)
print(val.shape)
print(test.shape)

(171901, 4)
(24558, 4)
(49115, 4)


In [13]:
#building up train data and test data
train_data = pd.DataFrame({"sentence_id":train["sentence_id"],"words":train["words"],"labels":train['labels']})
val_data = pd.DataFrame({"sentence_id":val["sentence_id"],"words":val["words"],"labels":val['labels']})
test_data = pd.DataFrame({"sentence_id":test["sentence_id"],"words":test["words"],"labels":test['labels']})

In [14]:
label = data["labels"].unique().tolist()
label

['O', 'NoDisposition', 'Disposition', 'Undetermined']

In [15]:
train_data

Unnamed: 0,sentence_id,words,labels
0,0,Record,O
1,0,date:,O
2,0,2106-02-12,O
3,0,Campbell,O
4,0,Orthopedic,O
...,...,...,...
171896,276,"33.8,",O
171897,276,PLT,O
171898,276,601,O
171899,276,(H),O


## Model Setup

In [16]:
#label counts in train data
train_data['labels'].value_counts()

O                167754
NoDisposition      3043
Disposition         850
Undetermined        254
Name: labels, dtype: int64

In [17]:
#calculate weights for class imbalance
n_samples = len(train_data)
n_classes = 4
freq_O = train_data['labels'].value_counts()[0]
freq_nodisp = train_data['labels'].value_counts()[1]
freq_disp = train_data['labels'].value_counts()[2]
freq_und = train_data['labels'].value_counts()[3]

freq_list = [freq_O, freq_nodisp, freq_disp, freq_und]

weights = [n_samples/(n_classes * freq) for freq in freq_list]
print(weights)

[0.2561801805024023, 14.122658560630956, 50.55911764705883, 169.1938976377953]


## clinical BERT tune epochs

In [18]:
from simpletransformers.ner import NERModel,NERArgs

In [24]:
def clinicalbert(epochs):
    # model parameters
    args = NERArgs()
    args.learning_rate = 5e-5
    args.scheduler = 'constant_schedule'
    args.num_train_epochs = epochs
    args.train_batch_size = 32
    args.eval_batch_size = 32
    # args.logging_steps = 10
    # args.evaluate_during_training = True
    # args.evaluate_during_training_verbose = True
    args.overwrite_output_dir = True
    # args.use_cached_eval_features = True
    args.manual_seed = 42
    
#     args.use_early_stopping = True
#     args.early_stopping_delta = 0.01
#     args.early_stopping_metric = "mcc"
#     args.early_stopping_metric_minimize = False
#     args.early_stopping_patience = 5
#     args.evaluate_during_training_steps = 1000
    
    
    #instantiate model
    cb_model = NERModel('bert', 'emilyalsentzer/Bio_ClinicalBERT', labels = label ,args = args ,weight = weights, use_cuda=True)
    
    #train and eval model with train + val data
    cb_model.train_model(train_data, eval_data = val_data)
    
    #test model on test data
    cb_result, cb_model_outputs, cb_preds_list = cb_model.eval_model(test_data)
    
    print(cb_result)

In [25]:
clinicalbert(5)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint 

  0%|          | 0/1 [00:00<?, ?it/s]



Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/9 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.44227076570192975, 'precision': 0.5238095238095238, 'recall': 0.717391304347826, 'f1_score': 0.6055045871559633}




In [30]:
clinicalbert(13)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint 

  0%|          | 0/1 [00:00<?, ?it/s]



Epoch:   0%|          | 0/13 [00:00<?, ?it/s]

Running Epoch 0 of 13:   0%|          | 0/9 [00:00<?, ?it/s]



Running Epoch 1 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 2 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 3 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 4 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 5 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 6 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 7 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 8 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 9 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 10 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 11 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 12 of 13:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.8666353623072306, 'precision': 0.7021276595744681, 'recall': 0.717391304347826, 'f1_score': 0.7096774193548387}




In [26]:
clinicalbert(15)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint 

  0%|          | 0/1 [00:00<?, ?it/s]



Epoch:   0%|          | 0/15 [00:00<?, ?it/s]

Running Epoch 0 of 15:   0%|          | 0/9 [00:00<?, ?it/s]



Running Epoch 1 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 2 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 3 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 4 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 5 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 6 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 7 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 8 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 9 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 10 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 11 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 12 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 13 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 14 of 15:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.8745310107866923, 'precision': 0.7021276595744681, 'recall': 0.717391304347826, 'f1_score': 0.7096774193548387}




In [27]:
clinicalbert(20)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint 

  0%|          | 0/1 [00:00<?, ?it/s]



Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Running Epoch 0 of 20:   0%|          | 0/9 [00:00<?, ?it/s]



Running Epoch 1 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 2 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 3 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 4 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 5 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 6 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 7 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 8 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 9 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 10 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 11 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 12 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 13 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 14 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 15 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 16 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 17 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 18 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 19 of 20:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.93466188510259, 'precision': 0.7111111111111111, 'recall': 0.6956521739130435, 'f1_score': 0.7032967032967032}




In [28]:
clinicalbert(25)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint 

  0%|          | 0/1 [00:00<?, ?it/s]



Epoch:   0%|          | 0/25 [00:00<?, ?it/s]

Running Epoch 0 of 25:   0%|          | 0/9 [00:00<?, ?it/s]



Running Epoch 1 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 2 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 3 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 4 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 5 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 6 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 7 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 8 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 9 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 10 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 11 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 12 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 13 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 14 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 15 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 16 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 17 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 18 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 19 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 20 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 21 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 22 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 23 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 24 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.7477831045786539, 'precision': 0.7111111111111111, 'recall': 0.6956521739130435, 'f1_score': 0.7032967032967032}




## Early Stopping

In [19]:
def clinicalbert_ES(lr = 5e-5, bs = 32):
    # model parameters
    args = NERArgs()
    args.learning_rate = lr
    args.scheduler = 'constant_schedule'
    args.num_train_epochs = 25
    args.train_batch_size = bs
    args.eval_batch_size = bs
    # args.logging_steps = 10
    args.evaluate_during_training = True
#     args.evaluate_during_training_verbose = True
    args.overwrite_output_dir = True
    # args.use_cached_eval_features = True
    args.manual_seed = 42
    
    args.use_early_stopping = True
    args.early_stopping_delta = 0.01
    args.early_stopping_metric = "eval_loss"
    args.early_stopping_metric_minimize = True
    args.early_stopping_patience = 3
    args.evaluate_during_training_steps = 21157
    args.early_stopping_consider_epochs = True
    
    
    #instantiate model
    cb_model = NERModel('bert', 'emilyalsentzer/Bio_ClinicalBERT', labels = label ,args = args ,weight = weights, use_cuda=True)
    
    #train and eval model with train + val data
    cb_model.train_model(train_data, eval_data = val_data)
    
    #test model on test data
    cb_result, cb_model_outputs, cb_preds_list = cb_model.eval_model(test_data)
    
    print(cb_result)

In [20]:
clinicalbert_ES()

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint 

  0%|          | 0/1 [00:00<?, ?it/s]



Epoch:   0%|          | 0/25 [00:00<?, ?it/s]

Running Epoch 0 of 25:   0%|          | 0/9 [00:00<?, ?it/s]



  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/2 [00:00<?, ?it/s]



Running Epoch 1 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/2 [00:00<?, ?it/s]



Running Epoch 2 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/2 [00:00<?, ?it/s]



Running Epoch 3 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/2 [00:00<?, ?it/s]



Running Epoch 4 of 25:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/2 [00:00<?, ?it/s]



  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.6202791134516398, 'precision': 0.56, 'recall': 0.6086956521739131, 'f1_score': 0.5833333333333334}




## weight decay

In [21]:
def clinicalbert_wd(wd):
    # model parameters
    args = NERArgs()
    args.learning_rate = 5e-5
    args.scheduler = 'constant_schedule'
    args.num_train_epochs = 10
    args.train_batch_size = 32
    args.eval_batch_size = 32
    # args.logging_steps = 10
#     args.evaluate_during_training = True
    # args.evaluate_during_training_verbose = True
    args.overwrite_output_dir = True
    # args.use_cached_eval_features = True
    args.manual_seed = 42
    args.weight_decay = wd
    
#     args.use_early_stopping = True
#     args.early_stopping_delta = 0.01
#     args.early_stopping_metric = "eval_loss"
#     args.early_stopping_metric_minimize = False
#     args.early_stopping_patience = 5
#     args.evaluate_during_training_steps = 2117
    
    
    #instantiate model
    cb_model = NERModel('bert', 'emilyalsentzer/Bio_ClinicalBERT', labels = label ,args = args ,weight = weights, use_cuda=True)
    
    #train and eval model with train + val data
    cb_model.train_model(train_data, eval_data = val_data)
    
    #test model on test data
    cb_result, cb_model_outputs, cb_preds_list = cb_model.eval_model(test_data)
    
    print(cb_result)

In [22]:
clinicalbert_wd(0.1)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint 

  0%|          | 0/1 [00:00<?, ?it/s]



Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/9 [00:00<?, ?it/s]



Running Epoch 1 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.6373378932476044, 'precision': 0.64, 'recall': 0.6956521739130435, 'f1_score': 0.6666666666666666}




In [23]:
clinicalbert_wd(0.01)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint 

  0%|          | 0/1 [00:00<?, ?it/s]



Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/9 [00:00<?, ?it/s]



Running Epoch 1 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.6755445102850596, 'precision': 0.7021276595744681, 'recall': 0.717391304347826, 'f1_score': 0.7096774193548387}




In [24]:
clinicalbert_wd(0.001)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint 

  0%|          | 0/1 [00:00<?, ?it/s]



Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/9 [00:00<?, ?it/s]



Running Epoch 1 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/3 [00:00<?, ?it/s]

{'eval_loss': 0.6468777855237325, 'precision': 0.64, 'recall': 0.6956521739130435, 'f1_score': 0.6666666666666666}


