## Note:
This is a working notebook for testing the baseline, LSTM and CNN models. The final notebook will have many more examples and will include visualizations of what the data looks like.

In [1]:
%matplotlib inline
# General imports
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
import random
from collections import Counter, defaultdict
from operator import itemgetter
import matplotlib.pyplot as plt


#keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Input, MaxPooling1D, Convolution1D, Embedding
from keras.layers.merge import Concatenate
from keras.models import load_model
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

# Custom functions
%load_ext autoreload
%autoreload 2
import database_selection
import vectorization
import helpers
import icd9_cnn_model
import lstm_model


Using TensorFlow backend.


In [2]:
# Read the notes CSV into a DataFrame.
# Column names are passed explicitly via `names`, so pandas does not take them from the file.
# NOTE(review): if the CSV actually has a header row, it would be loaded as a data row — confirm.
full_df = pd.read_csv('../data/disch_notes_all_icd9.csv',
                 names = ['HADM_ID', 'SUBJECT_ID', 'DATE', 'ICD9','TEXT'])


In [3]:
print 'full shape: ', full_df.shape
# taking just a subset of the records for developing models
df = full_df.sample(frac=0.1).reset_index(drop=True)
#df = full_df
print 'shapeto process: ', df.shape

full shape:  (52696, 5)
shapeto process:  (5270, 5)


## Pre processing ICD 9 codes

In [4]:
# Instead of finding the top 20 leaf ICD-9 codes and filtering records based on them,
# we use all records and replace each leaf ICD-9 code with its grandparent code in the first level of the hierarchy.


In [5]:
# First-level ICD-9 code ranges used as the label set (one class per entry).
# NOTE(review): the list has no '280-289' or '740-759' entry, and the 780-799 range is
# split into '780-789', '790-796', '797', '798', '799' — confirm this is intentional and
# matches what helpers.replace_with_grandparent_codes expects.
ICD9_FIRST_LEVEL = [
    '001-139','140-239','240-279','290-319', '320-389', '390-459','460-519', '520-579', '580-629', 
    '630-679', '680-709','710-739', '760-779', '780-789', '790-796', '797', '798', '799', '800-999' ]
# Number of classes; passed later as num_classes when the model is built.
N_TOP = len(ICD9_FIRST_LEVEL)
# Replace the leaf ICD-9 codes of every record with their first-level (grandparent) codes.
# The mapping itself lives in helpers.replace_with_grandparent_codes (outside this notebook).
# df['ICD9'] is overwritten, so re-running this cell feeds already-replaced codes back in.
df['ICD9'] = df['ICD9'].apply(lambda x: helpers.replace_with_grandparent_codes(x,ICD9_FIRST_LEVEL))


In [6]:
#counts by icd9_codes
icd9_codes = Counter()
for label in df['ICD9']:
    for icd9_code in label.split():
        icd9_codes[icd9_code] += 1
number_icd9_first_level = len (icd9_codes)
print icd9_codes
print 'Number of icd9 codes in the first level that have notes in the dataset:', number_icd9_first_level

Counter({'390-459': 4144, '240-279': 3441, '290-319': 2825, '460-519': 2460, '800-999': 2193, '580-629': 2127, '520-579': 2033, '780-789': 1597, '320-389': 1507, '001-139': 1443, '710-739': 975, '140-239': 854, '680-709': 587, '760-779': 528, '790-796': 452, '799': 174, '630-679': 16})
Number of icd9 codes in the first level that have notes in the dataset: 17


In [7]:
# Convert the ICD9 column into one label vector per record.
# The encoding is done by vectorization.vectorize_icd_column (outside this notebook);
# the sample printed below is a 0/1 vector with one position per entry of top_codes.
top_codes = ICD9_FIRST_LEVEL
labels = vectorization.vectorize_icd_column(df, 'ICD9', top_codes)
# Show the first record's vector as a sanity check.
print 'sample of vectorized icd9 labels: ', labels[0]


sample of vectorized icd9 labels:  [0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1]


## Pre process Notes

In [8]:
# Preprocess the note text: clean, map words to ids, then pad to a fixed length.
# All three steps are implemented in the project module `vectorization`.
MAX_VOCAB = None # cap on the vocabulary size (None = no limit)
MAX_SEQ_LENGTH = 5000 # cap on the length of each word sequence (None = no limit)
# Overwrites the TEXT column with the cleaned notes.
df.TEXT = vectorization.clean_notes(df, 'TEXT')
# MAX_VOCAB is rebound to the value returned here (printed below as "Final Vocabulary"),
# so re-running this cell passes that number instead of None.
data_vectorized, dictionary, MAX_VOCAB = vectorization.vectorize_notes(df.TEXT, MAX_VOCAB, verbose = True)
# MAX_SEQ_LENGTH is likewise rebound to the length returned by pad_notes.
data, MAX_SEQ_LENGTH = vectorization.pad_notes(data_vectorized, MAX_SEQ_LENGTH)

print("Final Vocabulary: %s" % MAX_VOCAB)
print("Final Max Sequence Length: %s" % MAX_SEQ_LENGTH)

Vocabulary size: 44606
Average note length: 1630.98121442
Max note length: 7726
Final Vocabulary: 44606
Final Max Sequence Length: 5000


In [9]:
# Load the external pre-trained word embeddings and build the embedding matrix
# for the notes vocabulary (done by vectorization.embedding_matrix, outside this notebook).
EMBEDDING_DIM = 100 # dimensionality of the chosen embedding file
EMBEDDING_LOC = '../data/notes.100.txt' # location of embedding
# No placeholder assignment is needed: both names are bound directly from the call.
# NOTE(review): the meaning of sigma=True is defined in vectorization.embedding_matrix — confirm there.
EMBEDDING_MATRIX, embedding_dict = vectorization.embedding_matrix(EMBEDDING_LOC,
                                                                  dictionary, EMBEDDING_DIM, verbose = True, sigma=True)


('Vocabulary in notes:', 44606)
('Vocabulary in original embedding:', 21056)
('Vocabulary intersection:', 19924)


## Split Files

In [10]:
# Split the padded notes and label vectors into train / validation / test sets
# (20% validation, 10% test, remainder train) with a fixed random_state.
# The split itself is implemented in helpers.train_val_test_split (outside this notebook).
X_train, X_val, X_test, y_train, y_val, y_test = helpers.train_val_test_split(
    data, labels, val_size=0.2, test_size=0.1, random_state=101)
# Under Python 2 these multi-argument print(...) calls print a tuple,
# which is why the recorded output below is parenthesised.
print("Train: ", X_train.shape, y_train.shape)
print("Validation: ", X_val.shape, y_val.shape)
print("Test: ", X_test.shape, y_test.shape)

('Train: ', (3688, 5000), (3688, 19))
('Validation: ', (1054, 5000), (1054, 19))
('Test: ', (528, 5000), (528, 19))


In [11]:
# Delete temporary variables to free some memory: the sampled frame, the padded
# matrix and the label array are no longer needed once the splits exist.
# NOTE(review): full_df and data_vectorized remain bound after this cell — confirm
# whether they can be released as well.
del df, data, labels

## CNN and attention

In [12]:
import icd9_cnn_att

In [14]:
# Re-import the module (Python 2 builtin reload) so edits to icd9_cnn_att are picked up
# before the model is rebuilt.
reload(icd9_cnn_att)
# Build the CNN + attention model: pre-trained embedding matrix, 100 filters for each
# kernel size in [2,3,4,5], training_dropout=0.8, one output per first-level ICD-9 class.
# The architecture itself is defined in icd9_cnn_att.build_icd9_cnn_model.
cnn_att_model = icd9_cnn_att.build_icd9_cnn_model (input_seq_length=MAX_SEQ_LENGTH, max_vocab = MAX_VOCAB,
                             external_embeddings = True,
                             embedding_dim=EMBEDDING_DIM,embedding_matrix=EMBEDDING_MATRIX,
                             num_filters = 100, filter_sizes=[2,3,4,5],
                             training_dropout=0.8,
                             num_classes=N_TOP )

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_2 (InputLayer)             (None, 5000)          0                                            
____________________________________________________________________________________________________
embedding_2 (Embedding)          (None, 5000, 100)     4460700     input_2[0][0]                    
____________________________________________________________________________________________________
conv1d_5 (Conv1D)                (None, 4999, 100)     20100       embedding_2[0][0]                
____________________________________________________________________________________________________
conv1d_6 (Conv1D)                (None, 4998, 100)     30100       embedding_2[0][0]                
___________________________________________________________________________________________

In [None]:
### 0.8 dropout and a learning rate of 0.0007

In [15]:
# Train for 15 epochs in mini-batches of 50; the validation split is evaluated after every epoch.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=15, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f88a7382c50>

In [16]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.533      0.530
0.030:      0.552      0.548
0.040:      0.568      0.564
0.050:      0.583      0.577
0.055:      0.590      0.583
0.058:      0.594      0.585
0.060:      0.596      0.588
0.080:      0.620      0.607
0.100:      0.640      0.624
0.200:      0.714      0.678
0.300:      0.759      0.705
0.400:      0.775      0.713
0.500:      0.764      0.699
0.600:      0.728      0.662
0.700:      0.664      0.603


### 0.8 dropout default learning rate

In [20]:
# Train for 12 epochs in mini-batches of 50; the validation split is evaluated after every epoch.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=12, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f754066f890>

In [21]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.543      0.543
0.030:      0.562      0.559
0.040:      0.577      0.572
0.050:      0.591      0.585
0.055:      0.596      0.592
0.058:      0.600      0.595
0.060:      0.602      0.596
0.080:      0.619      0.610
0.100:      0.634      0.624
0.200:      0.698      0.674
0.300:      0.745      0.705
0.400:      0.770      0.720
0.500:      0.768      0.711
0.600:      0.737      0.687
0.700:      0.685      0.637


### learning rate lower
0.0007

In [16]:
# Train for 10 epochs in mini-batches of 50; the validation split is evaluated after every epoch.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=10, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f759283a050>

In [17]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.530      0.529
0.030:      0.550      0.548
0.040:      0.567      0.563
0.050:      0.582      0.577
0.055:      0.588      0.582
0.058:      0.592      0.586
0.060:      0.594      0.588
0.080:      0.614      0.608
0.100:      0.632      0.625
0.200:      0.695      0.677
0.300:      0.742      0.709
0.400:      0.766      0.723
0.500:      0.764      0.709
0.600:      0.732      0.681
0.700:      0.671      0.622


```
Train on 3688 samples, validate on 1054 samples
Epoch 11/20
3688/3688 [==============================] - 133s - loss: 0.3333 - acc: 0.8688 - val_loss: 0.3678 - val_acc: 0.8487
Epoch 12/20
3688/3688 [==============================] - 134s - loss: 0.3264 - acc: 0.8721 - val_loss: 0.3734 - val_acc: 0.8465
Epoch 13/20
3688/3688 [==============================] - 133s - loss: 0.3205 - acc: 0.8754 - val_loss: 0.3766 - val_acc: 0.8440
Epoch 14/20
3688/3688 [==============================] - 134s - loss: 0.3129 - acc: 0.8810 - val_loss: 0.3795 - val_acc: 0.8462
Epoch 15/20
3688/3688 [==============================] - 133s - loss: 0.3063 - acc: 0.8840 - val_loss: 0.3864 - val_acc: 0.8429
Epoch 16/20
2950/3688 [======================>.......] - ETA: 24s - loss: 0.3001 - acc: 0.8872 
```

### not really 0.8 dropout

In [25]:
# Train for 10 epochs in mini-batches of 50; the validation split is evaluated after every epoch.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=10, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fb9fc78e1d0>

In [26]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.543      0.531
0.030:      0.563      0.549
0.040:      0.579      0.564
0.050:      0.594      0.579
0.055:      0.601      0.586
0.058:      0.606      0.589
0.060:      0.608      0.592
0.080:      0.632      0.611
0.100:      0.653      0.629
0.200:      0.732      0.689
0.300:      0.781      0.714
0.400:      0.800      0.724
0.500:      0.800      0.716
0.600:      0.778      0.700
0.700:      0.733      0.660


### 0.7 dropout at the end

In [22]:
# Train for 20 epochs in mini-batches of 50; the validation split is evaluated after every epoch.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=20, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fb9fc780ed0>

In [23]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.617      0.578
0.030:      0.646      0.598
0.040:      0.671      0.613
0.050:      0.692      0.624
0.055:      0.702      0.629
0.058:      0.707      0.632
0.060:      0.711      0.634
0.080:      0.743      0.649
0.100:      0.768      0.661
0.200:      0.845      0.693
0.300:      0.880      0.703
0.400:      0.896      0.697
0.500:      0.895      0.687
0.600:      0.881      0.672
0.700:      0.848      0.646


## 0.7 dropout at the end

In [16]:
# Train for 5 epochs in mini-batches of 50; the validation split is evaluated after every epoch.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=5, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fba05735a10>

In [17]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.524      0.514
0.030:      0.543      0.534
0.040:      0.561      0.552
0.050:      0.574      0.566
0.055:      0.581      0.572
0.058:      0.584      0.576
0.060:      0.586      0.578
0.080:      0.607      0.597
0.100:      0.626      0.614
0.200:      0.692      0.676
0.300:      0.728      0.704
0.400:      0.738      0.716
0.500:      0.718      0.696
0.600:      0.674      0.656
0.700:      0.595      0.573


In [18]:
# Train for 3 epochs in mini-batches of 50, validating after every epoch.
# fit() resumes from the model's current weights, so this adds to any earlier
# training unless the model was rebuilt in between.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=3, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fba05249250>

In [19]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.528      0.516
0.030:      0.547      0.535
0.040:      0.563      0.552
0.050:      0.577      0.564
0.055:      0.584      0.571
0.058:      0.588      0.573
0.060:      0.590      0.576
0.080:      0.611      0.598
0.100:      0.630      0.613
0.200:      0.704      0.675
0.300:      0.752      0.711
0.400:      0.772      0.724
0.500:      0.759      0.713
0.600:      0.724      0.679
0.700:      0.655      0.613


### Regularization of 0.0001 and a dropout after the TimeDistributed layer,
and adding one at the output layer

In [24]:
# Train for 5 epochs in mini-batches of 50; the validation split is evaluated after every epoch.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=5, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f3e7d6b4850>

In [25]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.513      0.506
0.030:      0.529      0.521
0.040:      0.544      0.534
0.050:      0.559      0.548
0.055:      0.565      0.554
0.058:      0.569      0.558
0.060:      0.571      0.560
0.080:      0.594      0.582
0.100:      0.613      0.601
0.200:      0.675      0.657
0.300:      0.710      0.691
0.400:      0.737      0.717
0.500:      0.737      0.710
0.600:      0.704      0.684
0.700:      0.645      0.627


In [26]:
# Checkpoint the model to an HDF5 file (path is relative to the notebook's working directory).
cnn_att_model.save('models/cnn_att_5_epochs.h5')

In [27]:
# Train for 5 epochs in mini-batches of 50, validating after every epoch.
# fit() resumes from the model's current weights, so this adds to any earlier
# training unless the model was rebuilt in between.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=5, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f3a8c3b26d0>

In [28]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.552      0.537
0.030:      0.573      0.557
0.040:      0.592      0.572
0.050:      0.607      0.586
0.055:      0.614      0.591
0.058:      0.618      0.595
0.060:      0.621      0.598
0.080:      0.646      0.617
0.100:      0.666      0.636
0.200:      0.742      0.696
0.300:      0.782      0.721
0.400:      0.792      0.724
0.500:      0.781      0.710
0.600:      0.752      0.685
0.700:      0.708      0.654


In [29]:
# Checkpoint the model to an HDF5 file (path is relative to the notebook's working directory).
# NOTE(review): another save cell further down writes to this same path — confirm which run should own it.
cnn_att_model.save('models/cnn_att_10_epochs.h5')

In [30]:
# Train for 2 epochs in mini-batches of 50, validating after every epoch.
# fit() resumes from the model's current weights, so this adds to any earlier
# training unless the model was rebuilt in between.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=2, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f3e6820d4d0>

In [21]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.579      0.555
0.030:      0.604      0.575
0.040:      0.627      0.591
0.050:      0.646      0.604
0.055:      0.654      0.611
0.058:      0.659      0.616
0.060:      0.663      0.618
0.080:      0.691      0.636
0.100:      0.715      0.652
0.200:      0.791      0.694
0.300:      0.829      0.710
0.400:      0.844      0.703
0.500:      0.839      0.685
0.600:      0.813      0.654
0.700:      0.766      0.614


In [22]:
# Checkpoint the model to an HDF5 file (path is relative to the notebook's working directory).
cnn_att_model.save('models/cnn_att_12_epochs.h5')

### regularization 0.001

In [24]:
# Train for 5 epochs in mini-batches of 50; the validation split is evaluated after every epoch.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=5, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f528868b950>

In [25]:
# Train for 3 epochs in mini-batches of 50, validating after every epoch.
# fit() resumes from the model's current weights, so this adds to any earlier
# training unless the model was rebuilt in between.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=3, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f505d7f0110>

In [26]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.531      0.523
0.030:      0.549      0.541
0.040:      0.566      0.557
0.050:      0.582      0.571
0.055:      0.589      0.579
0.058:      0.594      0.583
0.060:      0.596      0.585
0.080:      0.620      0.607
0.100:      0.639      0.624
0.200:      0.702      0.679
0.300:      0.740      0.714
0.400:      0.750      0.723
0.500:      0.735      0.704
0.600:      0.690      0.665
0.700:      0.628      0.607


In [27]:
# Train for 1 epoch in mini-batches of 50, validating at the end of the epoch.
# fit() resumes from the model's current weights, so this adds to any earlier
# training unless the model was rebuilt in between.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=1, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/1


<keras.callbacks.History at 0x7f528868b850>

In [28]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.535      0.526
0.030:      0.552      0.542
0.040:      0.567      0.556
0.050:      0.581      0.570
0.055:      0.588      0.576
0.058:      0.592      0.580
0.060:      0.595      0.583
0.080:      0.621      0.606
0.100:      0.642      0.624
0.200:      0.712      0.685
0.300:      0.753      0.717
0.400:      0.760      0.719
0.500:      0.744      0.707
0.600:      0.703      0.667
0.700:      0.638      0.608


### first adding regularization to first dense

In [17]:
# Train for 5 epochs in mini-batches of 50; the validation split is evaluated after every epoch.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=5, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f52a3b99150>

In [18]:
# Train for 2 epochs in mini-batches of 50, validating after every epoch.
# fit() resumes from the model's current weights, so this adds to any earlier
# training unless the model was rebuilt in between.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=2, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f52a1959a90>

In [19]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.533      0.526
0.030:      0.554      0.546
0.040:      0.572      0.564
0.050:      0.587      0.577
0.055:      0.594      0.583
0.058:      0.598      0.586
0.060:      0.600      0.589
0.080:      0.622      0.609
0.100:      0.641      0.629
0.200:      0.705      0.680
0.300:      0.742      0.707
0.400:      0.746      0.718
0.500:      0.719      0.690
0.600:      0.669      0.651
0.700:      0.606      0.596


In [20]:
# Checkpoint the model to an HDF5 file (path is relative to the notebook's working directory).
cnn_att_model.save('models/cnn_att_7_epochs.h5')

In [21]:
# Train for 1 epoch in mini-batches of 50, validating at the end of the epoch.
# fit() resumes from the model's current weights, so this adds to any earlier
# training unless the model was rebuilt in between.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=1, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/1


<keras.callbacks.History at 0x7f52aaa4cf10>

In [22]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.536      0.528
0.030:      0.554      0.545
0.040:      0.570      0.560
0.050:      0.584      0.573
0.055:      0.590      0.579
0.058:      0.594      0.582
0.060:      0.597      0.584
0.080:      0.621      0.606
0.100:      0.641      0.623
0.200:      0.715      0.683
0.300:      0.757      0.719
0.400:      0.760      0.720
0.500:      0.735      0.693
0.600:      0.692      0.655
0.700:      0.624      0.594


### Previous results without l2 regularization within attention layer

In [None]:
# Checkpoint the model to an HDF5 file (path is relative to the notebook's working directory).
# NOTE(review): this is the same path an earlier save cell in this notebook writes to, so
# running this cell would overwrite that checkpoint — confirm the intended file name.
cnn_att_model.save('models/cnn_att_10_epochs.h5')

## previous run
No regularization; it overfits after 5 epochs.

In [52]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.530      0.526
0.030:      0.545      0.540
0.040:      0.560      0.555
0.050:      0.574      0.569
0.055:      0.581      0.577
0.058:      0.585      0.580
0.060:      0.587      0.582
0.080:      0.608      0.603
0.100:      0.625      0.620
0.200:      0.689      0.675
0.300:      0.734      0.717
0.400:      0.753      0.728
0.500:      0.741      0.717
0.600:      0.705      0.683
0.700:      0.645      0.618


In [53]:
# Checkpoint the model to an HDF5 file (path is relative to the notebook's working directory).
cnn_att_model.save('models/cnn_att_5_epochs_5k.h5')

In [54]:
# 5 more epochs (mini-batches of 50, validating after every epoch).
# fit() resumes from the model's current weights, so these add to the earlier training.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=5, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f721039df90>

In [55]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.585      0.566
0.030:      0.609      0.587
0.040:      0.629      0.604
0.050:      0.646      0.617
0.055:      0.655      0.621
0.058:      0.659      0.625
0.060:      0.662      0.627
0.080:      0.687      0.643
0.100:      0.709      0.656
0.200:      0.788      0.704
0.300:      0.830      0.727
0.400:      0.845      0.729
0.500:      0.839      0.720
0.600:      0.813      0.695
0.700:      0.763      0.651


In [56]:
# Checkpoint the model to an HDF5 file (path is relative to the notebook's working directory).
cnn_att_model.save('models/cnn_att_10_epochs_5k.h5')

In [57]:
# Train for 5 epochs in mini-batches of 50, validating after every epoch.
# fit() resumes from the model's current weights, so this adds to any earlier
# training unless the model was rebuilt in between.
cnn_att_model.fit(X_train, y_train, batch_size=50, epochs=5, validation_data=(X_val, y_val), verbose=1)

Train on 3688 samples, validate on 1054 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f6eb1b74e90>

### Note: overfitting — the model overfits the 5k records

In [58]:
# Predict on the training and validation splits (batches of 100).
pred_train = cnn_att_model.predict(X_train, batch_size=100)
pred_dev = cnn_att_model.predict(X_val, batch_size=100)
# Print the training vs. dev F1 table over a range of thresholds.
helpers.show_f1_score(y_train, pred_train, y_val, pred_dev)

F1 scores
threshold | training | dev  
0.020:      0.668      0.613
0.030:      0.697      0.630
0.040:      0.719      0.645
0.050:      0.738      0.654
0.055:      0.747      0.660
0.058:      0.751      0.663
0.060:      0.754      0.664
0.080:      0.779      0.676
0.100:      0.801      0.686
0.200:      0.865      0.713
0.300:      0.898      0.720
0.400:      0.913      0.719
0.500:      0.913      0.714
0.600:      0.903      0.696
0.700:      0.876      0.671


In [59]:
# Checkpoint the model to an HDF5 file (path is relative to the notebook's working directory).
cnn_att_model.save('models/cnn_att_15_epochs_5k.h5')