In [1]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import heapq as h

import cv2
import time

from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model, load_model, Sequential
from keras.layers import Dropout, Flatten, Dense, Input, Embedding, LSTM, Dropout, add, Conv2D, AveragePooling2D, GlobalMaxPooling1D, concatenate
from keras.layers import GRU, Flatten, Bidirectional, TimeDistributed, Concatenate, GlobalAveragePooling2D, RepeatVector, GlobalAveragePooling1D, BatchNormalization

from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint

from keras import backend as K
from keras.engine.topology import Layer
from keras.engine import InputSpec
from keras import initializers, regularizers, constraints
from keras.optimizers import Adam

import pandas as pd
import numpy as np
from numpy import array
from nltk.tokenize import RegexpTokenizer
import tensorflow as tf
from sklearn.model_selection import train_test_split

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

start = "\033[1m"; end = "\033[0;0m"

from nltk.translate.bleu_score import sentence_bleu, corpus_bleu

In [2]:
tf.config.run_functions_eagerly(True)

In [3]:
start = "\033[1m"; end = "\033[0;0m"

In [4]:
df = pd.read_csv('padataset.csv')

In [5]:
## Sentence and word tokenizer
s_tokenizer = RegexpTokenizer(r'[^?!]+')
w_tokenizer = RegexpTokenizer('\w+|\$[\d\.]+|\S+')

In [6]:
df.shape

(3259, 9)

In [7]:
df.head(16).tail(3)

Unnamed: 0,ID,Comparison,Indication,Findings,Impression,Mesh,Image,View,Sex
13,CXR101,,chest pain,the heart is again mildly enlarged. mediastina...,1. mild stable cardiomegaly and central vascul...,"cardiomegaly_mild,technical quality of image u...",CXR101_IM-0011-4004.png,0.0,
14,CXR1011,,chronic,the heart is top normal in size. the mediastin...,no acute disease,normal,CXR1011_IM-0013-1001.png,0.0,
15,CXR1013,,chest pain,"stable mild cardiomegaly. no pneumothorax, ple...",stable mild cardiomegaly without acute cardiop...,"cardiomegaly_mild,implanted medical device_hum...",CXR1013_IM-0013-1001.png,0.0,female


In [8]:
df['Indication_Sex'] = df['Indication'] +' '+ df['Sex']

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3259 entries, 0 to 3258
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   ID              3259 non-null   object 
 1   Comparison      3259 non-null   object 
 2   Indication      3259 non-null   object 
 3   Findings        3259 non-null   object 
 4   Impression      3259 non-null   object 
 5   Mesh            3259 non-null   object 
 6   Image           3259 non-null   object 
 7   View            3259 non-null   float64
 8   Sex             3259 non-null   object 
 9   Indication_Sex  3259 non-null   object 
dtypes: float64(1), object(9)
memory usage: 254.7+ KB


In [11]:
df.to_csv('final_features.csv',index=None)

### Tokenizing Features and Labels

In [10]:
from nltk.tokenize import sent_tokenize,word_tokenize

In [11]:
findings_sent = list()
findings_word = list()
indication_sent = list()
indication_word = list()
comparison_sent = list()
comparison_word = list()

In [12]:
df.columns

Index(['ID', 'Comparison', 'Indication', 'Findings', 'Impression', 'Mesh',
       'Image', 'View', 'Sex', 'Indication_Sex'],
      dtype='object')

In [13]:
image_names = list()
image_id = list()

In [14]:
def sent_word_tokenize(df,col,wordcount):
    sent = list()
    words = list()
    temp = list()
    

    for row in range(0,df.shape[0]):
        
        temp = list(map(str.strip, s_tokenizer.tokenize(df[col].iloc[row])))
        temp = ' '.join(temp[0].split()[:wordcount])   ### taking first 75 words
        temp = ['<start> ' + temp + ' <end>']# for temp in temp]
        sent.append(temp)

        temp = list(map(str.strip, w_tokenizer.tokenize(df[col].iloc[row])))
        words.append(temp)
        
    return sent,words

In [15]:
def getImage(df):
    image_name = list()
    image_id = list()
    for row in range(0,df.shape[0]):
        ## Image names & ID
        image_name.append(df.Image.iloc[row])
        image_id.append(df.ID.iloc[row])
    return image_name,image_id

In [16]:
findings_sent,findings_word = sent_word_tokenize(df,'Findings',75)
indication_sent,indication_word = sent_word_tokenize(df,'Indication_Sex',75)
comparison_sent,comparison_word = sent_word_tokenize(df,'Comparison',75)

In [17]:
image_names,image_id = getImage(df)

In [18]:
row = 15

In [19]:
image_names[row]

'CXR1013_IM-0013-1001.png'

In [20]:
image_id[row]

'CXR1013'

In [21]:
indication_sent[row]

['<start> chest pain female <end>']

In [22]:
indication_word[row]

['chest', 'pain', 'female']

In [23]:
comparison_sent[row]

['<start>  <end>']

In [24]:
comparison_word[row]

[]

In [25]:
findings_sent[row]

['<start> stable mild cardiomegaly. no pneumothorax, pleural effusion, or focal airspace disease. bony structures intact. right humeral head bone anchor <end>']

In [26]:
findings_word[row]

['stable',
 'mild',
 'cardiomegaly',
 '.',
 'no',
 'pneumothorax',
 ',',
 'pleural',
 'effusion',
 ',',
 'or',
 'focal',
 'airspace',
 'disease',
 '.',
 'bony',
 'structures',
 'intact',
 '.',
 'right',
 'humeral',
 'head',
 'bone',
 'anchor']

In [27]:
## Determining the average length of words
def avglen(input):
    lengths = [len(i) for i in input]
    sum = 0; j = 0
    for i in range(0,len(lengths)):
        if lengths[i] > 0:
            sum += lengths[i]
            j+=1

    return (round(sum/j))

## Determining the maximum length of words
def maxlen(input):
    lengths = [len(i) for i in input]
    sum = 0; j = 0; max = 5
    for i in range(0,len(lengths)):
        if lengths[i] > max:
            max = lengths[i]
            sum+=1
    return max

In [28]:
print ("Average length of Finding word    : "+str(avglen(findings_sent)))
print ("Max length of Finding word        : "+str(maxlen(findings_word)))
print ("Average length of Indication : "+str(avglen(indication_word)))
print ("Max length of Indication word        : "+str(maxlen(indication_word)))
print ("Average length of Comparison : "+str(avglen(comparison_word)))
print ("Max length of Comparison word        : "+str(maxlen(comparison_word)))

Average length of Finding word    : 1
Max length of Finding word        : 185
Average length of Indication : 4
Max length of Indication word        : 41
Average length of Comparison : 4
Max length of Comparison word        : 52


In [29]:
## Defining constants for maximum length
MAX_SENT_LENGTH = 75
MAX_SENT = 1
MAX_WORDS_INDICATION = 5
MAX_WORDS_COMPARISON = 7

In [30]:
# Creating 2 tokenizers - One for Findings vocabulary & other for Indication/Comparison vocabulary
tk_find = Tokenizer(oov_token="<unk>", filters='!"#$%&()*+.,-/:;=?@[\]^_`{|}~ ')
tk_indcomp = Tokenizer(oov_token="<unk>", filters='!"#$%&()*+.,-/:;=?@[\]^_`{|}~ ')

In [31]:
## Word collection
findings_texts = [' '.join(x) for x in findings_sent]
indication_texts = [' '.join(x) for x in indication_word]
comparison_texts = [' '.join(x) for x in comparison_word]

print(findings_texts[:2])
print(indication_texts[:2])
print(comparison_texts[:2])

['<start> the cardiac silhouette and mediastinum size are within normal limits. there is no pulmonary edema. there is no focal consolidation. there are no of a pleural effusion. there is no evidence of pneumothorax <end>', '<start> the cardiomediastinal silhouette is within normal limits for size and contour. the lungs are normally inflated without evidence of focal airspace disease, pleural effusion, or pneumothorax. stable calcified granuloma within the right upper lung. no acute bone abnormality <end>']
['positive tb test', 'chest pain male']
['', 'chest radiograph']


In [32]:
## Two tokenizers - one for findings & other for indication/comparison
tk_find.fit_on_texts(findings_texts)
tk_find.word_index['<pad>'] = 0

tk_indcomp.fit_on_texts(indication_texts+comparison_texts)
tk_indcomp.word_index['<pad>'] = 0

In [50]:
for item in tk_indcomp.word_index:
    print(item)

<unk>
chest
pain
of
breath
radiograph
female
male
and
shortness
for
history
view
two
dyspnea
the
preop
a
left
right
cancer
evaluation
is
patient
surgery
ct
no
on
x
to
2
status
transplant
in
ppd
comparison
positive
preoperative
hypertension
pneumonia
copd
post
weeks
back
lung
asthma
dated
syncope
productive
one
786
chronic
out
from
evaluate
rib
days
abdominal
portable
loss
pt
rule
3
vomiting
disease
wheezing
ago
tb
knee
weakness
bone
sob
pre
sidedchest
smoking
hx
arm
shoulder
followup
dizziness
tuberculosis
marrow
o
heart
hypoxia
hemoptysis
lower
1
congestion
this
months
breast
after
bronchitis
complaints
placement
op
anddyspnea
recent
sided
mental
upper
at
hip
abdomen
throat
examination
renal
kidney
's
use
carcinoma
onset
andchest
tobacco
ap
vehicle
6
symptoms
exam
possible
metastatic
pneumothorax
prostate
altered
lateral
mass
nonsmoker
x1
edema
mva
pulmonary
acute
repair
today
testicular
sarcoidosis
prior
bariatric
cp
replacement
nausea
with
ofchest
has
headache
exertion
numbness
pa
c

In [51]:
for item in tk_indcomp.index_word:
    print(item)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


In [54]:
tk_indcomp.index_word[1384]

'respectively'

In [55]:
## Total words using the word list from all features & labels
print ("Length of findings word index              : "+str(len(tk_find.word_index)))
print ("Length of indication/comparison word index : "+str(len(tk_indcomp.word_index)))

Length of findings word index              : 1500
Length of indication/comparison word index : 1385


In [56]:
## Vectorizing Findings
findings_seq = [tk_find.texts_to_sequences(x) for x in findings_sent]

# Aligning with the max sentences 
for x in findings_seq:
    while len(x) < MAX_SENT:
        x.append([0])
        
    if len(x) > MAX_SENT:
        del x[MAX_SENT:]

# Padding sequences for uniform sentence word length
findings_vector = [pad_sequences(x, padding='post', maxlen=MAX_SENT_LENGTH) for x in findings_seq]

## Changing the first word as "<start>" for findings with more than 75 words
for i in range(0, len(findings_vector)):
    if findings_vector[i][0][74] == 8 :
        findings_vector[i][0][0] = 7   ## "7" is the vector value for <start>


In [57]:
n=20
print(findings_sent[n])
print(findings_seq[n])
print(findings_vector[n])

['<start> both lungs are clear and expanded with no infiltrates. basilar focal atelectasis is present in the lingula. heart size normal. calcified right hilar are present <end>']
[[6, 100, 17, 4, 20, 9, 105, 36, 3, 118, 174, 19, 78, 5, 77, 23, 2, 340, 16, 18, 8, 53, 27, 99, 4, 77, 7]]
[[  6 100  17   4  20   9 105  36   3 118 174  19  78   5  77  23   2 340
   16  18   8  53  27  99   4  77   7   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0]]


In [58]:
## Vectorizing Comparison
comparison_seq = [tk_indcomp.texts_to_sequences(x) for x in comparison_sent]

## Taking only one sentence 
for x in comparison_seq:
    while len(x) < 1:
        x.append([0])
        
    if len(x) > 1:
        del x[1:]

In [59]:
# Padding sequences for uniform sentence word length
comparison_vector = [pad_sequences(x, padding='post', maxlen=MAX_WORDS_COMPARISON) for x in comparison_seq]
print(comparison_sent[205])
print(comparison_vector[205])

['<start>  <end>']
[[1 1 0 0 0 0 0]]


In [60]:
## Vectorizing Indication
indication_seq = [tk_indcomp.texts_to_sequences(x) for x in indication_sent]

## Taking only one sentence 
for x in indication_seq:
    while len(x) < 1:
        x.append([0])
        
    if len(x) > 1:
        del x[1:]

In [61]:
# Padding sequences for uniform sentence word length
indication_vector = [pad_sequences(x, padding='post', maxlen=MAX_WORDS_INDICATION) for x in indication_seq]
print(indication_sent[45])
print(indication_vector[45])

['<start> shortness of breath of breath male <end>']
[[5 4 5 8 1]]


### Image Vectorization Using Inception

In [62]:
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model, load_model, Sequential
from keras.preprocessing import image
import cv2
import pickle

In [63]:
imgpath = 'G:\\acra\\imagesource\\'

In [64]:
imagedf = pd.DataFrame(columns = ['Image'],data = df.Image)

In [65]:
imagedf.head(3)

Unnamed: 0,Image
0,CXR1_1_IM-0001-4001.png
1,CXR10_IM-0002-1001.png
2,CXR10_IM-0002-2001.png


In [66]:
image_train = []

for i in range(0,imagedf.shape[0]):
    img = cv2.imread(imgpath+df["Image"][i].upper())
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   
    img = cv2.resize(img,(299,299))
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)
    image_train.append(img)

In [67]:
image_train[0]

array([[[[-0.56078434, -0.56078434, -0.56078434],
         [-0.6392157 , -0.6392157 , -0.6392157 ],
         [-0.6392157 , -0.6392157 , -0.6392157 ],
         ...,
         [-0.75686276, -0.75686276, -0.75686276],
         [-0.6862745 , -0.6862745 , -0.6862745 ],
         [-0.60784316, -0.60784316, -0.60784316]],

        [[-0.654902  , -0.654902  , -0.654902  ],
         [-0.67058825, -0.67058825, -0.67058825],
         [-0.6862745 , -0.6862745 , -0.6862745 ],
         ...,
         [-0.77254903, -0.77254903, -0.77254903],
         [-0.7176471 , -0.7176471 , -0.7176471 ],
         [-0.6392157 , -0.6392157 , -0.6392157 ]],

        [[-0.6627451 , -0.6627451 , -0.6627451 ],
         [-0.67058825, -0.67058825, -0.67058825],
         [-0.6862745 , -0.6862745 , -0.6862745 ],
         ...,
         [-0.78039217, -0.78039217, -0.78039217],
         [-0.7254902 , -0.7254902 , -0.7254902 ],
         [-0.6392157 , -0.6392157 , -0.6392157 ]],

        ...,

        [[-0.67058825, -0.67058825, -0

In [68]:
def init_inception_model():
    # Initialize InceptionV3 and load the pretrained Imagenet weights
    model = InceptionV3(include_top=False,  weights='imagenet', input_shape=(299,299,3))
    new_input = model.input
    hidden_layer = model.layers[-1].output

    return Model(new_input, hidden_layer)

In [69]:
model = init_inception_model()

In [70]:
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 299, 299, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 149, 149, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 149, 149, 32) 96          conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 149, 149, 32) 0           batch_normalization[0][0]        
_______________________________________________________________________________________

In [71]:
%%time
image_features = list()
for i in range(0,len(image_train)):
    image_features.append(model.predict(image_train[i]))

  "Even though the tf.config.experimental_run_functions_eagerly "


Wall time: 18min


In [72]:
image_features[0].shape

(1, 8, 8, 2048)

In [73]:
## Image vector dimensionality reduction
def model_dim_reduction():
    model = Sequential()
    model.add(Conv2D(2048,(1,1), input_shape=(8, 8, 2048), activation='relu'))
    model.add(AveragePooling2D(pool_size=(2,2)))
    model.add(Conv2D(1024,(1,1), activation='relu'))
    model.add(AveragePooling2D(pool_size=(2,2)))
    model.add(Conv2D(512,(1,1), activation='relu'))
    model.add(AveragePooling2D(pool_size=(2,2)))
    model.add(Flatten())
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [74]:
model_red = model_dim_reduction()

In [75]:
model_red.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_94 (Conv2D)           (None, 8, 8, 2048)        4196352   
_________________________________________________________________
average_pooling2d_9 (Average (None, 4, 4, 2048)        0         
_________________________________________________________________
conv2d_95 (Conv2D)           (None, 4, 4, 1024)        2098176   
_________________________________________________________________
average_pooling2d_10 (Averag (None, 2, 2, 1024)        0         
_________________________________________________________________
conv2d_96 (Conv2D)           (None, 2, 2, 512)         524800    
_________________________________________________________________
average_pooling2d_11 (Averag (None, 1, 1, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 512)               0

In [76]:
%%time
## Feature extraction - 512 features from original image
image_features_final = []
for i in range(0,len(image_features)):
    image_features_final.append(model_red.predict(image_features[i]))

Wall time: 3min 23s


In [87]:
## Features and labels shape
print ("Shape of each image   : "+str(image_features_final[0].shape))
print ("Number of images      : "+str(len(image_features_final)))
print ("Number of Indications : "+str(len(indication_vector)))
print ("Number of Comparison  : "+str(len(comparison_vector)))
print ("Number of Findings    : "+str(len(findings_vector)))

vocab_size_find = len(tk_find.word_index)
vocab_size_indcomp = len(tk_indcomp.word_index)
print ("Vocab size of find       : "+str(vocab_size_find))
print ("Vocab size of indcomp    : "+str(vocab_size_indcomp))

Shape of each image   : (1, 512)
Number of images      : 3259
Number of Indications : 3259
Number of Comparison  : 3259
Number of Findings    : 3259
Vocab size of find       : 1500
Vocab size of indcomp    : 1385


### Input Sequence For Model

In [88]:
def create_sequences(tokenizer, max_length, images, comparison, indication, findings, vocab_size):
    X1 = []
    X2 = []
    X3 = []
    X4 = []
    y = []
    # walk through each image identifier
    for a in range(0,len(images)):
        for i in range(0, len(findings[a])):
            # walk through each description for the image
            # encode the sequence
            seq = findings[a][i]
            # split one sequence into multiple X,y pairs
            for j in range(1, len(seq)):
                # split into input and output pair
                in_seq, out_seq = seq[:j], seq[j]
                # pad input sequence
                in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
                # encode output sequence
                out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
                if (len(images) == 1):
                    X1.append(images[a])
                    X2.append(comparison[a])
                    X3.append(indication[a])
                    X4.append(in_seq)
                    y.append(out_seq)
                else:
                    X1.append(images[a][0])
                    X2.append(comparison[a][0])
                    X3.append(indication[a][0])
                    X4.append(in_seq)
                    y.append(out_seq)
    return X1, X2, X3, X4, y

In [89]:
X1_image, X2_comp, X3_ind, X4_find, Y_find = create_sequences(tk_find, MAX_SENT_LENGTH, image_features_final, comparison_vector, 
                                                            indication_vector, findings_vector, vocab_size_find)

In [90]:
## Shape of sequences
print ("Shape of each image   : "+str(X1_image[0].shape))
print ("Number of images      : "+str(len(X1_image)))

print ("\nShape of Comparison   : "+str(X2_comp[0].shape))
print ("Number of Comparison    : "+str(len(X2_comp)))

print ("\nShape of Indication   : "+str(X3_ind[0].shape))
print ("Number of Indication    : "+str(len(X3_ind)))

print ("\nShape of Findings     : "+str(X4_find[0].shape))
print ("Number of Findings      : "+str(len(X4_find)))

print ("\nShape of Output sequence     : "+str(Y_find[0].shape))
print ("Number of Output sequences     : "+str(len(Y_find)))

Shape of each image   : (512,)
Number of images      : 241166

Shape of Comparison   : (7,)
Number of Comparison    : 241166

Shape of Indication   : (5,)
Number of Indication    : 241166

Shape of Findings     : (75,)
Number of Findings      : 241166

Shape of Output sequence     : (1500,)
Number of Output sequences     : 241166


### Embedding using Radglove (Radiology Embedding)

In [91]:
# load the whole embedding into memory
embeddings_index = dict()
f = open('radglove.800M.100d.txt',encoding="utf8")
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 112343 word vectors.


#### Findings Embedding

In [92]:
embedding_matrix_find = np.zeros((vocab_size_find, 100))

for word, i in tk_find.word_index.items():
    embedding_vector_find = embeddings_index.get(word)
    if embedding_vector_find is not None:
        embedding_matrix_find[i] = embedding_vector_find
        
print (embedding_matrix_find.shape)
print (len(tk_find.word_index))

(1500, 100)
1500


In [101]:
embedding_matrix_find[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [102]:
## Words not present in RADGLOVE
tot = 0
list = []
for i in range(0,len(embedding_matrix_find)):
    if ((embedding_matrix_find[i].all()==0)):
        tot+=1
        list.append(i)

for key, value in tk_find.word_index.items() :
    if value in list:
        print(key)

<start>
<end>
cannot
patient's
there's
verterbroplasty
these't
angulate
epipericardial
they're
today's
<pad>


#### Indication / Comparison Embedding

In [103]:
embedding_matrix_indcomp = np.zeros((vocab_size_indcomp, 100))

for word, i in tk_indcomp.word_index.items():
    embedding_vector_indcomp = embeddings_index.get(word)
    if embedding_vector_indcomp is not None:
        embedding_matrix_indcomp[i] = embedding_vector_indcomp
        
print (embedding_matrix_indcomp.shape)
print (len(tk_indcomp.word_index))

(1385, 100)
1385


In [105]:
embedding_matrix_indcomp[1384]

array([-3.77629995e-01, -8.00965011e-01, -8.54147017e-01, -5.62286973e-01,
       -7.39606977e-01, -3.95869985e-02, -2.31326997e-01,  6.34199977e-01,
       -3.47305000e-01,  7.44826972e-01, -3.31815988e-01, -4.60662991e-01,
       -9.02850032e-02,  5.07996976e-01, -1.10686398e+00,  5.44201016e-01,
        1.44619003e-01,  2.17151999e-01, -4.57653999e-01,  1.34213999e-01,
       -4.69660014e-02, -8.33532989e-01,  3.72435987e-01, -1.21990005e-02,
        3.79870012e-02,  7.93370008e-02,  1.12489402e+00,  3.60424995e-01,
       -5.74705005e-01,  9.26158011e-01, -2.92780008e-02,  5.32015026e-01,
       -5.08249998e-02, -7.55836010e-01,  4.97471988e-01,  7.98189998e-01,
       -9.52907979e-01, -1.30969405e+00,  2.94681996e-01,  4.00492996e-01,
        2.30508998e-01, -4.85868990e-01, -1.26350999e-01,  4.17813003e-01,
        4.80235994e-01, -4.50929999e-02, -2.74278015e-01,  1.45706996e-01,
       -5.00367999e-01, -2.21178994e-01,  1.45514995e-01,  8.86384010e-01,
       -2.65899999e-03,  

In [106]:
## Words not present in RADGLOVE
tot = 0
list = []
for i in range(0,len(embedding_matrix_indcomp)):
    if ((embedding_matrix_indcomp[i].all()==0)):
        tot+=1
        list.append(i)

for key, value in tk_indcomp.word_index.items() :
    if value in list:
        print (key)

sidedchest
anddyspnea
andchest
ofchest
ofdyspnea
leftchest
anteriorchest
paindyspnea
pleuriticchest
onsetdyspnea
midsternalchest
midlinechest
worseningdyspnea
posteriorchest
substernalchest
onsetchest
't
rightchest
compari
lowerchest
ecf
aam
hemopytosis
increasingdyspnea
bangladesh
nonprod
cought
burmese
ealier
doesn
melonoma
66yof
membranoproliferative
burshechest
ramicade
lbot
pkd
xol
sfhhc
acacerbati
chronicdyspnea
54yof
53yof
circumcision
anchest
persistentdyspnea
34yof
outsidechest
diffusechest
increaseddyspnea
hyperlidemia
exsmoker
experiencingdyspnea
emphysemia
denisities
andshortness
30yof
odyspnea
migranes
''s
immunosuppre
centralchest
50chest
occasionalchest
10lb
painchest
focalchest
roomate
hasn
mitomyopathy
painting
pdp
costochondralchest
59chest
insuffcieny
forchest
dyspneachest
fwc
bodyaches
exertionaldyspnea
somechest
vhr
occasionaldyspnea
unspecchest
sfwp
30p
fordyspnea
transp
exertionalchest
positvie
31yof
60y
009
007
hch
<pad>


### Attention

In [107]:
class Attention(Layer):
    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3

        self.W = self.add_weight((input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight((input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        return None

    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b
            
        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            a *= K.cast(mask, K.floatx())

        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape[0],  self.features_dim

### AttentionWeightedAverage

In [108]:
class AttentionWeightedAverage(Layer):
    def __init__(self, return_attention=False, **kwargs):
        self.init = initializers.get('uniform')
        self.supports_masking = True
        self.return_attention = return_attention
        super(AttentionWeightedAverage, self).__init__(** kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(ndim=3)]
        assert len(input_shape) == 3

        self.W = self.add_weight(shape=(input_shape[2], 1),
                                 name='{}_W'.format(self.name),
                                 initializer=self.init)
        self._trainable_weights = [self.W]
        super(AttentionWeightedAverage, self).build(input_shape)

    def call(self, x, mask=None):
        # computes a probability distribution over the timesteps
        # uses 'max trick' for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        att_weights = ai / K.sum(ai, axis=1, keepdims=True)
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result
    
    def get_config(self):
        config = super().get_config().copy()
        config.update({'return_attention': self.return_attention })
        return config

    def get_output_shape_for(self, input_shape):
        return self.compute_output_shape(input_shape)

    def compute_output_shape(self, input_shape):
        output_len = input_shape[2]
        if self.return_attention:
            return [(input_shape[0], output_len), (input_shape[0], input_shape[1])]
        return (input_shape[0], output_len)

    def compute_mask(self, input, input_mask=None):
        if isinstance(input_mask, tuple):
            return [None] * len(input_mask)
        else:
            return None

### MODEL

In [109]:
# image feature extractor model
inputs1 = Input(shape=(512,))
image_layer = Dropout(0.1)(inputs1)
image_layer = Dense(128, activation="relu")(image_layer)

# Comparison model
inputs3 = Input(shape=(MAX_WORDS_COMPARISON,))
se3 = Embedding(vocab_size_indcomp, 100, mask_zero=True)(inputs3)
se3 = Dropout(0.1)(se3)
comparison_layer = GRU(128, recurrent_initializer='glorot_uniform')(se3)

# Indication model
inputs4 = Input(shape=(MAX_WORDS_INDICATION,))
se5 = Embedding(vocab_size_indcomp, 100, mask_zero=True)(inputs4)
se5 = Dropout(0.1)(se5)
indication_layer = GRU(128, recurrent_initializer='glorot_uniform')(se5)

# Findings model
inputs2 = Input(shape=(MAX_SENT_LENGTH,))
se1 = Embedding(vocab_size_find, 100, mask_zero=False)(inputs2)
se1 = Dropout(0.2)(se1)
se2 = Bidirectional(LSTM(512, recurrent_initializer='glorot_uniform', return_sequences=True))(se1)
#se2 = BatchNormalization()(se2)
se2 = TimeDistributed(Dense(512,activation='relu')) (se2)
avg_pool = GlobalAveragePooling1D()(se2)
max_pool = GlobalMaxPooling1D()(se2)
attn = AttentionWeightedAverage()(se2)
se2 = concatenate([attn, avg_pool, max_pool])
#se2 = concatenate([avg_pool, max_pool])
#se2 = RepeatVector(MAX_SENT_LENGTH) (se2)
#se2 = Attention(MAX_SENT_LENGTH) (se2)
#se2 = Bidirectional(GRU(512, recurrent_initializer='glorot_uniform', return_sequences=True))(se2)
findings_layer = Dropout(0.2) (se2)
findings_layer = Dense(128, activation="relu")(findings_layer)

# decoder (feed forward) model
decoder1 = add([image_layer, comparison_layer, indication_layer, findings_layer])
# decoder1 = Dense(512, activation="relu")(decoder1)
outputs = Dense(vocab_size_find, activation='softmax')(decoder1)

# merge the two input models
final_model = Model(inputs=[inputs1, inputs3, inputs4, inputs2], outputs=outputs)

In [110]:
final_model.summary()

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            [(None, 75)]         0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 75, 100)      150000      input_5[0][0]                    
__________________________________________________________________________________________________
dropout_3 (Dropout)             (None, 75, 100)      0           embedding_2[0][0]                
__________________________________________________________________________________________________
bidirectional (Bidirectional)   (None, 75, 1024)     2510848     dropout_3[0][0]                  
_______________________________________________________________________________________

In [116]:
embedding_matrix_find.tolist().pop(0)
embedding_matrix_indcomp.tolist().pop(0)

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

In [117]:
embedding_matrix_find = np.array(embedding_matrix_find)
embedding_matrix_indcomp = np.array(embedding_matrix_indcomp)

In [118]:
## Freezing weights of embedding layers and compiling the model
for i in [1]:   ## Findings layer
    final_model.layers[i].set_weights([embedding_matrix_find])
    final_model.layers[i].trainable = False
    
for i in [11,12]:   ## Indication/comparison layer
    final_model.layers[i].set_weights([embedding_matrix_indcomp])
    final_model.layers[i].trainable = False

opt = Adam(0.001)
final_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"])

In [119]:
## Splitting the features and labels
X1_image_train, X1_image_test, X2_comp_train, X2_comp_test, X3_ind_train, X3_ind_test, X4_find_train, X4_find_test, Y_find_train, Y_find_test = train_test_split(X1_image, X2_comp, X3_ind, X4_find, Y_find, test_size=0.3, random_state=10)

In [120]:
# define checkpoint callback
#filepath = 'modelgru99v1-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
filepath = 'model-ep{epoch:03d}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=0, 
                             save_best_only=False, save_weights_only=False, mode='auto', save_freq=1)

In [121]:
history = final_model.fit([np.array(X1_image_train),np.array(X2_comp_train),np.array(X3_ind_train),np.array(X4_find_train)],np.array(Y_find_train), epochs=5, verbose=1,callbacks=[checkpoint], validation_data=([np.array(X1_image_test), np.array(X2_comp_test), np.array(X3_ind_test), np.array(X4_find_test)], np.array(Y_find_test)), batch_size=512)

  "Even though the tf.config.experimental_run_functions_eagerly "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 50/330 [===>..........................] - ETA: 1:14:45 - loss: 0.7473 - accuracy: 0.8318

KeyboardInterrupt: 

In [122]:
## Making word to idx & idx to word
ixtoword = {}
wordtoix = {}

ix = 1
for w in tk_find.word_index:
    wordtoix[w] = ix
    ixtoword[ix] = w
    ix += 1

In [123]:
tk_find.word_index['the']

2

In [161]:
def pred_search(model, cxr, comp, indication):
    in_text = '<start>'
    for i in range(MAX_SENT_LENGTH):
        sequence = [wordtoix[w] for w in in_text.split() if w in wordtoix]
        sequence = pad_sequences([sequence], maxlen=MAX_SENT_LENGTH)
        yhat = model.predict([cxr, comp, indication,sequence], verbose=0)
        res = np.argmax(yhat)
        if res == 0:
            res = np.argsort(yhat)[-2:].tolist()[0]
            res = res[-2]
            #print(res.index(h.nlargest(2,res)[-1]))
            #yhat = res.index(h.nlargest(2,res)[-1])
            #print(yhat)
            word = ixtoword[res]
        else:
            word = ixtoword[res]
        in_text += ' ' + word
        if word == '<end>':
            break
    final = in_text.split()
    final = final[1:-1]
    final = ' '.join(final)
    return final

In [159]:
## Load models
pred_models = ["G://acra/model-ep004.h5"]
#pred_models = ["G://acra/modelgru99v1-ep008-loss0.485-val_loss0.739.h5"]
pred_model_values = [x[-12:-3] for x in pred_models]   ## For column names in prediction dataframe
model_to_use = []

for i in range(0,len(pred_models)):
    model_to_use.append(load_model(pred_models[i], custom_objects={'AttentionWeightedAverage': AttentionWeightedAverage}))

In [126]:
## Getting IDs of random CXRs for prediction
ids = ["CXR1","CXR1027","CXR694","CXR1082","CXR1190","CXR3595","CXR910"]
id_index = df[df["ID"].isin(ids)].index
id_index

Int64Index([0, 30, 83, 179, 180, 2339, 2977, 3177], dtype='int64')

In [164]:
for loop in range(len(id_index)):
    ## Prediction pipeline
    #n=5  ## Give the index of the prediction CXR from above
    idx = id_index[loop]  ## Image UID to print

    ## Input features
    pred_cxr = image_features_final[idx]
    pred_indication = indication_vector[idx]
    pred_comparison = comparison_vector[idx]


    #print(pred_cxr)
    #print(pred_comparison)
    #print(pred_indication)


    predicted_sent = []

    for i in range(0,len(pred_models)):
        ## Predicted findings
        predicted_sent.append(pred_search(model_to_use[i], pred_cxr, pred_comparison, pred_indication))


    ## Printing actual vs predicted
    print (start+"Actual Findings"+end)
    actual_findings = findings_sent[idx]
    actual_findings = [w.replace('<start> ', '') for w in actual_findings]
    actual_findings = [w.replace(' <end>', '') for w in actual_findings]
    actual_findings

    for i in range(0,len(predicted_sent)):
        predicted_findings = predicted_sent[i].replace(" <unk>", "")
        print (start+"\nPredicted Findings "+str(i)+end)
        predicted_findings

    ## Calculating BLEU scores
    weights = [(1.0/1.0, ), (1.0/2.0, 1.0/2.0,), (1.0/3.0, 1.0/3.0, 1.0/3.0,), (1.0/4.0, 1.0/4.0, 1.0/4.0, 1.0/4.0)]
    scores = [[] for i in range(len(predicted_sent))]

    weights = [(1.0/1.0, ), (1.0/2.0, 1.0/2.0,), (1.0/3.0, 1.0/3.0, 1.0/3.0,), (1.0/4.0, 1.0/4.0, 1.0/4.0, 1.0/4.0)]
    scores = [[] for i in range(len(predicted_sent))]

    for i in range(0, len(predicted_sent)):
        for j in (weights):
            scores[i].append(round(sentence_bleu(actual_findings,predicted_sent[i], j),3))

    index = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"]
    pd.DataFrame(scores,columns=index,index=pred_model_values).T

  "Even though the tf.config.experimental_run_functions_eagerly "


[1mActual Findings[0;0m


['the cardiac silhouette and mediastinum size are within normal limits. there is no pulmonary edema. there is no focal consolidation. there are no of a pleural effusion. there is no evidence of pneumothorax']

[1m
Predicted Findings 0[0;0m


'the heart is normal in size the mediastinum is unremarkable the lungs are clear'

Unnamed: 0,del-ep004
BLEU1,0.198
BLEU2,0.177
BLEU3,0.147
BLEU4,0.123


  "Even though the tf.config.experimental_run_functions_eagerly "


[1mActual Findings[0;0m


['this examination is somewhat limited secondary to obscuration of the bilateral posterior costophrenic sulci on the lateral view. the cardiomediastinal silhouette is within normal limits for appearance. no focal areas of pulmonary consolidation. no pneumothorax. no large pleural effusion. the thoracic spine appears intact']

[1m
Predicted Findings 0[0;0m


'the cardiomediastinal silhouette is normal in size and contour no focal consolidation pneumothorax or pleural effusion no acute bony abnormality'

Unnamed: 0,del-ep004
BLEU1,0.288
BLEU2,0.276
BLEU3,0.248
BLEU4,0.227


  "Even though the tf.config.experimental_run_functions_eagerly "


[1mActual Findings[0;0m


['stable cardiomegaly. stable tortuosity of the aorta. no focal airspace opacities, pneumothorax or pleural effusion. mild degenerative changes of the thoracic spine']

[1m
Predicted Findings 0[0;0m


'the heart is normal in size the mediastinum is unremarkable the lungs are clear without evidence of infiltrate there is no pneumothorax or effusion'

Unnamed: 0,del-ep004
BLEU1,0.775
BLEU2,0.593
BLEU3,0.458
BLEU4,0.369


  "Even though the tf.config.experimental_run_functions_eagerly "


[1mActual Findings[0;0m


['both lungs are clear and expanded. heart and mediastinum normal']

[1m
Predicted Findings 0[0;0m


'the heart is normal in size the mediastinum is unremarkable the lungs are clear'

Unnamed: 0,del-ep004
BLEU1,0.671
BLEU2,0.587
BLEU3,0.539
BLEU4,0.503


  "Even though the tf.config.experimental_run_functions_eagerly "


[1mActual Findings[0;0m


['both lungs are clear and expanded. heart and mediastinum normal']

[1m
Predicted Findings 0[0;0m


'the heart is normal in size the mediastinum is unremarkable the lungs are clear'

Unnamed: 0,del-ep004
BLEU1,0.671
BLEU2,0.587
BLEU3,0.539
BLEU4,0.503


  "Even though the tf.config.experimental_run_functions_eagerly "


[1mActual Findings[0;0m


['stable cardiomegaly and mediastinal contour. increased interstitial lung markings are seen, possibly due to volume overload. there is improved aeration of the lung bases with small residual left basilar effusion. no focal consolidation or pneumothorax. stable tunneled dialysis catheter. visualized osseous structures appear intact']

[1m
Predicted Findings 0[0;0m


'the cardiomediastinal silhouette is normal in size and contour there is no focal airspace disease pneumothorax or pleural effusion there are no typical findings of pulmonary edema'

Unnamed: 0,del-ep004
BLEU1,0.428
BLEU2,0.382
BLEU3,0.313
BLEU4,0.263


  "Even though the tf.config.experimental_run_functions_eagerly "


[1mActual Findings[0;0m


['the cardiac silhouette and pulmonary vascularity are normal. the lungs are clear. there is no evidence of pleural effusion. postoperative changes are noted in the mediastinum and lower cervical spine']

[1m
Predicted Findings 0[0;0m


'the cardiomediastinal silhouette is normal in size and contour there is no pneumothorax or pleural effusion there is no focal air space opacity to suggest a pneumonia'

Unnamed: 0,del-ep004
BLEU1,0.775
BLEU2,0.657
BLEU3,0.547
BLEU4,0.47


  "Even though the tf.config.experimental_run_functions_eagerly "


[1mActual Findings[0;0m


['the heart size is moderately enlarged. there is evidence of previous aortic valve replacement. sternotomy are grossly intact. the pulmonary and mediastinum are within normal limits. there is no pleural effusion or pneumothorax. there are chronically increased interstitial lung markings without superimposed focal airspace disease identified. there are degenerative changes of the spine']

[1m
Predicted Findings 0[0;0m


'the heart is normal in size the mediastinum is unremarkable there is no pleural effusion or pneumothorax there is no acute bony abnormality'

Unnamed: 0,del-ep004
BLEU1,0.165
BLEU2,0.155
BLEU3,0.139
BLEU4,0.127


In [194]:
import pickle

In [196]:
pickle.dump(image_features_final,open('image_features_final.pkl','wb'))
pickle.dump(indication_vector,open('indication_vector.pkl','wb'))
pickle.dump(comparison_vector,open('comparison_vector.pkl','wb'))
pickle.dump(Y_find,open('Y_find.pkl','wb'))
pickle.dump(tk_find,open('tk_find.pkl','wb'))