In [1]:
import spacy
from keras.applications.vgg16 import VGG16
from keras.applications.inception_resnet_v2 import InceptionResNetV2
import matplotlib.pyplot as plt
from keras.utils import plot_model,multi_gpu_model
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.models import Model
from keras.models import load_model
from keras.layers import Input,GlobalAveragePooling2D,Layer,InputSpec
from keras.layers.core import Dense,Flatten,Dropout
from keras.optimizers import Adam,SGD
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras.utils import np_utils
import keras.backend as K
import keras.layers as kl
from keras.preprocessing import sequence
import tensorflow as tf
from tensorflow.python.client import device_lib
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
from time import time
import pickle
from keras.models import model_from_json




# from AttentionModule import SelfAttention, SoftAttention
import os
visible_gpu_devices = '0,1'
os.environ["CUDA_VISIBLE_DEVICES"]=visible_gpu_devices
import cv2
from tqdm.notebook import tqdm
import AttentionMed as AM
from importlib import reload
reload(AM)
from time import time,localtime,strftime
from ipywidgets import IntProgress
# from coord import CoordinateChannel2D

Using TensorFlow backend.


In [2]:
n_gpu=len(visible_gpu_devices.split(','))
n_cpu=1
tf_config= tf.ConfigProto(device_count = {'GPU': n_gpu , 'CPU': n_cpu})
tf_config.gpu_options.allow_growth=True
s=tf.Session(config=tf_config)
K.set_session(s)


In [3]:
def multiheadSelfAttention(prev_layer,layer_number=0,heads=8):
    assert prev_layer != None
    sa_arr = []
    for head in range(heads):
        sa,samap = AM.SelfAttention(ch=int(prev_layer.shape[-1]),name='sa{0}{1}'.format(layer_number,head))(prev_layer)
        sa = kl.BatchNormalization()(sa)
        sa_arr.append(sa)
    return sa_arr
def multiheadAttention(prev_layer,context_vector,layer_number=0,heads=8):
    assert prev_layer != None
    assert context_vector != None
    a_arr = []
    for head in range(heads):
        a,a_map = AM.Attention(ch=int(prev_layer.shape[-1]),timesteps=1,name='a{0}{1}'.format(layer_number,head))([context_vector,prev_layer])
        a = kl.BatchNormalization()(a)
        a_arr.append(a)
    return a_arr
def feedforwardsa(sa_layer,layer_before_sa,out_channels,layer_number=0,method='add'):
    x = kl.Concatenate(name='concat_selfattn_{0}'.format(layer_number))(sa_layer)
    x = AM.CondenseAttention1D(ch_in=int(x.shape[-1]),ch_out=out_channels)(x)
    x, g1, g2 = AM.ResidualCombine(method=method
                                   ,name='residual_combine_selfattn_{0}'.format(layer_number))([layer_before_sa, x])
    x = kl.BatchNormalization()(x)
    return x
def feedforwarda(a_layer,context_vector,out_channels,layer_number=0,method='add'):
    x = kl.Concatenate(name='concat_attn_{0}'.format(layer_number))(a_layer)
    x = AM.CondenseAttention1D(ch_in=int(x.shape[-1]),ch_out=out_channels)(x)
    x = kl.Lambda(lambda x:K.squeeze(x,axis=1),name="squeeze_attn_{0}".format(layer_number))(x)
    x, g1, g2 = AM.ResidualCombine(method=method
                                   ,name='residual_combine_attn_{0}'.format(layer_number))([context_vector, x])
    x = kl.BatchNormalization()(x)
    return x

In [4]:
timesteps = 40
vocab_size = 100
embedding_size = 300
elu_alpha = 1.0
bottleneck_units = 512
batchsize = 256
def temp_datagen(batchsize=1):
    counter=0
    while True:
        inp = np.random.randint(1,vocab_size,(batchsize,timesteps,))
        out = inp.copy()
        out[:,::2]=0
        out = np_utils.to_categorical(out,num_classes=101)
        counter+=1
        if batchsize==counter:
            yield inp,out
            counter=0
t_datagen = temp_datagen(batchsize)

In [5]:
t_x,t_y=next(t_datagen)
t_x[:2],t_y.argmax(-1),t_y.shape

(array([[82, 40, 53, 36, 11, 82, 58, 17, 52, 90, 58, 21, 77, 67, 17, 14,
         74, 86, 81,  3, 18, 62, 45, 52, 40, 47, 13, 21, 23, 11, 48, 90,
         80, 51, 86, 30, 77, 80, 89, 64],
        [88, 70, 11, 94, 44,  2, 56, 21, 24, 62, 60, 35, 37, 59, 62,  5,
         24, 86, 67, 13, 47, 63, 56, 70,  7,  8, 49, 78, 64, 36,  9, 64,
         94, 58, 40, 17, 13, 24, 53, 23]]),
 array([[ 0, 40,  0, ..., 80,  0, 64],
        [ 0, 70,  0, ..., 24,  0, 23],
        [ 0, 11,  0, ...,  6,  0, 67],
        ...,
        [ 0, 40,  0, ..., 45,  0, 84],
        [ 0, 99,  0, ..., 68,  0, 54],
        [ 0, 64,  0, ..., 16,  0, 49]]),
 (256, 40, 101))

In [6]:
units = 512
number_sa_layers = 4

inp = kl.Input(shape=(timesteps,))
emb = kl.Embedding(vocab_size+1, embedding_size, mask_zero=False, name='w2v_emb')(inp)

c1 = kl.Conv1D(embedding_size,activation='relu',strides=1,kernel_size=3,padding='same')(emb)
c2 = kl.Conv1D(embedding_size,activation='relu',strides=1,kernel_size=3,dilation_rate=2,padding='same')(emb)
# c3 = kl.Conv1D(embedding_size,activation='relu',strides=1,kernel_size=5,padding='same')(emb)
# c4 = kl.Conv1D(embedding_size,activation='relu',strides=1,kernel_size=5,dilation_rate=2,padding='same')(emb)
# c5 = kl.Conv1D(embedding_size,activation='relu',strides=1,kernel_size=7,padding='same')(emb)
# c6 = kl.Conv1D(embedding_size,activation='relu',strides=1,kernel_size=7,dilation_rate=2,padding='same')(emb)
emb = kl.Concatenate(axis=-1)([c1,c2])
emb = kl.Conv1D(embedding_size,activation='relu',strides=1,kernel_size=1,padding='same')(emb)
emb = kl.BatchNormalization()(emb)


sa = multiheadSelfAttention(prev_layer=emb,layer_number=1,heads=8)
sa_ff = feedforwardsa(sa_layer=sa,layer_before_sa=emb,layer_number=1,method='add',out_channels=int(emb.shape[-1]))

# sa = multiheadSelfAttention(prev_layer=sa_ff,layer_number=2,heads=8)
# sa_ff = feedforwardsa(sa_layer=sa,layer_before_sa=sa_ff,layer_number=2,method='add',out_channels=int(sa_ff.shape[-1]))

# sa = multiheadSelfAttention(prev_layer=sa_ff,layer_number=3,heads=8)
# sa_ff = feedforwardsa(sa_layer=sa,layer_before_sa=sa_ff,layer_number=3,method='add',out_channels=int(sa_ff.shape[-1]))


# context_vector = kl.LSTM(512,activation='relu',return_sequences=False)(encoder)
context_vector = kl.GlobalAveragePooling1D(name='gap')(sa_ff)

a = multiheadAttention(prev_layer=sa_ff,context_vector=context_vector,layer_number=1,heads=8)
a_ff = feedforwarda(a_layer=a,context_vector=context_vector,out_channels=int(emb.shape[-1]),layer_number=1,method='add')

repeat_context_vector = kl.RepeatVector(n=timesteps)(a_ff)

decoder = kl.LSTM(512,activation='relu',return_sequences=True)(repeat_context_vector)
decoder_out = kl.TimeDistributed(Dense(vocab_size+1,activation='softmax'))(decoder)
model = Model(inp,decoder_out)
model.compile(loss='categorical_crossentropy',metrics=['accuracy'],optimizer=Adam(lr=0.0001))

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 40)           0                                            
__________________________________________________________________________________________________
w2v_emb (Embedding)             (None, 40, 300)      30300       input_1[0][0]                    
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 40, 300)      270300      w2v_emb[0][0]                    
__________________________________________________________________________________________________
conv1d_2 (Conv1D)               (None, 40, 300)      270300      w2v_emb[0][0]                    
__________________________________________________________________________________________________
concatenat

In [7]:
parallel_model=multi_gpu_model(model, gpus=n_gpu)
parallel_model.compile(loss='categorical_crossentropy',metrics=['accuracy'],optimizer=Adam(lr=0.0001))

In [8]:
# /media/MyDataStor1/mshaikh2/project_xray/notebooks
es = EarlyStopping(mode='auto',monitor='loss',patience=50,min_delta=0.0005)
hist = parallel_model.fit_generator(t_datagen,epochs=500,verbose=1,steps_per_epoch=50,callbacks=[es])

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 

KeyboardInterrupt: 

In [None]:
df = pd.DataFrame(hist.history)
df.to_csv("graphs/sa_gap_8attn_conv_bn_res_add_metrics.csv")
plot1,plot2 = df.plot(subplots=True, figsize=(5,10))
fig1 = plot1.get_figure()
fig2 = plot2.get_figure()
fig1.savefig("graphs/sa_gap_8attn_conv_bn_res_add_acc.png")
fig2.savefig("graphs/sa_gap_8attn_conv_bn_res_add_loss.png")