In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

In [2]:
##### Limit GPU for training ###
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Restrict TensorFlow to only use the fourth GPU
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')

        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

NameError: name 'tf' is not defined

In [3]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input,LSTM,Dense,Activation,Attention,Bidirectional,Concatenate,Embedding
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
def createToken(data,token=None):
    if token is None  :
        token_x = tf.keras.preprocessing.text.Tokenizer(filters='')
        token_x.fit_on_texts(data)
    else :
        token_x = token

    #object of tokenizer class created 
    tokenizer=Tokenizer()
    #data fitted by tokenizer
    tokenizer.fit_on_texts(data)

    train_x_seq = token_x.texts_to_sequences(data)
    train_x_token = tf.keras.preprocessing.sequence.pad_sequences(train_x_seq,
                                                              padding='post')
    
    return train_x_token,token_x

In [5]:
# File loading
### read hopital names from datasets ###
df = pd.read_csv(r'./datasets/hospital_augment_train3.csv')
df_val = pd.read_csv(r'./datasets/hospital_augment_val3.csv')
df_test = pd.read_csv(r'./datasets/hospital_augment_test3.csv')

In [6]:
hospital_gen_train = list(df['gen_data'])
hospital_true_train = list(df['true_data'])
hospital_gen_val = list(df_val['gen_data'])
hospital_true_val = list(df_val['true_data'])
hospital_gen_test = list(df_test['gen_data'])
hospital_true_test = list(df_test['true_data'])

In [7]:
def createSpace(text):
    re = ["<bow>"]
    for t in text :
        re.append(t)
        
    re.append("<eow>")
    #pad_list = ["<pad>"]*(max_seq-len(re)+index)
    #re_pad = re + pad_list
    return re

print (createSpace(hospital_gen_train[0]))

['<bow>', 'ค', 'ล', 'ิ', 'น', 'ิ', 'ก', 'ก', 'ร', 'ุ', 'ง', 'เ', 'ท', 'พ', 'ร', 'ะ', 'ย', 'อ', 'ง', 'า', 'ข', 'า', 'บ', '่', 'อ', 'ว', 'ิ', 'น', '<eow>']


In [8]:
train_x = []
train_y =[]
for line in hospital_gen_train:
    train_x.append(createSpace(line))
for line in hospital_true_train:
    train_y.append(createSpace(line))

In [9]:
val_x =[]
val_y =[]
for line in hospital_gen_val:
    val_x.append(createSpace(line))
for line in hospital_true_val:
    val_y.append(createSpace(line))

In [10]:
train_x_token,token_x =createToken(train_x)
train_y_token,token_y =createToken(hospital_true_train,token_x)

In [11]:
val_x_token,val_x =createToken(val_x,token_x)
val_y_token,val_y =createToken(val_y,token_x)

In [12]:
x_voc = len(token_x.word_index)+1

In [13]:
y_voc = len(token_y.word_index)+1

In [14]:
train_x_token.shape

(30961, 74)

In [15]:
len_input = train_x_token.shape[1]

In [16]:
class BahdanauAttention(tf.keras.layers.Layer):
  def __init__(self, units):
    super(BahdanauAttention, self).__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    # query hidden state shape == (batch_size, hidden size)
    # query_with_time_axis shape == (batch_size, 1, hidden size)
    # values shape == (batch_size, max_len, hidden size)
    # we are doing this to broadcast addition along the time axis to calculate the score
    query_with_time_axis = tf.expand_dims(query, 1)

    # score shape == (batch_size, max_length, 1)
    # we get 1 at the last axis because we are applying score to self.V
    # the shape of the tensor before applying self.V is (batch_size, max_length, units)
    score = self.V(tf.nn.tanh(
        self.W1(query_with_time_axis) + self.W2(values)))

    # attention_weights shape == (batch_size, max_length, 1)
    attention_weights = tf.nn.softmax(score, axis=1)

    # context_vector shape after sum == (batch_size, hidden_size)
    context_vector = attention_weights * values
    context_vector = tf.reduce_sum(context_vector, axis=1)

    return context_vector, attention_weights

In [59]:
embedding_dim=32
units =128

encoder_inputs = Input(shape=(len_input,))
encoder_emb = Embedding(input_dim=x_voc, output_dim=embedding_dim)
# Bidirectional LSTM or Simple LSTM
encoder_lstm = Bidirectional(LSTM(units=units, return_sequences=True, return_state=True)) # Bidirectional(
#encoder_lstm =  LSTM(units=units, return_sequences=True, return_state=True)
encoder_out, fstate_h, fstate_c, bstate_h, bstate_c = encoder_lstm(encoder_emb(encoder_inputs))
#encoder_out,fstate_h,fstate_c = encoder_lstm(encoder_emb(encoder_inputs))
state_h = Concatenate()([fstate_h,bstate_h])
state_c = Concatenate()([bstate_h,bstate_c])

encoder_states = [state_h, state_c]

In [None]:
# Now create the Decoder layers.
decoder_inputs = Input(shape=(None,None,))
decoder_emb = Embedding(input_dim=y_voc, output_dim=embedding_dim)
decoder_lstm = LSTM(units=units*2, return_sequences=True, return_state=True) # units=units*2
decoder_lstm_out, _, _ = decoder_lstm(decoder_emb(decoder_inputs), initial_state=encoder_states)

In [39]:
# Attention layer
attention_layer = BahdanauAttention(10)
attention_result, attention_weights = attention_layer(encoder_out, decoder_lstm_out)

print("Attention result shape: (batch size, units) {}".format(attention_result.shape))
print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(attention_weights.shape))

Attention result shape: (batch size, units) (None, 74, 256)
Attention weights shape: (batch_size, sequence_length, 1) (None, None, 74, 1)


In [40]:
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_lstm_out, attention_result])

In [56]:
decoder_d2 =  Dense(y_voc, activation="softmax")
decoder_out = decoder_d2(decoder_concat_input)

In [60]:
model = Model([encoder_inputs,decoder_inputs],decoder_out)
#model = Model(Attention())
#model = Model.add(Attention())
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
model.summary()

ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_9:0", shape=(None, 74), dtype=float32) at layer "input_9". The following previous layers were accessed without issue: []

In [43]:
max_enc_len = max([len(x) for x in train_x])
max_dec_len = max([len(x) for x in train_y])
print("Max Enc Len:",max_enc_len)
print("Max Dec Len:",max_dec_len)

Max Enc Len: 74
Max Dec Len: 73


In [49]:
num_samples = len(train_x)
encoder_input_data = np.zeros( (num_samples , max_enc_len , x_voc),dtype='float32' )
decoder_input_data = np.zeros( (num_samples , max_dec_len , y_voc),dtype='float32' )
decoder_target_data = np.zeros( (num_samples , max_dec_len , y_voc),dtype='float32' )
print("CREATED ZERO VECTORS")

CREATED ZERO VECTORS


In [50]:
#filling in the enc,dec datas
for i,(input_text,target_text) in enumerate(zip(train_x,train_y)):
    for t,char in enumerate(input_text):
        encoder_input_data[ i , t , token_x.word_index[char] ] = 1
    for t,char in enumerate(target_text):
        decoder_input_data[ i, t , token_x.word_index[char] ] = 1
        if t > 0 :
            decoder_target_data[ i , t-1 , token_x.word_index[char] ] = 1
print("COMPLETED...")    

COMPLETED...


In [52]:
h=model.fit([encoder_input_data,decoder_input_data],decoder_target_data
         ,epochs = 100,
          batch_size = 128,
         )

ValueError: Error when checking input: expected input_9 to have 2 dimensions, but got array with shape (30961, 74, 94)