# Parameter tuning
## 1.Try different parameter on basic network structure， keep the best parameter
## 2. IF a word has a UpperCase character , it's very likely a skill word. 
##     Add a Case_sensitive layer to the network, test the accuracy

### import NER dataset

In [1]:
import pandas as pd
from pprint import pprint
from IPython.core.display import display, HTML

import warnings
warnings.filterwarnings('ignore')

In [2]:
import json
with open("NER_dataset/tran_X.json", "r") as f:
    X = json.load(f)
    
with open("NER_dataset/tran_Y.json", "r") as f:
    Y = json.load(f)

### sample data

In [3]:
print( X[0] )
print( Y[0] )

['Experience', 'in', 'commonly', 'used', 'for', 'data', 'analysis', 'such', 'as', 'Python', ',', 'R', ',', 'Julia', ',', 'or', 'SAS', '.']
['O', 'O', 'O', 'O', 'O', 'B', 'I', 'O', 'O', 'B', 'O', 'B', 'O', 'B', 'O', 'O', 'B', 'O']


In [4]:
print( 'trainset_size:',len( X ) )
print( 'testset_size:',len( Y ) )

trainset_size: 614
testset_size: 614


# dataset split

In [5]:
import numpy as np
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split( X , Y , test_size=0.2, random_state=42)

# PreProcess

In [6]:
import tensorflow as tf
from pprint import pprint

In [7]:
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.callbacks import *
from keras_contrib.layers import CRF
from keras_contrib.losses import crf_loss
from keras_contrib.metrics import crf_accuracy
from keras.preprocessing.sequence import pad_sequences
from keras_bert import load_trained_model_from_checkpoint
from keras_bert import Tokenizer
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [8]:
max_seq_length = 128

In [8]:
label = {}
_label = {}


label_path = "uncased_L-2_H-128_A-2\tag_dict.txt"
f_label = open(label_path, 'r+', encoding='utf-8')
for line in f_label:
    content = line.strip().split()
    label[content[0].strip()] = content[1].strip()
    _label[content[1].strip()] = content[0].strip()
    
    
vocab = {}
vocab_path = r"uncased_L-2_H-128_A-2\vocab.txt"
with open(vocab_path, 'r+', encoding='utf-8') as f_vocab:
    for line in f_vocab.readlines():
        vocab[line.strip()] = len(vocab)

In [10]:
def PreProcessInputData( text ):
    word_labels = []
    seq_types = []

    for sequence in text:
        len_text = len(sequence)

        ###########################################
        temp_word_labels = []
        temp_word_labels.append( 101 )            
        for w in sequence:
            temp_word_labels.append( vocab.get(str(w).lower(),100) )
        temp_word_labels.append( 102 )

        ###########################################
        ###########################################
        temp_seq_types = [1] * len(temp_word_labels) +  [0] * ( max_seq_length - len( temp_word_labels ))
        temp_word_labels = temp_word_labels + [0] * ( max_seq_length - len( temp_word_labels ))

        word_labels.append( temp_word_labels )
        seq_types.append( temp_seq_types )

    return word_labels, seq_types

In [11]:
def PreProcessOutputData( text ):
    tags = []
    for line in text:
        tag = [0]
        for item in line:
            tag.append( int(label[item.strip()]) )
        tag.append(0)
        tags.append(tag)

    pad_tags = pad_sequences(tags, maxlen=max_seq_length, padding="post", truncating="post")
    result_tags = np.expand_dims(pad_tags, 2)
    return result_tags

# Data PreProcessing

In [12]:
train_sentence_list_word_labels ,train_sentence_list_seq_types = PreProcessInputData( X_train )
train_output_label_list = PreProcessOutputData( y_train )

In [13]:
test_sentence_list_word_labels ,test_sentence_list_seq_types = PreProcessInputData( X_test )
test_output_label_list = PreProcessOutputData( y_test )

# basic network structure define

# define_Layer

In [14]:
def define_Layer( layer_dict, my_input ):
    
    if layer_dict['layer_type'] == 'Bidirectional_LSTM':
        return Bidirectional( LSTM( layer_dict['lstmDim'],
                                         return_sequences = True,
                                         activation = layer_dict['activation'],
                                         dropout = layer_dict['dropout'],
                                         recurrent_dropout = layer_dict['dropout']) )(my_input)
    
    elif layer_dict['layer_type'] == 'Dense':
        return Dense(units = layer_dict['units'] , activation = layer_dict['activation'] )(my_input)
    
    elif layer_dict['layer_type'] == 'Dropout':
        return Dropout(dropout = layer_dict['dropout'] )(my_input)

# define network structure

In [15]:
def Train_Model( network_struct_dict ):
    print( '■' * 60 )
    print('network struct:')
    pprint( network_struct_dict )
    
    #################################################################
    ## use BERT embedding layer
    model_path = r"uncased_L-2_H-128_A-2/"
    bert = load_trained_model_from_checkpoint(
        model_path + "bert_config.json",
        model_path + "bert_model.ckpt",
        seq_len = max_seq_length
        )

    #make bert layer trainable
    for layer in bert.layers:
        layer.trainable = network_struct_dict['is_embeding_trainable']
        
    x1 = Input(shape=(None,))
    x2 = Input(shape=(None,))
    bert_out = bert([x1, x2])
    
    my_output = bert_out
    #################################################################
    # use layer_dict_List to build model
    for layer_dict in network_struct_dict['layer_dict_List']:
        my_input  = my_output
        my_output = define_Layer( layer_dict, my_input )
    
    
    #################################################################
    ## CRF output
    crf_out = CRF(len(label), sparse_target=True)(my_output)
    model = Model([x1, x2], crf_out)
    model.summary()

    model.compile(
        optimizer=network_struct_dict['optimizer'],
        loss=crf_loss,
        metrics=[crf_accuracy]
    )
    
    #################################################################
    ## training
    model.fit(x=[train_sentence_list_word_labels, train_sentence_list_seq_types],
                       y = train_output_label_list,
                       batch_size = network_struct_dict['batch_size'],
                       epochs = network_struct_dict['epochs'],
                       validation_split=0.2,
                       callbacks=[EarlyStopping(monitor='val_loss', patience=3, min_delta=0.001 )],
                       verbose = 1,
                       class_weight = 'auto')
    
    print('evaluate :')
    res = model.evaluate(x=[test_sentence_list_word_labels, test_sentence_list_seq_types],
                       y=test_output_label_list,batch_size=128)
    
    print( res )
    return res[1]

# try different parameter and keep the best

In [16]:
network_struct_dict_List = [
    ## network_struct
    {
        'is_embeding_trainable' : True,
        'optimizer' : RMSprop(1e-4),
        'batch_size' : 128,
        'epochs': 100,
        
        'layer_dict_List' : [ 
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 256 ,'activation':'tanh', 'dropout' :0.3},
        ]
    },
    
    ## network_struct
    {
        'is_embeding_trainable' : True,
        'optimizer' : RMSprop(1e-4),
        'batch_size' : 128,
        'epochs': 100,
        
        'layer_dict_List' : [ 
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 128 ,'activation':'tanh', 'dropout' :0.2},
        ]
    },
    
    ## network_struct
    {
        'is_embeding_trainable' : False,
        'optimizer' : Adam(1e-4),
        'batch_size' : 128,
        'epochs': 100,
        
        'layer_dict_List' : [ 
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 128 ,'activation':'tanh', 'dropout' :0.2},
        ]
    },
    
    ## network_struct
    {
        'is_embeding_trainable' : True,
        'optimizer' : Adam(1e-4),
        'batch_size' : 128,
        'epochs': 100,
        
        'layer_dict_List' : [ 
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 128 ,'activation':'sigmoid', 'dropout' :0.2},
        ]
    },
    
    ## network_struct
    {
        'is_embeding_trainable' : True,
        'optimizer' : RMSprop(1e-4),
        'batch_size' : 128,
        'epochs': 100,
        
        'layer_dict_List' : [ 
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 64 ,'activation':'tanh', 'dropout' :0.2},
        ]
    },
    
    
    ## network_struct
    {
        'is_embeding_trainable' : True,
        'optimizer' : RMSprop(1e-4),
        'batch_size' : 128,
        'epochs': 100,
        
        'layer_dict_List' : [ 
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 128 ,'activation':'tanh', 'dropout' :0.4},
        ]
    },
    
    ## network_struct
    {
        'is_embeding_trainable' : True,
        'optimizer' : RMSprop(1e-4),
        'batch_size' : 128,
        'epochs': 100,
        
        'layer_dict_List' : [ 
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 256 ,'activation':'tanh', 'dropout' :0.2},
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 128 ,'activation':'tanh', 'dropout' :0.2},
        ]
    },
    
    ## network_struct
    {
        'is_embeding_trainable' : True,
        'optimizer' : RMSprop(1e-4),
        'batch_size' : 128,
        'epochs': 100,
        
        'layer_dict_List' : [ 
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 128 ,'activation':'tanh', 'dropout' :0.2},
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 64 ,'activation':'tanh', 'dropout' :0.2},
        ]
    },
    
    ## network_struct
    {
        'is_embeding_trainable' : True,
        'optimizer' : RMSprop(1e-4),
        'batch_size' : 128,
        'epochs': 100,
        
        'layer_dict_List' : [ 
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 128 ,'activation':'tanh', 'dropout' :0.2},
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 64 ,'activation':'sigmoid', 'dropout' :0.2},
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 32 ,'activation':'sigmoid', 'dropout' :0.2},
        ]
    },
    
    
    ## network_struct
    {
        'is_embeding_trainable' : True,
        'optimizer' : RMSprop(1e-4),
        'batch_size' : 64,
        'epochs': 100,
        'layer_dict_List' : [ 
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 256 ,'activation':'tanh', 'dropout' :0.3},
            {'layer_type':'Dense', 'units': 64 ,'activation':'sigmoid'},
        ]
    },
    
    ## network_struct
    {
        'is_embeding_trainable' : True,
        'optimizer' : Adam(1e-4),
        'batch_size' : 64,
        'epochs': 100,
        
        'layer_dict_List' : [ 
            {'layer_type':'Bidirectional_LSTM', 'lstmDim': 128 ,'activation':'tanh', 'dropout' :0.3},
            {'layer_type':'Dense', 'units': 32 ,'activation':'sigmoid'},
        ]
    }
]

In [17]:
best_network_struct = None
best_accurac = 0

for network_struct_dict in network_struct_dict_List:
    train_accurac = Train_Model( network_struct_dict )
    if best_accurac < train_accurac:
        best_accurac = train_accurac
        best_network_struct = network_struct_dict
        
print('Best Network Struct:')
pprint(best_network_struct)

■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
network struct:
{'batch_size': 128,
 'epochs': 100,
 'is_embeding_trainable': True,
 'layer_dict_List': [{'activation': 'tanh',
                      'dropout': 0.3,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 256}],
 'optimizer': <keras.optimizers.RMSprop object at 0x00000000196F1320>}




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.







Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None)         0                                            
_______________________________________________________________________________________

Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
evaluate :
[11.3150053024292, 0.9049789905548096]
■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
network struct:
{'batch_size': 128,
 'epochs': 100,
 'is_embeding_trainable': True,
 'layer_dict_List': [{'activation': 'tanh',
                      'dropout': 0.2,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 128}],
 'optimizer': <keras.optimizers.RMSprop object at 0x0000000018F5AD30>}
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, None)         0                                            
___________________________________________________________________________

Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
evaluate :
[11.307711601257324, 0.8950968384742737]
■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
network struct:
{'batch_size': 128,
 'epochs': 100,
 'is_embeding_trainable': False,
 'layer_dict_List': [{'activation': 'tanh',
                      'dropout': 0.2,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 128}],
 'optimizer': <keras.optimizers.Adam object at 0x00000000196F1358>}
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
input_6 (InputLayer)            (None, None)         0                                       

Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
evaluate :
[11.423075675964355, 0.8506270051002502]
■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
network struct:
{'batch_size': 128,
 'epochs': 100,
 'is_embeding_trainable': True,
 'layer_dict_List': [{'activation': 'sigmoid',
                      'dropout': 0.2,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 128}],
 'optimizer': <keras.optimizers.Adam object at 0x0000000019116668>}
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, None)         0                                            
________________________________________________________________________

Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
evaluate :
[11.368276596069336, 0.8779933452606201]
■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
network struct:
{'batch_size': 128,
 'epochs': 100,
 'is_embeding_trainable': True,
 'layer_dict_List': [{'activation': 'tanh',
                      'dropout': 0.4,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 128}],
 'optimizer': <keras.optimizers.RMSprop object at 0x00000000191269E8>}
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           (None, None)         0              

Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
evaluate :
[11.299483299255371, 0.9141008853912354]
■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
network struct:
{'batch_size': 128,
 'epochs': 100,
 'is_embeding_trainable': True,
 'layer_dict_List': [{'activation': 'tanh',
                      'dropout': 0.2,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 128},
                     {'activation': 'tanh',
                      'dropout': 0.2,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 64}],
 'optimizer': <keras.optimizers.RMSprop object at 0x0000000019126DA0>}
__________________________________________________________________________________________________
Layer (type)   

Train on 392 samples, validate on 99 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100


Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
evaluate :
[11.429686546325684, 0.881413996219635]
■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
network struct:
{'batch_size': 64,
 'epochs': 100,
 'is_embeding_trainable': True,
 'layer_dict_List': [{'activation': 'tanh',
                      'dropout': 0.3,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 256},
 

Train on 392 samples, validate on 99 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
evaluate :
[11.353896141052246, 0.8924362063407898]
■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
network struct:
{'batch_size': 64,
 'epochs': 100,
 'is_embeding_trainable': True,
 'layer_dict_List': [{'activation': 'tanh',
                      'dropout': 0.3,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 128},
                     {'activation': 'sigmoid',
                      'layer_type': 'Dense',
   

Train on 392 samples, validate on 99 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100


Epoch 53/100
Epoch 54/100
evaluate :
[11.494059562683105, 0.8901559114456177]
Best Network Struct:
{'batch_size': 128,
 'epochs': 100,
 'is_embeding_trainable': True,
 'layer_dict_List': [{'activation': 'tanh',
                      'dropout': 0.2,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 256},
                     {'activation': 'tanh',
                      'dropout': 0.2,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 128}],
 'optimizer': <keras.optimizers.RMSprop object at 0x0000000019126AC8>}


# ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■

## IF a word has a uppercase character ，it’s very likely a skill word

## change network structure ,add a addtional input layer
## change dataset if word contains uppercase label 1， if word are lowercase label 0
## test result

In [18]:
def PreProcessInputData_Addtion( text ):
    word_labels = []
    seq_types = []
    upper_list = []
    

    for sequence in text:
        len_text = len(sequence)

        ###########################################
        temp_word_labels = []
        temp_upper_list = []
        
        temp_word_labels.append( 101 )
        temp_upper_list.append(0)
        for w in sequence:
            temp_word_labels.append( vocab.get(str(w).lower(),100) )
            if str(w).isalpha() and (str(w).islower() == False):
                temp_upper_list.append( 2 )
            else:
                temp_upper_list.append( 1 )
            
        temp_word_labels.append( 102 )
        temp_upper_list.append(0)
        ###########################################
        ###########################################
        temp_seq_types = [1] * len(temp_word_labels) +  [0] * ( max_seq_length - len( temp_word_labels ))
        temp_word_labels = temp_word_labels + [0] * ( max_seq_length - len( temp_word_labels ))
        temp_upper_list  = temp_upper_list  + [0] * ( max_seq_length - len( temp_upper_list ))

        word_labels.append( temp_word_labels )
        seq_types.append( temp_seq_types )
        upper_list.append( temp_upper_list )

    return word_labels, seq_types, upper_list

In [19]:
def PreProcessOutputData_Addtion( text ):
    tags = []
    for line in text:
        tag = [0]
        for item in line:
            tag.append( int(label[item.strip()]) )
        tag.append(0)
        tags.append(tag)

    pad_tags = pad_sequences(tags, maxlen=max_seq_length, padding="post", truncating="post")
    result_tags = np.expand_dims(pad_tags, 2)
    return result_tags

In [20]:
train_sentence_list_word_labels ,train_sentence_list_seq_types , train_sentence_upper_list = PreProcessInputData_Addtion( X_train )
train_output_label_list = PreProcessOutputData_Addtion( y_train )

In [21]:
test_sentence_list_word_labels ,test_sentence_list_seq_types , test_sentence_upper_list = PreProcessInputData_Addtion( X_test )
test_output_label_list = PreProcessOutputData_Addtion( y_test )

In [22]:
def Train_Model_Addtion( network_struct_dict ):
    print( '■' * 60 )
    print('network struct:')
    pprint( network_struct_dict )
    
    #################################################################
    ## use BERT embedding layer
    model_path = r"uncased_L-2_H-128_A-2/"
    bert = load_trained_model_from_checkpoint(
        model_path + "bert_config.json",
        model_path + "bert_model.ckpt",
        seq_len = max_seq_length
        )

    #make bert layer trainable
    for layer in bert.layers:
        layer.trainable = network_struct_dict['is_embeding_trainable']
        
    x1 = Input(shape=(None,))
    x2 = Input(shape=(None,))
    x3 = Input(shape=(None,))
    
    bert_out = bert([x1, x2])
    label_layer1 = Embedding(4, 2, trainable=False)(x3)

    x4 = Concatenate()( [bert_out, label_layer1] )
    print(x4.shape)
    
    my_output = x4
    #################################################################
    # use layer_dict_List to build model
    for layer_dict in network_struct_dict['layer_dict_List']:
        my_input  = my_output
        my_output = define_Layer( layer_dict, my_input )
    
    
    #################################################################
    ## CRF output
    crf_out = CRF(len(label), sparse_target=True)(my_output)
    model = Model([x1, x2, x3], crf_out)
    model.summary()

    model.compile(
        optimizer=network_struct_dict['optimizer'],
        loss=crf_loss,
        metrics=[crf_accuracy]
    )
    
    #################################################################
    ## training
    model.fit( [train_sentence_list_word_labels, train_sentence_list_seq_types, train_sentence_upper_list],
                       y = train_output_label_list,
                       batch_size = network_struct_dict['batch_size'],
                       epochs = network_struct_dict['epochs'],
                       validation_split=0.2,
                       callbacks=[EarlyStopping(monitor='val_loss', patience=3, min_delta=0.001)],
                       verbose = 1,
                       class_weight = 'auto')
    
    print('evaluate :')
    res = model.evaluate( [test_sentence_list_word_labels, test_sentence_list_seq_types, test_sentence_upper_list],
                       y=test_output_label_list,batch_size=128)
    
    print( res )
    return res[1]

In [27]:
network_struct_dict = {
    'is_embeding_trainable' : True,
    'optimizer' : Adam(1e-4),
    'batch_size' : 32,
    'epochs': 100,

    'layer_dict_List' : [ 
        {'layer_type':'Bidirectional_LSTM', 'lstmDim': 256 ,'activation':'tanh', 'dropout' :0.3},
#         {'layer_type':'Bidirectional_LSTM', 'lstmDim': 128 ,'activation':'tanh', 'dropout' :0.2},
    ]
}

In [24]:
best_network_struct

{'is_embeding_trainable': True,
 'optimizer': <keras.optimizers.RMSprop at 0x19126ac8>,
 'batch_size': 128,
 'epochs': 100,
 'layer_dict_List': [{'layer_type': 'Bidirectional_LSTM',
   'lstmDim': 256,
   'activation': 'tanh',
   'dropout': 0.2},
  {'layer_type': 'Bidirectional_LSTM',
   'lstmDim': 128,
   'activation': 'tanh',
   'dropout': 0.2}]}

In [28]:
Train_Model_Addtion( network_struct_dict )

■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
network struct:
{'batch_size': 32,
 'epochs': 100,
 'is_embeding_trainable': True,
 'layer_dict_List': [{'activation': 'tanh',
                      'dropout': 0.3,
                      'layer_type': 'Bidirectional_LSTM',
                      'lstmDim': 256}],
 'optimizer': <keras.optimizers.Adam object at 0x000000006A262D68>}
(?, ?, 130)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_26 (InputLayer)           (None, None)         0                                            
__________________________________________________________________________________________________
input_27 (InputLayer)           (None, None)         0                                            
__________________________________________________________________________________________________
in

0.9106805324554443