# Doc classification

* [Preprocessing](#preprocess)
* [CNN](#CNN)
* [Bidirectional LSTM](#BiLSTM)
* [Attention GRU](#AttGRU)
* [Hierarchical LSTM](#H-LSTM)
* [Hierarchical Attention Networks](#HAN)

## Get glove_6B

In [0]:
# Optional, Colab only: mounting Google Drive is left commented out.
# NOTE(review): presumably DIRNAME below only resolves once the drive is mounted
# and the working directory is /content — confirm before running elsewhere.
# from google.colab import drive
# drive.mount('/content/gdrive')

# Base directory that later cells prepend to their data paths
# ('data/labeledTrainData.tsv' and 'data/glove.6B/...').
DIRNAME = 'gdrive/My Drive/Colab Notebooks/'

## Import modules

In [0]:
import tensorflow as tf
# Stop immediately unless TensorFlow reports a GPU named '/device:GPU:0';
# otherwise report the device name and the TensorFlow version in use.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
print('TensorFlow Version: {}'.format(tf.__version__))

Found GPU at: /device:GPU:0
TensorFlow Version: 1.12.0


In [0]:
import os

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [0]:
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.models import Model
from keras.layers import Embedding, Dense, Input, Flatten
from keras.layers import Conv1D, MaxPooling1D, Embedding, Concatenate, Dropout
from keras.layers import Bidirectional, LSTM, GRU, TimeDistributed
from keras.callbacks import EarlyStopping, ModelCheckpoint

Using TensorFlow backend.


<a id='preprocess'></a>

## Preprocessing keras imdb data

In [0]:
def get_imdb_data(maxlen=100, max_features=20000):
    """Load the Keras IMDB dataset and pad every review to a fixed length.

    Args:
        maxlen: Value forwarded to `pad_sequences` as `maxlen`.
        max_features: Value forwarded to `imdb.load_data` as `num_words`.

    Returns:
        ((X_train, y_train), (X_test, y_test)): the X arrays after padding,
        the y arrays exactly as returned by `imdb.load_data`.
    """
    from keras.datasets import imdb
    from keras.preprocessing import sequence

    train_split, test_split = imdb.load_data(num_words=max_features)
    X_train, y_train = train_split
    X_test, y_test = test_split
    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')

    print('... Padding sequences (samples x time)')
    X_train, X_test = (sequence.pad_sequences(split, maxlen=maxlen)
                       for split in (X_train, X_test))
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    return (X_train, y_train), (X_test, y_test)


# Settings for the Keras-bundled IMDB data. The same three names are assigned
# again (with MAX_SEQUENCE_LENGTH = 1000) by the Kaggle-data cell further down.
MAX_SEQUENCE_LENGTH = 250
MAX_NUM_WORDS = 20000
EMBEDDING_DIM = 100

# The second pair returned by get_imdb_data is its test split; it is used here
# as the validation set.
(x_train, y_train), (x_val, y_val) = get_imdb_data(MAX_SEQUENCE_LENGTH, MAX_NUM_WORDS)

# One-hot encode the labels.
# NOTE(review): this overwrites y_train / y_val in place, so re-running only
# these two lines would encode already-encoded labels — confirm the cell is
# always run as a whole.
y_train = to_categorical(y_train)
y_val = to_categorical(y_val)

25000 train sequences
25000 test sequences
... Padding sequences (samples x time)
X_train shape: (25000, 250)
X_test shape: (25000, 250)


## Download imdb train from Kaggle
wget https://www.kaggle.com/c/word2vec-nlp-tutorial/download/labeledTrainData.tsv

In [1]:
import re
from bs4 import BeautifulSoup


def clean_str(string):
    """
    Normalise a raw text string before tokenization.

    Every backslash, single quote and double quote is deleted; the result is
    stripped of surrounding whitespace and lower-cased.
    """
    # One character class removes the same three characters the step-by-step
    # substitutions removed.
    without_quotes = re.sub(r"[\\'\"]", "", string)
    return without_quotes.strip().lower()

# Read the labelled reviews (tab-separated) from the data directory.
data_train = pd.read_csv(DIRNAME + 'data/labeledTrainData.tsv', sep='\t')

# Strip the HTML markup from each review, then normalise it with clean_str.
texts = [clean_str(BeautifulSoup(raw_review, 'html5lib').get_text())
         for raw_review in data_train.review]

# One-hot encode the sentiment column.
labels = to_categorical(np.asarray(data_train.sentiment))
print('Shape of label tensor:', labels.shape)

In [0]:
# Settings for the Kaggle reviews; these replace the values assigned for the
# Keras IMDB data earlier in the notebook.
MAX_SEQUENCE_LENGTH = 1000
MAX_NUM_WORDS = 20000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2

# Fit the tokenizer on the cleaned reviews and turn each review into a list of
# integer word ids.
tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# word_index holds every token seen while fitting (later cells size the
# embedding matrix from it).
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

# Bring every review to exactly MAX_SEQUENCE_LENGTH ids.
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of data tensor:', data.shape)


# Shuffling and splitting into train and validation sets
# NOTE(review): no random seed is set in this cell, so the split presumably
# differs from run to run — confirm whether that is intended.
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
num_validation_samples = int(VALIDATION_SPLIT * data.shape[0])

# The last num_validation_samples shuffled rows form the validation set; the
# rows before them form the training set.
x_train = data[:-num_validation_samples]
y_train = labels[:-num_validation_samples]
x_val = data[-num_validation_samples:]
y_val = labels[-num_validation_samples:]

# Column sums of the one-hot labels give the per-class counts.
print('Number of positive and negative reviews in training and validation set ')
print(y_train.sum(axis=0))
print(y_val.sum(axis=0))

Found 81503 unique tokens.
Shape of data tensor: (25000, 1000)
Number of positive and negative reviews in traing and validation set 
[10023.  9977.]
[2477. 2523.]


## Glove embedding matrix

In [0]:
def glove_embedding_matrix(EMBEDDING_DIM, word_index):
    """Build an embedding matrix for `word_index` from GloVe 6B vectors.

    Args:
        EMBEDDING_DIM: Width of the vectors. It also selects the file that is
            read: ``DIRNAME + 'data/glove.6B/glove.6B.<EMBEDDING_DIM>d.txt'``.
        word_index: Mapping of word -> integer row index in the matrix.

    Returns:
        Array of shape ``(len(word_index) + 1, EMBEDDING_DIM)``. Rows of words
        present in the GloVe file hold the pretrained vector; every other row
        keeps the value drawn from ``np.random.random`` at initialisation.
    """
    # Derive the file name from EMBEDDING_DIM so the vectors read always match
    # the width of the matrix being filled.
    glove_path = DIRNAME + 'data/glove.6B/glove.6B.%dd.txt' % EMBEDDING_DIM

    embeddings_index = {}
    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding, which can fail on non-ASCII tokens.
    with open(glove_path, encoding='utf-8') as f:
        for line in f:
            # Each line is "<word> <v1> <v2> ...".
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    print('Total %s word vectors in Glove 6B %dd.'
          % (len(embeddings_index), EMBEDDING_DIM))

    embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # Words missing from the GloVe index keep their random row.
            embedding_matrix[i] = embedding_vector

    return embedding_matrix


# Build the embedding weights for the vocabulary currently held in word_index.
embedding_matrix = glove_embedding_matrix(EMBEDDING_DIM, word_index)

Total 400000 word vectors in Glove 6B 100d.


In [0]:
# Embedding layer initialised from embedding_matrix and left trainable.
# NOTE: the model cells that follow (CNN, bidirectional LSTM, attention GRU)
# all call this one layer instance, so they share its weights — training one
# model also changes the embeddings the next one starts from.
embedding_layer = Embedding(
    len(word_index) + 1,
    EMBEDDING_DIM,
    weights=[embedding_matrix],
    input_length=MAX_SEQUENCE_LENGTH,
    trainable=True)

<a id='CNN'></a>

## CNN - Yoon Kim

In [3]:
# CNN text classifier: three parallel convolutions (kernel sizes 3, 4, 5) over
# the embedded sequence, followed by two further conv/pool stages and a dense head.
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)

# One Conv1D + MaxPooling1D branch per kernel size, all reading the same embeddings.
kernel_sizes = [3,4,5]
convs = []
for fsz in kernel_sizes:
    l_conv = Conv1D(filters=128, kernel_size=fsz, activation='relu')(embedded_sequences)
    l_pool = MaxPooling1D(pool_size=5)(l_conv)
    convs.append(l_pool)
    
# The pooled branches are joined along axis 1 (one after another), not along
# the filter axis.
l_merge = Concatenate(axis=1)(convs)
l_cov1= Conv1D(filters=128, kernel_size=5, activation='relu')(l_merge)
l_pool1 = MaxPooling1D(pool_size=5)(l_cov1)
l_cov2 = Conv1D(filters=128, kernel_size=5, activation='relu')(l_pool1)
# NOTE(review): pool_size=30 needs at least 30 steps to remain at this point,
# which presumably ties this architecture to MAX_SEQUENCE_LENGTH = 1000 — confirm.
l_pool2 = MaxPooling1D(pool_size=30)(l_cov2)
l_flat = Flatten()(l_pool2)
l_dense = Dense(units=128, activation='relu')(l_flat)
# Two-way softmax matching the one-hot labels.
preds = Dense(units=2, activation='softmax')(l_dense)

model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])
# summary() prints the table itself; the surrounding print() additionally
# prints its return value.
print(model.summary())

In [0]:
# Training settings for the model compiled in the previous cell.
epochs = 10
batch_size = 1024

# Keep only the weights of the epoch with the lowest validation loss ...
file_path = 'weights.h5'
checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1,
                             save_best_only=True, mode='min')
# ... and stop once validation loss has not improved for 5 consecutive epochs.
early = EarlyStopping(monitor='val_loss', mode='min', patience=5)

callbacks_list = [checkpoint, early]
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          shuffle=True,
          validation_data=(x_val, y_val),
          callbacks=callbacks_list)

Train on 25000 samples, validate on 25000 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.56476, saving model to weights.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.56476 to 0.29674, saving model to weights.h5
Epoch 3/10

Epoch 00003: val_loss did not improve from 0.29674
Epoch 4/10

Epoch 00004: val_loss did not improve from 0.29674
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.29674
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.29674
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.29674


<keras.callbacks.History at 0x7ff2a9d73198>

### Customized IMDB

In [0]:
# Same CNN architecture as the cell under the "CNN" heading, rebuilt here so a
# fresh model (with new conv/dense weights) is trained; the embedding layer
# instance is the shared one and keeps whatever weights it currently has.
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)

# One Conv1D + MaxPooling1D branch per kernel size.
kernel_sizes = [3,4,5]
convs = []
for fsz in kernel_sizes:
    l_conv = Conv1D(filters=128, kernel_size=fsz, activation='relu')(embedded_sequences)
    l_pool = MaxPooling1D(pool_size=5)(l_conv)
    convs.append(l_pool)
    
# Branch outputs are concatenated along axis 1.
l_merge = Concatenate(axis=1)(convs)
l_cov1= Conv1D(filters=128, kernel_size=5, activation='relu')(l_merge)
l_pool1 = MaxPooling1D(pool_size=5)(l_cov1)
l_cov2 = Conv1D(filters=128, kernel_size=5, activation='relu')(l_pool1)
l_pool2 = MaxPooling1D(pool_size=30)(l_cov2)
l_flat = Flatten()(l_pool2)
l_dense = Dense(units=128, activation='relu')(l_flat)
# Two-way softmax matching the one-hot labels.
preds = Dense(units=2, activation='softmax')(l_dense)

model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])
# summary() prints the table itself; print() additionally prints its return value.
print(model.summary())

In [0]:
# Train for a fixed 20 epochs with no callbacks (no checkpointing or early
# stopping), validating on (x_val, y_val) after every epoch.
model.fit(x_train, y_train,
          epochs=20,
          batch_size=50,
          validation_data=(x_val, y_val))

  
  
  
  if sys.path[0] == '':


model fitting - more complex convolutional neural network
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1000)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1000, 100)    8150400     input_1[0][0]                    
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 998, 128)     38528       embedding_1[0][0]                
__________________________________________________________________________________________________
conv1d_2 (Conv1D)               (None, 997, 128)     51328       embedding_1[0][0]                
___________________________________________________



Train on 20000 samples, validate on 5000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f26d5446470>

<a id='BiLSTM'></a>

## Bidirectional LSTM

In [0]:
# Bidirectional LSTM classifier: embeddings -> BiLSTM -> 2-way softmax.
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)

# return_sequences is left at its default, so the BiLSTM yields one vector per
# review rather than one per time step.
l_lstm = Bidirectional(LSTM(units=100))(embedded_sequences)
preds = Dense(units=2, activation='softmax')(l_lstm)

model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

print("model fitting - Bidirectional LSTM")
# summary() prints the table itself; print() additionally prints its return value.
print(model.summary())
model.fit(x_train, y_train,
          epochs=10,
          batch_size=50,
          validation_data=(x_val, y_val))

model fitting - Bidirectional LSTM
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 1000)              0         
_________________________________________________________________
embedding_2 (Embedding)      (None, 1000, 200)         4000000   
_________________________________________________________________
bidirectional_2 (Bidirection (None, 200)               240800    
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 402       
Total params: 4,241,202
Trainable params: 4,241,202
Non-trainable params: 0
_________________________________________________________________
None
Train on 25000 samples, validate on 25000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: ignored

<a id='AttGRU'></a>

## Attention GRU

In [0]:
from keras import backend as K
from keras import activations, initializers, regularizers, constraints
from keras.layers import Layer, InputSpec


class Attention(Layer):
    """Attention pooling over the step axis of a 3D tensor.

    For an input of shape (batch, step_dim, features_dim) the layer scores every
    step with a learned vector W (plus an optional per-step bias), applies tanh,
    normalises exp(score) across the steps and returns the weighted sum of the
    input steps, shape (batch, features_dim).
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        """
        Args:
            step_dim: Number of steps (size of axis 1) of the input; used to
                reshape the scores in `call`.
            W_regularizer, b_regularizer: Regularizer identifiers for W and b.
            W_constraint, b_constraint: Constraint identifiers for W and b.
            bias: Whether to add a learned per-step bias to the scores.
            **kwargs: Forwarded to `Layer.__init__`.
        """
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        # Real value is only known once build() has seen the input shape.
        self.features_dim = 0
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create W (one weight per feature) and, if enabled, b (one per step)."""
        assert len(input_shape) == 3

        # name/shape are passed by keyword so the call matches the documented
        # add_weight signature instead of depending on positional-argument
        # handling for legacy call styles.
        self.W = self.add_weight(name='{}_W'.format(self.name),
                                 shape=(input_shape[-1], ),
                                 initializer=self.init,
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight(name='{}_b'.format(self.name),
                                     shape=(input_shape[1], ),
                                     initializer='zeros',
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None
        super(Attention, self).build(input_shape)  # Be sure to call this at the end

    def compute_mask(self, input, input_mask=None):
        """Return None: no mask is passed on to the layers that follow."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of `x` over its step axis."""
        features_dim = self.features_dim
        step_dim = self.step_dim

        # Flatten to (batch * steps, features), project onto W, then fold back
        # to one score per (sample, step).
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # Masked steps get zero weight before normalisation.
        if mask is not None:
            a *= K.cast(mask, K.floatx())

        # epsilon guards against division by zero when every step is masked.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # (batch, steps) -> (batch, steps, 1) so the weights broadcast over features.
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Output drops the step axis: (batch, features_dim)."""
        return input_shape[0],  self.features_dim

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created
        from its config (e.g. when a saved model is loaded with this class
        passed in `custom_objects`)."""
        config = {
            'step_dim': self.step_dim,
            'W_regularizer': regularizers.serialize(self.W_regularizer),
            'b_regularizer': regularizers.serialize(self.b_regularizer),
            'W_constraint': constraints.serialize(self.W_constraint),
            'b_constraint': constraints.serialize(self.b_constraint),
            'bias': self.bias,
        }
        base_config = super(Attention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
    
    
# class AttLayer(Layer):
#     def __init__(self, **kwargs):
#         self.init = initializations.get('normal')
#         #self.input_spec = [InputSpec(ndim=3)]
#         super(AttLayer, self).__init__(**kwargs)

#     def build(self, input_shape):
#         assert len(input_shape)==3
#         #self.W = self.init((input_shape[-1],1))
#         self.W = self.init((input_shape[-1],))
#         #self.input_spec = [InputSpec(shape=input_shape)]
#         self.trainable_weights = [self.W]
#         super(AttLayer, self).build(input_shape)  # be sure you call this somewhere!

#     def call(self, x, mask=None):
#         eij = K.tanh(K.dot(x, self.W))
        
#         ai = K.exp(eij)
#         weights = ai/K.sum(ai, axis=1).dimshuffle(0,'x')
        
#         weighted_input = x*weights.dimshuffle(0,1,'x')
#         return weighted_input.sum(axis=1)

#     def get_output_shape_for(self, input_shape):
#         return (input_shape[0], input_shape[-1])

In [0]:
# Attention GRU classifier: embeddings -> BiGRU (all steps) -> Attention -> softmax.
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)

# return_sequences=True keeps one output per time step, which is what the
# Attention layer pools over.
l_gru = Bidirectional(GRU(units=100, return_sequences=True))(embedded_sequences)
# step_dim passed to Attention must equal the number of time steps of l_gru.
l_attn = Attention(MAX_SEQUENCE_LENGTH)(l_gru)
preds = Dense(units=2, activation='softmax')(l_attn)

model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

print("model fitting - attention GRU network")
# summary() prints the table itself; print() additionally prints its return value.
print(model.summary())
model.fit(x_train, y_train, validation_data=(x_val, y_val),
          epochs=10, batch_size=50)

model fitting - attention GRU network
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 1000)              0         
_________________________________________________________________
embedding_4 (Embedding)      (None, 1000, 200)         4000000   
_________________________________________________________________
bidirectional_4 (Bidirection (None, 1000, 200)         180600    
_________________________________________________________________
attention_1 (Attention)      (None, 200)               1200      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 402       
Total params: 4,182,202
Trainable params: 4,182,202
Non-trainable params: 0
_________________________________________________________________
None
Train on 25000 samples, validate on 25000 samples
Epoch 1/10
Epoch 2/10
 1000/25000 [>...

KeyboardInterrupt: ignored

In [0]:
# save_weights writes an HDF5 file and does not create missing parent
# directories, so make sure 'models/' exists before saving.
os.makedirs('models', exist_ok=True)
model.save_weights('models/AttGRU_weights.h5')

<a id='H-LSTM'></a>

## Hierarchical LSTM

The input data must now be constructed as a 3D tensor rather than the 2D tensor used by the previous models. The input tensor has shape (# of reviews in each batch, # of sentences per review, # of words in each sentence).

In [0]:
# import nltk
# nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/kokmeng/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [0]:
from nltk import tokenize

def clean_str(string):
    """
    Normalise a raw text string before tokenization.

    Deletes every backslash, single quote and double quote, then strips
    surrounding whitespace and lower-cases the result.
    """
    # Same behaviour as the clean_str defined earlier in this notebook; this
    # later definition is the one in effect once this cell has run.
    for unwanted in ("\\", "'", '"'):
        string = string.replace(unwanted, "")
    return string.strip().lower()

# Read the labelled reviews (tab-separated) from the data directory.
data_train = pd.read_csv(DIRNAME + 'data/labeledTrainData.tsv', sep='\t')

# texts: one cleaned string per review (HTML stripped, then clean_str applied).
texts = [clean_str(BeautifulSoup(raw_review, 'html5lib').get_text())
         for raw_review in data_train.review]
# reviews: the same texts, each split into a list of sentences.
reviews = [tokenize.sent_tokenize(review_text) for review_text in texts]

# One-hot encode the sentiment column.
labels = to_categorical(np.asarray(data_train.sentiment))
print('Shape of label tensor:', labels.shape)

In [0]:
MAX_SENT_LENGTH = 100
MAX_SENTS = 15
MAX_NUM_WORDS = 20000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2

tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index

# data[i, j, k] is the id of the k-th kept word of sentence j of review i;
# positions that are never written stay 0.
data = np.zeros((len(texts), MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
for i, sentences in enumerate(reviews):
    # Only the first MAX_SENTS sentences of a review are used.
    for j, sent in enumerate(sentences[:MAX_SENTS]):
        k = 0
        for word in text_to_word_sequence(sent):
            # Row is full: nothing further can be stored for this sentence.
            if k >= MAX_SENT_LENGTH:
                break
            word_id = word_index[word]
            # Words whose id is MAX_NUM_WORDS or above are skipped and do not
            # consume a slot.
            if word_id < MAX_NUM_WORDS:
                data[i, j, k] = word_id
                k += 1
print('Shape of data tensor:', data.shape)

print('Found %s unique tokens.' % len(word_index))

Shape of data tensor: (25000, 15, 100)
Found 81503 unique tokens.


In [0]:
# Shuffling and splitting into train and validation sets
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
num_validation_samples = int(VALIDATION_SPLIT * data.shape[0])

x_train = data[:-num_validation_samples]
y_train = labels[:-num_validation_samples]
x_val = data[-num_validation_samples:]
y_val = labels[-num_validation_samples:]

print('Number of positive and negative reviews in training and validation set')
print(y_train.sum(axis=0))
print(y_val.sum(axis=0))

Number of positive and negative reviews in traing and validation set 
[ 9953. 10047.]
[2547. 2453.]


In [None]:
# Rebuild the embedding weights for the vocabulary currently held in word_index.
embedding_matrix = glove_embedding_matrix(EMBEDDING_DIM, word_index)

In [0]:
# Sentence-level embedding layer: input_length is MAX_SENT_LENGTH because the
# hierarchical models embed one sentence at a time.
# NOTE: both hierarchical model cells below call this one layer instance, so
# they share its (trainable) weights.
embedding_layer = Embedding(
    len(word_index) + 1,
    EMBEDDING_DIM,
    weights=[embedding_matrix],
    input_length=MAX_SENT_LENGTH,
    trainable=True)

In [0]:
# Sentence encoder: word ids of one sentence -> embeddings -> BiLSTM vector.
sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)

l_lstm = Bidirectional(LSTM(units=100))(embedded_sequences)
sentEncoder = Model(sentence_input, l_lstm)

# Review encoder: TimeDistributed applies the sentence encoder to each of the
# MAX_SENTS sentences, then a second BiLSTM reads the sentence vectors.
review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
review_encoder = TimeDistributed(sentEncoder)(review_input)

l_lstm_sent = Bidirectional(LSTM(units=100))(review_encoder)
preds = Dense(units=2, activation='softmax')(l_lstm_sent)

model = Model(review_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

print("model fitting - Hierachical LSTM")
# summary() prints the table itself; print() additionally prints its return value.
print(model.summary())
model.fit(x_train, y_train, validation_data=(x_val, y_val),
          epochs=10, batch_size=50)

model fitting - Hierachical LSTM
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 15, 100)           0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 15, 200)           8311200   
_________________________________________________________________
bidirectional_3 (Bidirection (None, 200)               240800    
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 402       
Total params: 8,552,402
Trainable params: 8,552,402
Non-trainable params: 0
_________________________________________________________________
None
Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fd72874fe80>

In [0]:
# Replace the current model's weights with those stored on disk.
# NOTE(review): no cell visible in this notebook writes
# 'models/HLSTM_weights.h5' (only the AttGRU and HAN weights are saved), and
# this runs right after training — confirm whether save_weights was intended.
model.load_weights('models/HLSTM_weights.h5')

<a id='HAN'></a>

## Hierarchical Attention Networks

In [0]:
# x = TimeDistributed(Dense(300), input_shape=(MAX_SEQUENCE_LENGTH, 392))(concated)
# x = TimeDistributed(Activation('tanh'))(x)
# x = TimeDistributed(Dense(1))(x)
# x = Flatten()(x)
# x = Activation('softmax')(x)
# x = Reshape((-1,1))(x)
# final_model = merge([concated, x], mode = lambda x: K.batch_dot(x[0], x[1], axes=[1,1]),
#                     output_shape= lambda x: (x[0][0], x[0][2]))

In [0]:
# Sentence encoder: word ids -> embeddings -> BiGRU (all steps) -> per-step
# Dense (no activation given) -> Attention pooling over the words.
sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)

l_gru = Bidirectional(GRU(units=100, return_sequences=True))(embedded_sequences)
l_dense = TimeDistributed(Dense(units=200))(l_gru)
# step_dim must equal the number of word positions per sentence.
l_attn = Attention(MAX_SENT_LENGTH)(l_dense)
sentEncoder = Model(sentence_input, l_attn)

# Review encoder: the sentence encoder is applied to each sentence, then the
# same BiGRU -> Dense -> Attention pattern pools over the sentences.
review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
review_encoder = TimeDistributed(sentEncoder)(review_input)

l_gru_sent = Bidirectional(GRU(units=100, return_sequences=True))(review_encoder)
l_dense_sent = TimeDistributed(Dense(units=200))(l_gru_sent)
# step_dim must equal the number of sentences per review.
l_attn_sent = Attention(MAX_SENTS)(l_dense_sent)
preds = Dense(units=2, activation='softmax')(l_attn_sent)

model = Model(review_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

print("model fitting - Hierachical attention network")
# summary() prints the table itself; print() additionally prints its return value.
print(model.summary())
model.fit(x_train, y_train, validation_data=(x_val, y_val),
          epochs=10, batch_size=50)

model fitting - Hierachical attention network
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 15, 100)           0         
_________________________________________________________________
time_distributed_6 (TimeDist (None, 15, 200)           8311500   
_________________________________________________________________
bidirectional_5 (Bidirection (None, 15, 200)           180600    
_________________________________________________________________
time_distributed_7 (TimeDist (None, 15, 200)           40200     
_________________________________________________________________
attention_3 (Attention)      (None, 200)               215       
_________________________________________________________________
dense_6 (Dense)              (None, 2)                 402       
Total params: 8,532,917
Trainable params: 8,532,917
Non-trainable params: 0
____________________

<keras.callbacks.History at 0x7fa49d2c6278>

In [0]:
# save_weights writes an HDF5 file and does not create missing parent
# directories, so make sure 'models/' exists before saving.
os.makedirs('models', exist_ok=True)
model.save_weights('models/han_weights.h5')