# Imports

In [1]:
import pandas as pd
import numpy as np

from nltk import word_tokenize
from nltk import sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

from gensim.models import Word2Vec

import tensorflow as tf

from keras.layers import Embedding

from keras.utils import to_categorical, pad_sequences

# Class definition for the data container.

In [2]:
class Data(object):
    """Container for one dataset split: raw sentences, tag sequences and arrays.

    ``unique_words`` and ``unique_ner_tags`` are class-level lookup tables
    (token -> index, tag -> index) shared by every instance; they are rebound
    by ``Preprocessing.unicity``. ``MAX_LENGTH`` is the padded sequence length
    used by ``Vectorization``.
    """

    unique_words = {}
    unique_ner_tags = {}
    MAX_LENGTH = 200

    def __init__(self):
        self.sentences = []        # list of token lists
        self.sentences_num = None  # filled in by Vectorization
        self.ner_tags = []         # list of NER-tag lists, parallel to sentences
        self.ner_tags_num = None   # filled in by Vectorization
        self.chunk_tags = []
        self.pos_tags = []
        self.x, self.y = None, None  # padded feature / label arrays

    def word2vec(self, vector_size=100):
        """Train and return a gensim Word2Vec model on ``self.sentences``."""
        return Word2Vec(
            self.sentences,
            vector_size=vector_size,
            window=5,
            min_count=1,
            workers=4,
        )

    def word2idx(self, word):
        """Return the index of ``word`` in ``unique_words``, or None if unknown."""
        return self.unique_words.get(word)

    def idx2word(self, index):
        """Return the word stored under ``index``, or None if there is none."""
        return self._reverse_lookup(self.unique_words, index)

    def tag2idx(self, tag):
        """Return the index of ``tag`` in ``unique_ner_tags``, or None if unknown."""
        return self.unique_ner_tags.get(tag)

    def idx2tag(self, index):
        """Return the NER tag stored under ``index``, or None if there is none."""
        return self._reverse_lookup(self.unique_ner_tags, index)

    @staticmethod
    def _reverse_lookup(table, index):
        """Return the first key of ``table`` whose value equals ``index``."""
        for key, value in table.items():
            if value == index:
                return key
        return None

# Loading data

In [3]:
class Loading():
    """Read a CoNLL-2003 style file into a ``Data`` container.

    Every non-blank line is expected to hold at least four whitespace-separated
    columns: token, POS tag, chunk tag, NER tag. Blank lines and ``-DOCSTART-``
    marker lines end the current sentence.
    """

    def __init__(self, data: "Data", file):
        self.data = data
        self.load_sentences(file)
        print("Loading successfully")

    def load_sentences(self, filepath):
        """Append the sentences found in ``filepath`` to ``self.data``.

        Raises:
            ValueError: if a non-blank line has fewer than four columns.
        """
        targets = (
            self.data.sentences,
            self.data.pos_tags,
            self.data.chunk_tags,
            self.data.ner_tags,
        )
        current = ([], [], [], [])
        with open(filepath, 'r', encoding='utf-8') as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                # Blank (or whitespace-only) lines and document markers are
                # sentence boundaries, whatever their trailing columns are.
                if not fields or fields[0] == '-DOCSTART-':
                    current = self._flush(targets, current)
                    continue
                if len(fields) < 4:
                    raise ValueError(
                        f"{filepath}:{line_no}: expected 4 columns, got {len(fields)}"
                    )
                for column, value in zip(current, fields):
                    column.append(value)
        # The file may end without a trailing blank line; keep the last sentence.
        self._flush(targets, current)

    @staticmethod
    def _flush(targets, current):
        """Store a non-empty sentence and return fresh, empty column buffers."""
        if not current[0]:
            return current
        for target, column in zip(targets, current):
            target.append(column)
        return ([], [], [], [])

# Preprocessing

In [4]:
class Preprocessing():
    """Text clean-up steps applied in place to the sentences of a ``Data`` object.

    Args:
        data: container whose ``sentences`` (and ``ner_tags``) are processed.
        text: optional raw text; only used by ``tokenize``.
        lang: language name passed to the NLTK tokenizers and stop-word list.
    """

    def __init__(self, data:Data, text=None, lang="english"):
        self.data = data
        self.text = text
        self.lang = lang

    def tokenize(self):
        """Split ``self.text`` into sentences of tokens, dropping stop words.

        Does nothing when no text was supplied.
        """
        if self.text is None:
            return
        # Load the stop-word list once; a set makes each membership test O(1).
        stop_words = set(stopwords.words(self.lang))
        self.data.sentences = [
            [
                token
                for token in word_tokenize(sentence, language=self.lang)
                if token not in stop_words
            ]
            for sentence in sent_tokenize(self.text, language=self.lang)
        ]

    def lowercasing(self):
        """Lower-case every token of every sentence."""
        self.data.sentences = [[word.lower() for word in sentence] for sentence in self.data.sentences]

    def lemmatize(self):
        """Lemmatize every token with WordNet, then rebuild the lookup tables."""
        lemmatizer = WordNetLemmatizer()
        self.data.sentences = [[lemmatizer.lemmatize(word) for word in sentence] for sentence in self.data.sentences]
        self.unicity()  # TODO: must be changed (kept from the original author)

    def unicity(self):
        """Rebind ``Data.unique_words`` and ``Data.unique_ner_tags``.

        Words are numbered from 1 and tags from 0. Both vocabularies are
        sorted before numbering so that the same set of words/tags always
        yields the same indices, independent of set iteration order.
        """
        words = {word for sentence in self.data.sentences for word in sentence}
        Data.unique_words = {
            word: index for index, word in enumerate(sorted(words), start=1)
        }
        tags = {tag for sentence_tags in self.data.ner_tags for tag in sentence_tags}
        Data.unique_ner_tags = {
            tag: index for index, tag in enumerate(sorted(tags))
        }

# Vectorization

In [5]:
class Vectorization():
    """Turn the sentences and NER tags of a ``Data`` object into padded arrays.

    On construction the tokens and tags are mapped to integer indices through
    ``Data.unique_words`` / ``Data.unique_ner_tags`` (a missing entry raises
    ``KeyError``).
    """

    def __init__(self, data:Data):
        self.data = data
        data.sentences_num = [[Data.unique_words[word] for word in sentence] for sentence in data.sentences]
        data.ner_tags_num = [[Data.unique_ner_tags[tag] for tag in tags] for tags in data.ner_tags]

    def padding_x(self):
        """Embed the sentences with Word2Vec and pad them to ``MAX_LENGTH``.

        Note that ``data.sentences_num`` is overwritten with the per-token
        vectors. Nothing happens when there are no sentences.
        """
        if not self.data.sentences_num:
            return
        self.data.sentences_num = self.word2vec()
        self.data.x = pad_sequences(
            sequences=self.data.sentences_num,
            maxlen=self.data.MAX_LENGTH,
            dtype="float32",
            padding="post",
            value=0
        )

    def padding_y(self):
        """Pad the tag-index sequences to ``MAX_LENGTH`` with the index of "O".

        Raises:
            ValueError: if the tag "O" is not in ``unique_ner_tags``; padding
                with ``None`` would silently produce NaN labels.
        """
        if not self.data.ner_tags_num:
            return
        pad_label = self.data.unique_ner_tags.get("O")
        if pad_label is None:
            raise ValueError('cannot pad labels: tag "O" is missing from unique_ner_tags')
        self.data.y = pad_sequences(
            sequences=self.data.ner_tags_num,
            maxlen=self.data.MAX_LENGTH,
            dtype="float32",
            padding="post",
            value=pad_label
        )

    def word2vec(self, min_count=1, vector_size=100, window=5):
        """Return, for every sentence, the list of Word2Vec vectors of its tokens.

        NOTE(review): a new Word2Vec model is trained on ``self.data.sentences``
        at every call, so vectors built for different ``Data`` objects come from
        different models -- TODO confirm this is intended.
        """
        word2vec_model = Word2Vec(self.data.sentences, min_count=min_count, vector_size=vector_size, window=window)
        return [[word2vec_model.wv[word] for word in sentence] for sentence in self.data.sentences]

    def vectorized_x(self):
        """Build ``data.x`` as a float32 array; left untouched if nothing was padded."""
        self.padding_x()
        if self.data.x is None:
            # np.array(None, dtype="float32") would yield a NaN scalar.
            return
        self.data.x = np.array(self.data.x, dtype="float32")

    def vectorized_y(self):
        """Build ``data.y`` as a one-hot float32 array over the NER tags."""
        self.padding_y()
        if self.data.y is None:
            return
        labels = np.asarray(self.data.y, dtype="int64")
        # One vectorized lookup instead of one to_categorical() call per tag.
        one_hot = np.eye(len(Data.unique_ner_tags), dtype="float32")
        self.data.y = one_hot[labels]

# Main

## Pretraining for CONLL2003

In [43]:

def pretraining_CoNLL3(path: str, base_dir: str = "conll2003_english/"):
    """Load one CoNLL-2003 split and return it fully vectorized.

    Args:
        path: file name of the split inside ``base_dir`` (e.g. ``"train.txt"``).
        base_dir: directory holding the dataset files; defaults to the
            location previously hard-coded here.

    Returns:
        The ``Data`` object with ``x`` and ``y`` populated.
    """
    import os

    data = Data()
    Loading(data=data, file=os.path.join(base_dir, path))
    preprocessing = Preprocessing(data=data)
    preprocessing.lowercasing()
    preprocessing.lemmatize()
    vector = Vectorization(data=data)
    vector.vectorized_x()
    vector.vectorized_y()
    return data
    

    # Loading(data = test, file=base_file + "test.txt")

## Define Trainset

In [44]:
# Build the training split, then show its first sentence and the matching
# NER tags as they look after lowercasing and lemmatization.
train = pretraining_CoNLL3("train.txt")
print(train.sentences[0])
print(train.ner_tags[0])

Loading successfully
['eu', 'reject', 'german', 'call', 'to', 'boycott', 'british', 'lamb', '.']
['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O']


In [45]:
# Re-display the first training sentence and its NER tags.
print(train.sentences[0])
print(train.ner_tags[0])

['eu', 'reject', 'german', 'call', 'to', 'boycott', 'british', 'lamb', '.']
['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O']


In [46]:
# Sanity-check the array shapes: x is (sentences, MAX_LENGTH, vector size),
# y is (sentences, MAX_LENGTH, number of NER tags).
print("X_train", train.x.shape)
print("y_train", train.y.shape)
print(type(train.x))

X_train (14041, 200, 100)
y_train (14041, 200, 9)
<class 'numpy.ndarray'>


## Define Testset

In [47]:
# Build the test split with the same pipeline and report its array shapes.
test = pretraining_CoNLL3("test.txt")
print("X_test", test.x.shape)
print("y_test", test.y.shape)

Loading successfully
X_test (3453, 200, 100)
y_test (3453, 200, 9)


## Define ValidSet

In [48]:
# Build the validation split with the same pipeline and report its array shapes.
valid = pretraining_CoNLL3("valid.txt")
print("X_valid", valid.x.shape)
print("y_valid", valid.y.shape)

Loading successfully
X_valid (3250, 200, 100)
y_valid (3250, 200, 9)


## New input text

In [None]:
# Run the preprocessing and embedding steps on a free-text example
# (no NER tags are attached, so only x is built).
test_text = Data()

preprocessing = Preprocessing(data = test_text, text = "Obama is the president of the United States. I am from Guinea, nice to meet you.")
preprocessing.tokenize()
preprocessing.lowercasing()
# NOTE: lemmatize() calls unicity(), which rebinds the class-level
# Data.unique_words / Data.unique_ner_tags from this text alone.
preprocessing.lemmatize()
print(test_text.sentences)

vector = Vectorization(test_text)
vector.vectorized_x()
print(test_text.x.shape)

# Training

In [49]:
# Sizes derived from the class-level lookup tables; they reflect whichever
# text/split was preprocessed last, since each run rebinds the tables.
NUM_WORDS = len(Data.unique_words)
NUM_CLASSES = len(Data.unique_ner_tags)
MAX_LENGTH = Data.MAX_LENGTH
OUTPUT_DIM = 100
print(MAX_LENGTH, NUM_CLASSES, NUM_WORDS, OUTPUT_DIM)
# Hyperparameters
# NOTE(review): NUM_FILTERS and HIDDEN_DIM are not referenced by any active
# (non-commented) model code visible in this file.
EMBEDDING_DIM = 100
NUM_FILTERS = 256
KERNEL_SIZE = 3
HIDDEN_DIM = 200
DROPOUT_RATE = 0.5
BATCH_SIZE = 32
EPOCHS = 10


200 9 8419 100


In [50]:
# from sklearn.feature_extraction.text import TfidfVectorizer
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv1D, MaxPooling1D


# Build CNN model: two 'same'-padded Conv1D layers keep the sequence length,
# so the final Dense softmax yields one tag distribution per token position.
model = Sequential()
model.add(Conv1D(64, KERNEL_SIZE, activation='relu', input_shape=(MAX_LENGTH, EMBEDDING_DIM), padding='same'))
# model.add(MaxPooling1D(2, padding='same'))
model.add(Dropout(DROPOUT_RATE))
model.add(Conv1D(32, KERNEL_SIZE, activation='relu', padding='same'))
# model.add(MaxPooling1D(2))
# model.add(Dropout(DROPOUT_RATE))
# model.add(Dense(HIDDEN_DIM, activation='relu'))
model.add(Dropout(DROPOUT_RATE))
model.add(Dense(NUM_CLASSES, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.summary()
# Train CNN model on the training split, validating on the validation split.
# NOTE(review): padded positions are labelled "O" and count towards the loss
# and the accuracy, which presumably inflates the reported accuracy -- confirm.
model.fit(train.x, train.y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=(valid.x, valid.y))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x16f47652460>

In [51]:
# Persist the trained CNN to disk.
model.save("model_cnn.keras")

In [52]:
# Reload the saved CNN as a separate model object.
cnn_model = tf.keras.models.load_model("model_cnn.keras")

In [53]:
from keras_contrib.layers import CRF

# cnn_model.trainable = False

# Build CRF layer and append it to the reloaded CNN.
# NOTE(review): the CRF is stacked on top of the CNN's softmax Dense layer
# (see the summary below), i.e. it receives probabilities rather than raw
# scores -- TODO confirm this is intended.
crf = CRF(NUM_CLASSES, learn_mode='marginal')
cnn_model.add(crf)
cnn_model.compile(loss=crf.loss_function, optimizer='adam', metrics=[crf.accuracy])

cnn_model.summary()

Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_30 (Conv1D)          (None, 200, 64)           19264     
                                                                 
 dropout_38 (Dropout)        (None, 200, 64)           0         
                                                                 
 conv1d_31 (Conv1D)          (None, 200, 32)           6176      
                                                                 
 dropout_39 (Dropout)        (None, 200, 32)           0         
                                                                 
 dense_23 (Dense)            (None, 200, 9)            297       
                                                                 
 crf_5 (CRF)                 (None, 200, 9)            189       
                                                                 
Total params: 25,926
Trainable params: 25,926
Non-tra



In [41]:
# NOTE(review): this trains `model`, not the CRF-extended `cnn_model`, and it
# passes the *test* split as validation data, whereas the earlier fit used
# the validation split -- TODO confirm both are intended.
model.fit(train.x, train.y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=(test.x, test.y))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x16f477bffa0>

In [56]:
from keras.models import Model, Sequential
from keras.layers import InputLayer, Embedding, Conv1D, GlobalMaxPooling1D, Dense, TimeDistributed, Dropout
from keras_contrib.layers import CRF
from keras_contrib.losses import crf_loss
from keras_contrib.metrics import crf_viterbi_accuracy
# Fresh CNN + CRF tagger: the inputs are the pre-computed word vectors
# (MAX_LENGTH x OUTPUT_DIM), so no Embedding layer is used.
model = Sequential()

# model.add(InputLayer(input_shape=(MAX_LENGTH,)))
# model.add(Embedding(input_dim=NUM_WORDS + 1, output_dim=OUTPUT_DIM, input_length=MAX_LENGTH))
model.add(Conv1D(64, 3, padding='same', activation='relu', input_shape=(MAX_LENGTH, OUTPUT_DIM)))
# model.add(Dropout(0.5))
model.add(Conv1D(32, 3, padding='same', activation='relu'))
# model.add(Dropout(0.5))
# Linear per-token scores (no activation) feeding the CRF layer.
model.add(Dense(units=NUM_CLASSES))

# sparse_target=False: targets are passed as one-hot vectors.
crf_layer = CRF(units=NUM_CLASSES, sparse_target=False)
model.add(crf_layer)

model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_18 (Conv1D)          (None, 200, 64)           19264     
                                                                 
 conv1d_19 (Conv1D)          (None, 200, 32)           6176      
                                                                 
 dense_10 (Dense)            (None, 200, 9)            297       
                                                                 
 crf_9 (CRF)                 (None, 200, 9)            189       
                                                                 
Total params: 25,926
Trainable params: 25,926
Non-trainable params: 0
_________________________________________________________________


In [None]:
from keras.losses import SparseCategoricalCrossentropy, Loss
class CustomNonPaddingTokenLoss(Loss):
    """Sparse categorical cross-entropy averaged over non-padding tokens only.

    Positions whose true label is 0 are treated as padding and excluded.
    ``y_pred`` is interpreted as logits (``from_logits=True``).
    NOTE(review): assumes integer labels with 0 reserved for padding -- this
    differs from the one-hot targets built elsewhere in this notebook; confirm.
    """

    def __init__(self, name="custom_ner_loss"):
        super().__init__(name=name)

    def call(self, y_true, y_pred):
        # "none" keeps one loss value per token so the mask can be applied.
        # The string form avoids referencing the `keras` module, which is
        # never imported under that name in this notebook.
        loss_fn = SparseCategoricalCrossentropy(
            from_logits=True, reduction="none"
        )
        loss = loss_fn(y_true, y_pred)
        mask = tf.cast((y_true > 0), dtype=tf.float32)
        loss = loss * mask
        # divide_no_nan returns 0 instead of NaN for a batch of pure padding.
        return tf.math.divide_no_nan(tf.reduce_sum(loss), tf.reduce_sum(mask))

loss = CustomNonPaddingTokenLoss()

In [57]:
# Compile the CRF-topped model with plain categorical cross-entropy.
# NOTE(review): the fit() call that follows fails with "No gradients provided
# for any variable". keras_contrib's crf_loss / crf_viterbi_accuracy (imported
# alongside the model) are presumably the intended loss and metric for a CRF
# output layer -- TODO confirm, including compatibility with the installed Keras.
model.compile('adam', loss=tf.keras.losses.CategoricalCrossentropy(), metrics=["accuracy"])

In [58]:
# Train the CRF-topped model on the training split. With the loss compiled
# above, this call raises the ValueError shown in the output below.
model.fit(train.x, train.y, epochs=10, batch_size=32)

Epoch 1/10


ValueError: in user code:

    File "e:\PFE\CoNLL2003\NERC\venv\lib\site-packages\keras\engine\training.py", line 1284, in train_function  *
        return step_function(self, iterator)
    File "e:\PFE\CoNLL2003\NERC\venv\lib\site-packages\keras\engine\training.py", line 1268, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "e:\PFE\CoNLL2003\NERC\venv\lib\site-packages\keras\engine\training.py", line 1249, in run_step  **
        outputs = model.train_step(data)
    File "e:\PFE\CoNLL2003\NERC\venv\lib\site-packages\keras\engine\training.py", line 1054, in train_step
        self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "e:\PFE\CoNLL2003\NERC\venv\lib\site-packages\keras\optimizers\optimizer.py", line 543, in minimize
        self.apply_gradients(grads_and_vars)
    File "e:\PFE\CoNLL2003\NERC\venv\lib\site-packages\keras\optimizers\optimizer.py", line 1173, in apply_gradients
        grads_and_vars = self.aggregate_gradients(grads_and_vars)
    File "e:\PFE\CoNLL2003\NERC\venv\lib\site-packages\keras\optimizers\optimizer.py", line 1139, in aggregate_gradients
        return optimizer_utils.all_reduce_sum_gradients(grads_and_vars)
    File "e:\PFE\CoNLL2003\NERC\venv\lib\site-packages\keras\optimizers\utils.py", line 33, in all_reduce_sum_gradients
        filtered_grads_and_vars = filter_empty_gradients(grads_and_vars)
    File "e:\PFE\CoNLL2003\NERC\venv\lib\site-packages\keras\optimizers\utils.py", line 77, in filter_empty_gradients
        raise ValueError(

    ValueError: No gradients provided for any variable: (['conv1d_18/kernel:0', 'conv1d_18/bias:0', 'conv1d_19/kernel:0', 'conv1d_19/bias:0', 'dense_10/kernel:0', 'dense_10/bias:0', 'crf_9/kernel:0', 'crf_9/chain_kernel:0', 'crf_9/bias:0', 'crf_9/left_boundary:0', 'crf_9/right_boundary:0'],). Provided `grads_and_vars` is ((None, <tf.Variable 'conv1d_18/kernel:0' shape=(3, 100, 64) dtype=float32>), (None, <tf.Variable 'conv1d_18/bias:0' shape=(64,) dtype=float32>), (None, <tf.Variable 'conv1d_19/kernel:0' shape=(3, 64, 32) dtype=float32>), (None, <tf.Variable 'conv1d_19/bias:0' shape=(32,) dtype=float32>), (None, <tf.Variable 'dense_10/kernel:0' shape=(32, 9) dtype=float32>), (None, <tf.Variable 'dense_10/bias:0' shape=(9,) dtype=float32>), (None, <tf.Variable 'crf_9/kernel:0' shape=(9, 9) dtype=float32>), (None, <tf.Variable 'crf_9/chain_kernel:0' shape=(9, 9) dtype=float32>), (None, <tf.Variable 'crf_9/bias:0' shape=(9,) dtype=float32>), (None, <tf.Variable 'crf_9/left_boundary:0' shape=(9,) dtype=float32>), (None, <tf.Variable 'crf_9/right_boundary:0' shape=(9,) dtype=float32>)).


In [20]:
# Print the CRF layer's documentation to inspect its constructor arguments.
help(crf_layer)

Help on CRF in module keras_contrib.layers.crf object:

class CRF(keras.engine.base_layer.Layer)
 |  CRF(units, learn_mode='join', test_mode=None, sparse_target=False, use_boundary=True, use_bias=True, activation='linear', kernel_initializer='glorot_uniform', chain_initializer='orthogonal', bias_initializer='zeros', boundary_initializer='zeros', kernel_regularizer=None, chain_regularizer=None, boundary_regularizer=None, bias_regularizer=None, kernel_constraint=None, chain_constraint=None, boundary_constraint=None, bias_constraint=None, input_dim=None, unroll=False, **kwargs)
 |  
 |  An implementation of linear chain conditional random field (CRF).
 |  
 |  An linear chain CRF is defined to maximize the following likelihood function:
 |  
 |  $$ L(W, U, b; y_1, ..., y_n) := rac{1}{Z}
 |  \sum_{y_1, ..., y_n} \exp(-a_1' y_1 - a_n' y_n
 |      - \sum_{k=1^n}((f(x_k' W + b) y_k) + y_1' U y_2)), $$
 |  
 |  where:
 |      $Z$: normalization constant
 |      $x_k, y_k$:  inputs and outputs

In [None]:
def myLoss(y_true:tf.Tensor, y_pred):
    """Debug stand-in for a loss: print tensor contents and return 0.

    NOTE(review): the loops take their bounds from ``y_true.shape[1]`` and
    ``y_true.shape[2]`` but use them to index axes 0 and 1, and the last axis
    is hard-coded to 9 -- presumably scratch code; confirm before reuse.
    """
    print("ypred", y_pred.shape)
    for i in range(y_true.shape[1]):
        for j in range(y_true.shape[2]):
            for k in range(0, 9):
                print(y_true[i, j, k])
    # Constant return value: this function does not compute a real loss.
    return 0

def myAccuracy(y_true, y_pred):
    """Debug stand-in for a metric: print predictions, then targets; return 0."""
    for tensor in (y_pred, y_true):
        print(tensor)
    return 0

In [None]:
import numpy as np
import tensorflow as tf
from keras.metrics import Metric
from keras.losses import Loss

class CRFLoss(Loss):
    """Negative mean CRF log-likelihood of the true tag sequences.

    NOTE(review): ``tf.contrib`` was removed in TensorFlow 2.x, so ``call``
    presumably fails on the TensorFlow version used by the rest of this
    notebook -- confirm and port if this class is to be used.
    """

    def __init__(self, num_labels):
        super(CRFLoss, self).__init__()
        self.num_labels = num_labels

    def call(self, y_true, y_pred):
        # NOTE(review): the third argument of crf_log_likelihood is presumably
        # the per-example sequence lengths; an array of ones sized by the
        # number of labels looks wrong -- TODO confirm.
        log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(y_pred, tf.cast(y_true, tf.int32), np.ones([self.num_labels], dtype=np.float32))
        loss = tf.reduce_mean(-log_likelihood)
        return loss

class CRFAccuracy(Metric):
    """Token accuracy that ignores positions whose true label is 0.

    ``y_pred`` is reduced with an argmax over its last axis and compared with
    ``y_true``, so ``y_true`` is expected to hold label indices.
    NOTE(review): label 0 is treated as padding here -- confirm that this
    matches the tag indexing used to build the targets.
    """

    def __init__(self, num_labels):
        super(CRFAccuracy, self).__init__()
        self.num_labels = num_labels
        # Pass the name by keyword: the first positional parameter of
        # add_weight is not the name in every Keras version.
        self.total = self.add_weight(name='total', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate correct and counted tokens; ``sample_weight`` is ignored."""
        y_pred = tf.cast(tf.argmax(y_pred, axis=-1), tf.int32)
        # tf.equal needs both operands in the same dtype.
        y_true = tf.cast(y_true, tf.int32)
        mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)
        correct_predictions = tf.cast(tf.equal(y_true, y_pred), tf.float32) * mask
        self.total.assign_add(tf.reduce_sum(correct_predictions))
        self.count.assign_add(tf.reduce_sum(mask))

    def result(self):
        """Return correct / counted tokens, or 0 when nothing was counted."""
        return tf.math.divide_no_nan(self.total, self.count)

    def reset_state(self):
        """Zero both accumulators."""
        self.total.assign(0.0)
        self.count.assign(0.0)

    def reset_states(self):
        """Backward-compatible alias of ``reset_state``."""
        self.reset_state()


In [None]:
from keras_contrib.losses import crf_loss
from keras_contrib.metrics import crf_accuracy

# Compile with keras_contrib's CRF loss and metric; the commented lines below
# are alternatives that were tried.
model.compile(optimizer='adam', loss=crf_loss, metrics=[crf_accuracy])
# model.compile(optimizer='adam', loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
def my_loss_fn(y_true, y_pred):
    """Placeholder loss: ignores both arguments and yields a fixed value."""
    placeholder_loss = 0.0988
    return placeholder_loss
def my_metric_fn(y_true, y_pred):
    """Placeholder metric: ignores both arguments and yields a fixed value."""
    placeholder_metric = 0.009
    return placeholder_metric
# Compile with the constant placeholder loss and metric.
# NOTE(review): a constant loss does not depend on the model outputs, so this
# is presumably only a smoke test of compile() -- confirm.
model.compile(optimizer='adam', loss=my_loss_fn, metrics=[my_metric_fn])

In [16]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Define the model architecture: two stacked LSTMs that return the full
# sequence, so the softmax Dense layer scores every token position.
model = Sequential()
model.add(LSTM(128, input_shape=(MAX_LENGTH, OUTPUT_DIM), return_sequences=True))
model.add(LSTM(64, return_sequences=True))
# Size the output from NUM_CLASSES instead of a hard-coded 9, matching the
# other model cells and following the tag vocabulary.
model.add(Dense(NUM_CLASSES, activation='softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 200, 128)          117248    
                                                                 
 lstm_1 (LSTM)               (None, 200, 64)           49408     
                                                                 
 dense (Dense)               (None, 200, 9)            585       
                                                                 
Total params: 167,241
Trainable params: 167,241
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Compile the model with categorical cross-entropy on the one-hot tag targets.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the training split (no validation data is passed here).
model.fit(train.x, train.y, epochs=10, batch_size=32)

In [None]:
# Evaluation on the held-out test split; evaluate() returns the loss followed
# by the compiled metric, unpacked here as (loss, accuracy).
loss, accuracy = model.evaluate(test.x, test.y, batch_size=32)

print('Test Loss:', loss)
print('Test Accuracy:', accuracy)

# Noisy 

In [None]:
# from keras_contrib.layers import CRF

# word2vec_model = train.word2vec()

# inputs = tf.keras.layers.Input(shape=(max_length, embedding_dim), dtype=tf.float32, name='sequence_input')
# conv1D = tf.keras.layers.Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(inputs)
# maxPooling1D = tf.keras.layers.MaxPooling1D(pool_size=2)(conv1D)
# outputs = tf.keras.layers.Dense(num_classes, activation='relu')(maxPooling1D)
# base = tf.keras.Model(inputs=inputs, outputs=outputs)

# crf_layer = CRF(num_classes, sparse_target=False)
# model = crf_layer(base)

# model.summary()

In [None]:
# model = tf.keras.Sequential([
#   tf.keras.layers.Input(shape=(max_length, 100), dtype=tf.float32, name='sequence_input'),
#   tf.keras.layers.Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'),
#   tf.keras.layers.MaxPooling1D(pool_size=2),
#   tf.keras.layers.Dense(num_classes, activation='relu'),
#   tf.keras.layers.Flatten(),
#   tf.keras.layers.Dense(num_classes, activation='softmax')
# ])

# model.summary()

In [None]:
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# model.compile(loss=crf_layer.loss_function, optimizer='adam', metrics=[crf_layer.accuracy])


In [None]:
# # Trainning
# batch_size = 56
# num_epochs = 5

# model.fit(train.x, train.y, epochs=num_epochs, batch_size=batch_size)

# # # Evaluation
# # loss, accuracy = model.evaluate(X_test, y_test, batch_size=batch_size)

# # print('Test Loss:', loss)
# # print('Test Accuracy:', accuracy)

In [None]:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(113, 100)))
# model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
# model.add(tf.keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu'))
# model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
# model.add(tf.keras.layers.Flatten())
# model.add(tf.keras.layers.Dense(units=128, activation='relu'))
# model.add(tf.keras.layers.Dense(units=9, activation='softmax'))

# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# model.fit(train.x, train.y, epochs=10, validation_split=0.2)

In [None]:
# # # Compile the model
# # model.compile(optimizer='adam', loss=tfa.losses.SigmoidFocalCrossEntropy(), metrics=[tfa.metrics.F1Score(num_classes=9, threshold=0.5, dtype='float32')])
# import tensorflow_addons as tfa
# # no need to specify a loss for CRFModel, model will compute crf loss by itself
# # model.compile(optimizer=tf.keras.optimizers.Adam(3e-4), metrics=['acc'])
# model.compile(
#     optimizer=tf.keras.optimizers.Adam(3e-4),
#     loss="categorical_crossentropy",
#     metrics=[tfa.metrics.MultiLabelConfusionMatrix(num_classes=num_classes)]
# )

In [None]:
        # vocab_size = len(word2vec_model.wv)
        # embedding_dim = 100
        # embedding_matrix = np.zeros((vocab_size, embedding_dim))
        # for i, vec in enumerate(word2vec_model.wv):
        #     embedding_matrix[i] = vec
        # embedding_layer = Embedding(
        #     input_dim=vocab_size,
        #     output_dim=embedding_dim,
        #     weights=[embedding_matrix],
        #     trainable=False)
        # return embedding_layer


# class DataSet():
#     def __init__(self):
#         self.labels = {"ner_tags": set(), "pos_tags": set(), "chunk_tags": set()}
#         self.word2vec_model = None
#     def unique_values(self, index = "ner_tags"):
#         if self.labels[index].__len__() > 0:
#             return dict(zip(self.labels[index], range(0, len(self.labels[index]))))
#         raise KeyError("Key does not exist !!!")