## Import

In [1]:
# Optional setup: uncomment to clone the NERC project sources into the current working directory.
# !git clone https://github.com/taslimamindia/NERC.git

In [2]:
# Optional one-time setup: uncomment to download the NLTK 'wordnet' and 'stopwords' resources
# (presumably needed by the nerc preprocessing code -- TODO confirm).
# from nltk import download
# download('wordnet')
# download('stopwords')

In [3]:
import os

from nerc.loading import Loading
from nerc.word2vec import Model_Word2Vec
from nerc.cnn import Model_CNN
from nerc.functions import checkDataset

In [4]:
# Directory holding the CoNLL-2003 English split files (train.txt, test.txt, valid.txt).
# Set the NERC_DATA_DIR environment variable to point at your own copy; when it is
# unset, the path this notebook was originally run with is used unchanged.
# Previously used alternative location: "/content/m/Data/conll2003_english/"
path = os.environ.get("NERC_DATA_DIR", "E:/PFE/CoNLL2003/NERC/Data/conll2003_english/")

# Load each split from its own file. os.path.join adds a separator only when
# `path` does not already end with one, so the default yields the same file
# names as plain string concatenation did.
load_train = Loading(os.path.join(path, "train.txt"))
load_test = Loading(os.path.join(path, "test.txt"))
load_valid = Loading(os.path.join(path, "valid.txt"))
train, test, valid = load_train.data, load_test.data, load_valid.data

# Merge the three splits into one dataset object (`+` is defined by the project's
# data type). This merged object is what the Word2Vec and CNN wrappers below receive.
data = train + test + valid

In [5]:
# Hyper-parameters, stored as attributes on the merged dataset object so that
# later cells can read them back (data.VOCAB_SIZE is passed to Model_Word2Vec).
data.VOCAB_SIZE = 300
data.PADDING_SIZE = 10
# presumably the number of training epochs used by the nerc training helpers -- TODO confirm
data.EPOCHS = 2

In [6]:
# Build the project's Word2Vec wrapper from data.sentences (data merges train, test and valid).
# NOTE(review): the merged data includes the test split's sentences -- confirm that
# exposing them to the embedding model is intended.
# NOTE(review): the second argument is named VOCAB_SIZE but, given the (N, 10, 300)
# input shapes printed further down, it looks like the embedding dimension --
# confirm in nerc.word2vec.
w2v = Model_Word2Vec(data.sentences, data.VOCAB_SIZE)

## CNN


In [7]:
# Value passed as `max_length` to model.change(); its exact meaning is defined in nerc.cnn.
MAX_LENGTH = 50

# Wrap the merged dataset and the Word2Vec wrapper in the project's CNN helper.
model = Model_CNN(data, w2v)
# Reuse the values set in the configuration cell (data.VOCAB_SIZE = 300,
# data.PADDING_SIZE = 10) instead of repeating the literals here, so the two
# places cannot drift apart.
model.change(
    max_length=MAX_LENGTH,
    vocab_size=data.VOCAB_SIZE,
    padding_size=data.PADDING_SIZE,
)
# Prepare the inputs; both calls act on `model` and are kept in their original order.
model.preprocessing()
model.vectorization()

### Model without a validation set

In [8]:
# Split into train and test only (no validation split), then print the array
# shapes of each split as a sanity check.
model.train_test_split()
checkDataset(train=model.train, test=model.test)

X_train (152542, 10, 300) Features_train (152542, 7) y_train (152542, 9) 

X_test (38136, 10, 300) Features_test (38136, 7) y_test (38136, 9) 



#### Architecture Word2Vec

In [9]:
# Build the architecture that is trained on model.train.x alone (see the training
# call in the next cell), then print its layer summary.
model.architecture_word2vec()
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10, 300)]         0         
                                                                 
 conv1d (Conv1D)             (None, 10, 64)            57664     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 5, 64)            0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 5, 64)             0         
                                                                 
 conv1d_1 (Conv1D)           (None, 5, 32)             6176      
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 3, 32)            0         
 1D)                                                         

In [10]:
# Train on the training split; no validation data is passed to this call.
model.training(model.train.x, model.train.y)

Epoch 1/2
Epoch 2/2


In [11]:
# Run the model on the test split and print the evaluation block shown in the output.
model.predicting(model.test.x, model.test.y)

----------------------- Evaluation -------------------------
(38136, 9)
34725 38136
2538 7702 10240 0.248 0.752



#### Architecture Word2Vec and Features

In [12]:
# Build the architecture that is trained on two inputs, [x, features] (see the
# training call in the next cell), then print its layer summary.
# presumably this replaces the architecture previously held by `model` -- TODO confirm in nerc.cnn
model.architecture_word2vec_features()
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 10, 300)]    0           []                               
                                                                                                  
 conv1d_2 (Conv1D)              (None, 10, 64)       57664       ['input_1[0][0]']                
                                                                                                  
 max_pooling1d_2 (MaxPooling1D)  (None, 5, 64)       0           ['conv1d_2[0][0]']               
                                                                                                  
 dropout_2 (Dropout)            (None, 5, 64)        0           ['max_pooling1d_2[0][0]']        
                                                                                            

In [13]:
# Train with two inputs passed as a list in the order [x, features]; no validation data is passed.
model.training([model.train.x, model.train.features], model.train.y)

Epoch 1/2
Epoch 2/2


In [14]:
# Evaluate on the test split, passing the inputs in the same [x, features] order used for training.
model.predicting([model.test.x, model.test.features], model.test.y)

----------------------- Evaluation -------------------------
(38136, 9)
31044 38136
2936 7304 10240 0.287 0.713



### Model with a validation set

In [15]:
# Split again, this time into train, test and valid, then print the array shapes
# of each split as a sanity check.
model.train_test_valid_split()
checkDataset(train=model.train, test=model.test, valid=model.valid)

X_train (129660, 10, 300) Features_train (129660, 7) y_train (129660, 9) 

X_test (38136, 10, 300) Features_test (38136, 7) y_test (38136, 9) 

X_valid (22882, 10, 300) Features_valid (22882, 7) y_valid (22882, 9)


#### Architecture Word2Vec

In [16]:
# Build the x-only architecture again for the run that uses a validation split,
# then print its layer summary.
model.architecture_word2vec()
model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10, 300)]         0         
                                                                 
 conv1d_4 (Conv1D)           (None, 10, 64)            57664     
                                                                 
 max_pooling1d_4 (MaxPooling  (None, 5, 64)            0         
 1D)                                                             
                                                                 
 dropout_4 (Dropout)         (None, 5, 64)             0         
                                                                 
 conv1d_5 (Conv1D)           (None, 5, 32)             6176      
                                                                 
 max_pooling1d_5 (MaxPooling  (None, 3, 32)            0         
 1D)                                                       

In [17]:
# Train on the training split, passing the validation split (x, y) as the last two arguments.
model.training_valid(model.train.x, model.train.y, model.valid.x, model.valid.y)

Epoch 1/2
Epoch 2/2


In [18]:
# Run the model on the test split and print the evaluation block shown in the output.
model.predicting(model.test.x, model.test.y)

----------------------- Evaluation -------------------------
(38136, 9)
34897 38136
2443 7574 10017 0.244 0.756



#### Architecture Word2Vec and Features

In [19]:
# Build the two-input ([x, features]) architecture again for the run that uses a
# validation split, then print its layer summary.
model.architecture_word2vec_features()
model.summary()

Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 10, 300)]    0           []                               
                                                                                                  
 conv1d_6 (Conv1D)              (None, 10, 64)       57664       ['input_1[0][0]']                
                                                                                                  
 max_pooling1d_6 (MaxPooling1D)  (None, 5, 64)       0           ['conv1d_6[0][0]']               
                                                                                                  
 dropout_6 (Dropout)            (None, 5, 64)        0           ['max_pooling1d_6[0][0]']        
                                                                                            

In [20]:
# Train with two inputs in the order [x, features]; the validation inputs use the
# same order and are followed by the validation targets.
model.training_valid([model.train.x, model.train.features], model.train.y, [model.valid.x, model.valid.features], model.valid.y)

Epoch 1/2
Epoch 2/2


In [21]:
# Evaluate on the test split, passing the inputs in the same [x, features] order used for training.
model.predicting([model.test.x, model.test.features], model.test.y)

----------------------- Evaluation -------------------------
(38136, 9)
33374 38136
1308 8709 10017 0.131 0.869

