In [0]:
import numpy as np
from validation import compute_f1
from keras.models import Model
from keras.layers import TimeDistributed, Conv1D, Dense, Embedding, Input, Dropout, LSTM, Bidirectional, MaxPooling1D, \
    Flatten, concatenate
from preprocess import readfile, createBatches, createMatrices, iterate_minibatches, addCharInformatioin, padding
from keras.utils import Progbar
from keras.preprocessing.sequence import pad_sequences
from keras.initializers import RandomUniform
from keras.utils import plot_model

from gensim.models import Word2Vec, KeyedVectors


epochs = 120
DOMAIN_TRAIN_NER = 'ner_1'
DOMAIN_TRAIN_NER_1 = 'ner'
PATH_SAVE_MODEL = "models/model_" + DOMAIN_TRAIN_NER + ".h5"
PATH_SAVE_IDX2LABEL = "models/idx2label_" + DOMAIN_TRAIN_NER + ".npy"
PATH_SAVE_W2I = "models/word2Idx_" + DOMAIN_TRAIN_NER + ".npy"

PATH_TRAIN = "data/" + DOMAIN_TRAIN_NER_1 + "_train.txt"
PATH_TEST = "data/" + DOMAIN_TRAIN_NER_1 + "_test.txt"
PATH_DEV = "data/" + DOMAIN_TRAIN_NER_1 + "_valid.txt"


def tag_dataset(dataset):
    correctLabels = []
    predLabels = []
    b = Progbar(len(dataset))
    for i, data in enumerate(dataset):
        tokens, casing, char, labels = data
        tokens = np.asarray([tokens])
        casing = np.asarray([casing])
        char = np.asarray([char])
        pred = model.predict([tokens, casing, char], verbose=False)[0]
        pred = pred.argmax(axis=-1)  # Predict the classes
        correctLabels.append(labels)
        predLabels.append(pred)
        b.update(i)
    b.update(i + 1)
    return predLabels, correctLabels


trainSentences = readfile(PATH_TRAIN)
devSentences = readfile(PATH_DEV)
testSentences = readfile(PATH_TEST)

trainSentences = addCharInformatioin(trainSentences)
devSentences = addCharInformatioin(devSentences)
testSentences = addCharInformatioin(testSentences)

labelSet = set()
words = {}

for dataset in [trainSentences, devSentences, testSentences]:
    for sentence in dataset:
        for token, char, label in sentence:
            labelSet.add(label)
            words[token.lower()] = True

print("-------labelSet-----------")
print(labelSet)

# :: Create a mapping for the labels ::
label2Idx = {}
for label in labelSet:
    label2Idx[label] = len(label2Idx)

# print("------label2Idx--------")
# print(label2Idx)

# :: Hard coded case lookup ::
case2Idx = {'numeric': 0, 'allLower': 1, 'allUpper': 2, 'initialUpper': 3, 'other': 4, 'mainly_numeric': 5,
            'contains_digit': 6, 'PADDING_TOKEN': 7}
caseEmbeddings = np.identity(len(case2Idx), dtype='float32')

# print("-------caseEmbeddings------")
# print(caseEmbeddings)

# :: Read in word embeddings ::
word2Idx = {}
# wordEmbeddings = []


model_1 = KeyedVectors.load_word2vec_format("/content/drive/My Drive/word2vec.model.bin", binary=True)
words1 = list(model_1.wv.vocab)

# print("shape1: " + str(model_1.wv.syn0.shape))

wordEmbeddings = model_1.wv.syn0
# print("shape2: " + str(wordEmbeddings.shape))

if len(word2Idx) == 0:  # Add padding+unknown
    word2Idx["PADDING_TOKEN"] = len(word2Idx)
    word2Idx["UNKNOWN_TOKEN"] = len(word2Idx)

    word2Idx["PADDING_TOKEN"] = len(word2Idx)
    vector = np.zeros(300)  # Zero vector vor 'PADDING' word
    wordEmbeddings = np.insert(wordEmbeddings, 0, vector, axis=0)

    word2Idx["UNKNOWN_TOKEN"] = len(word2Idx)
    vector = np.random.uniform(-0.25, 0.25, 300)
    wordEmbeddings = np.insert(wordEmbeddings, 0, vector, axis=0)

# print("shape3: " + str(wordEmbeddings.shape))

for index, word in enumerate(words1):
    i = index + 2
    word2Idx[word] = len(word2Idx)

# print("------wordEmbeddings-----")
# print(wordEmbeddings)
#
# print("------word2Idx-----")
# print(word2Idx)

char2Idx = {"PADDING": 0, "UNKNOWN": 1}
for c in " 0123456789aáàạảãăắằặẳẵâấầậẫẩbcdđeéèẹẻẽêếềệểễfghiíìịĩỉjklmnoóòọỏõôốồổỗộơớờợỡởpqrstuúùụũủưứừửựữvwxyýỳỵỹỷzAÁÀẠẢÃĂẮẰẶẲẴÂẤẦẬẪẨBCDĐEÉÈẸẺẼÊẾỀỆỂỄFGHIÍÌỊĨỈJKLMNOÓÒỌỎÕÔỐỒỔỖỘƠỚỜỢỠỞPQRSTUÚÙỤŨỦƯỨỪỬỰỮVWXYÝỲỴỸỶZ.,-_()[]{}!?:;#'\"/\\%$`&=*+@^~|":
    char2Idx[c] = len(char2Idx)

# print("------char2Idx-------")
# print(char2Idx)

train_set = padding(createMatrices(trainSentences, word2Idx, label2Idx, case2Idx, char2Idx))
dev_set = padding(createMatrices(devSentences, word2Idx, label2Idx, case2Idx, char2Idx))
test_set = padding(createMatrices(testSentences, word2Idx, label2Idx, case2Idx, char2Idx))

# print('-------devSentences-------')
# print(devSentences[3][0])
#
# print('---------dev_set---------')
# print(dev_set[3])
# print('len: ' + str(len(dev_set[3])))
#
# print('*****')
# print(dev_set[3][0])
# print("len: " + str(len(dev_set[3])))
#
# print("***")
# print(dev_set[3][1])
# print("len: " + str(len(dev_set[3][1])))

idx2Label = {v: k for k, v in label2Idx.items()}
np.save(PATH_SAVE_IDX2LABEL, idx2Label)
np.save(PATH_SAVE_W2I, word2Idx)

train_batch, train_batch_len = createBatches(train_set)
dev_batch, dev_batch_len = createBatches(dev_set)
test_batch, test_batch_len = createBatches(test_set)

words_input = Input(shape=(None,), dtype='int32', name='words_input')
words = Embedding(input_dim=wordEmbeddings.shape[0], output_dim=wordEmbeddings.shape[1], weights=[wordEmbeddings],
                  trainable=False)(words_input)
casing_input = Input(shape=(None,), dtype='int32', name='casing_input')
casing = Embedding(output_dim=caseEmbeddings.shape[1], input_dim=caseEmbeddings.shape[0], weights=[caseEmbeddings],
                   trainable=False)(casing_input)
character_input = Input(shape=(None, 52,), name='char_input')
embed_char_out = TimeDistributed(
    Embedding(len(char2Idx), 30, embeddings_initializer=RandomUniform(minval=-0.5, maxval=0.5)), name='char_embedding')(
    character_input)
dropout = Dropout(0.5)(embed_char_out)
conv1d_out = TimeDistributed(Conv1D(kernel_size=3, filters=30, padding='same', activation='tanh', strides=1))(dropout)
maxpool_out = TimeDistributed(MaxPooling1D(52))(conv1d_out)
char = TimeDistributed(Flatten())(maxpool_out)
char = Dropout(0.5)(char)
output = concatenate([words, casing, char])
output = Bidirectional(LSTM(200, return_sequences=True, dropout=0.65, recurrent_dropout=0.45))(output)
output = TimeDistributed(Dense(len(label2Idx), activation='softmax'))(output)
model = Model(inputs=[words_input, casing_input, character_input], outputs=[output])
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop')  # nadam
model.summary()

# plot_model(model, to_file='model.png')


for epoch in range(epochs):
    print("Epoch %d/%d" % (epoch, epochs))
    a = Progbar(len(train_batch_len))
    for i, batch in enumerate(iterate_minibatches(train_batch, train_batch_len)):
        labels, tokens, casing, char = batch
        model.train_on_batch([tokens, casing, char], labels)
        a.update(i)
    a.update(i + 1)
    print(' ')
    #   Performance on dev dataset
    predLabels, correctLabels = tag_dataset(train_batch)
    pre_train, rec_train, f1_train = compute_f1(predLabels, correctLabels, idx2Label)
    print("Train-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (pre_train, rec_train, f1_train))

    #   Performance on dev dataset
    predLabels, correctLabels = tag_dataset(dev_batch)
    pre_dev, rec_dev, f1_dev = compute_f1(predLabels, correctLabels, idx2Label)
    print("Dev-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (pre_dev, rec_dev, f1_dev))

    #   Performance on test dataset
    predLabels, correctLabels = tag_dataset(test_batch)
    pre_test, rec_test, f1_test = compute_f1(predLabels, correctLabels, idx2Label)
    print("Test-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (pre_test, rec_test, f1_test))

model.save(PATH_SAVE_MODEL)

#   Performance on dev dataset
predLabels, correctLabels = tag_dataset(dev_batch)
pre_dev, rec_dev, f1_dev = compute_f1(predLabels, correctLabels, idx2Label)
print("Dev-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (pre_dev, rec_dev, f1_dev))

#   Performance on test dataset
predLabels, correctLabels = tag_dataset(test_batch)
pre_test, rec_test, f1_test = compute_f1(predLabels, correctLabels, idx2Label)
print("Test-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (pre_test, rec_test, f1_test))


# Weather, rmsprop, 0.65 0.45
# Test-Data: Prec: 0.868, Rec: 0.853, F1: 0.861,
# Dev-Data: Prec: 0.808, Rec: 0.737, F1: 0.771

# event, 0.65 0.45, rmsprop
# Dev-Data: Prec: 0.919, Rec: 0.910, F1: 0.915
# Test-Data: Prec: 0.943, Rec: 0.921, F1: 0.931



"""
all ner: lưu vào ner
Test-Data: Prec: 0.931, Rec: 0.929, F1: 0.930
Dev-Data: Prec: 0.955, Rec: 0.962, F1: 0.958
Train-Data: Prec: 0.992, Rec: 0.993, F1: 0.992
"""

"""
Thêm data alarm lưu vào ner_1 epouch 120
Dev-Data: Prec: 0.953, Rec: 0.948, F1: 0.950
Test-Data: Prec: 0.945, Rec: 0.941, F1: 0.943
Train-Data: Prec: 0.995, Rec: 0.994, F1: 0.994
"""


-------labelSet-----------
{'I-EVENT', 'B-TIME', 'B-LOC', 'I-TIME', 'I-LOC', 'I-SITE', 'B-SITE', 'O', 'B-EVENT'}


  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
char_input (InputLayer)         (None, None, 52)     0                                            
__________________________________________________________________________________________________
char_embedding (TimeDistributed (None, None, 52, 30) 6870        char_input[0][0]                 
__________________________________________________________________________________________________
dropout_3 (Dropout)             (None, None, 52, 30) 0           char_embedding[0][0]             
__________________________________________________________________________________________________
time_distributed_5 (TimeDistrib (None, None, 52, 30) 2730        dropout_3[0][0]                  
____________________________________________________________________________________________

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


 
Train-Data: Prec: 0.579, Rec: 0.329, F1: 0.420
Dev-Data: Prec: 0.712, Rec: 0.493, F1: 0.582
Test-Data: Prec: 0.493, Rec: 0.309, F1: 0.380
Epoch 1/120
 
Train-Data: Prec: 0.672, Rec: 0.561, F1: 0.612
Dev-Data: Prec: 0.763, Rec: 0.683, F1: 0.720
Test-Data: Prec: 0.582, Rec: 0.522, F1: 0.550
Epoch 2/120
 
Train-Data: Prec: 0.690, Rec: 0.613, F1: 0.649
Dev-Data: Prec: 0.802, Rec: 0.729, F1: 0.764
Test-Data: Prec: 0.608, Rec: 0.574, F1: 0.591
Epoch 3/120
 
Train-Data: Prec: 0.773, Rec: 0.679, F1: 0.723
Dev-Data: Prec: 0.822, Rec: 0.747, F1: 0.783
Test-Data: Prec: 0.699, Rec: 0.628, F1: 0.662
Epoch 4/120
 
Train-Data: Prec: 0.783, Rec: 0.753, F1: 0.768
Dev-Data: Prec: 0.830, Rec: 0.811, F1: 0.820
Test-Data: Prec: 0.719, Rec: 0.753, F1: 0.736
Epoch 5/120
 
Train-Data: Prec: 0.816, Rec: 0.790, F1: 0.803
Dev-Data: Prec: 0.850, Rec: 0.825, F1: 0.837
Test-Data: Prec: 0.764, Rec: 0.783, F1: 0.773
Epoch 6/120
 
Train-Data: Prec: 0.824, Rec: 0.829, F1: 0.826
Dev-Data: Prec: 0.845, Rec: 0.836, F1: 

'\nThêm data alarm lưu vào ner_1 epouch 120\nDev-Data: Prec: 0.953, Rec: 0.948, F1: 0.950\nTest-Data: Prec: 0.945, Rec: 0.941, F1: 0.943\nTrain-Data: Prec: 0.995, Rec: 0.994, F1: 0.994\n'

In [0]:
!pip install tensorflow==1.14.0

Collecting tensorflow==1.14.0
[?25l  Downloading https://files.pythonhosted.org/packages/de/f0/96fb2e0412ae9692dbf400e5b04432885f677ad6241c088ccc5fe7724d69/tensorflow-1.14.0-cp36-cp36m-manylinux1_x86_64.whl (109.2MB)
[K     |████████████████████████████████| 109.2MB 94kB/s 
Collecting tensorflow-estimator<1.15.0rc0,>=1.14.0rc0
[?25l  Downloading https://files.pythonhosted.org/packages/3c/d5/21860a5b11caf0678fbc8319341b0ae21a07156911132e0e71bffed0510d/tensorflow_estimator-1.14.0-py2.py3-none-any.whl (488kB)
[K     |████████████████████████████████| 491kB 31.1MB/s 
Collecting tensorboard<1.15.0,>=1.14.0
[?25l  Downloading https://files.pythonhosted.org/packages/91/2d/2ed263449a078cd9c8a9ba50ebd50123adf1f8cfbea1492f9084169b89d9/tensorboard-1.14.0-py3-none-any.whl (3.1MB)
[K     |████████████████████████████████| 3.2MB 42.0MB/s 
Installing collected packages: tensorflow-estimator, tensorboard, tensorflow
  Found existing installation: tensorflow-estimator 2.2.0
    Uninstalling tenso

In [0]:
!pip install keras==2.2.5

Collecting keras==2.2.5
[?25l  Downloading https://files.pythonhosted.org/packages/f8/ba/2d058dcf1b85b9c212cc58264c98a4a7dd92c989b798823cc5690d062bb2/Keras-2.2.5-py2.py3-none-any.whl (336kB)
[K     |████████████████████████████████| 337kB 3.4MB/s 
Installing collected packages: keras
  Found existing installation: Keras 2.3.1
    Uninstalling Keras-2.3.1:
      Successfully uninstalled Keras-2.3.1
Successfully installed keras-2.2.5


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
!ls "/content/drive/My Drive/"

 005EbJRDgy1fj6ai8dge7j30kf10b7di.jpg
 005GFLHugy1fjusfnfxnkj33rm2nfe87.jpg
 005Kr1Opgy1fk2z9bas4zj30k00zkqa0.jpg
 005Kr1Opgy1fk2z9brvl7j30k00zk0zz.jpg
 005tgyivgy1fjjlg3yk6uj32e42e4npg.jpg
 006cwmYJgy1fj38vrcffej30qt12u7bp.jpg
 006cwmYJgy1fj38vtjrh7j30ow130dm2.jpg
 006iKZ3fly1fjfnkz8bkfj30pr0ehwgt.jpg
 006iKZ3fly1fjfnle2pn7j30rb0fdmzv.jpg
 006iKZ3fly1fjfnluvj1aj30pa0e876k.jpg
 006iKZ3fly1fjfnlvy1byj30pa0e8tb1.jpg
 006iKZ3fly1fjfnlwd4d2j30pn0efgnu.jpg
 006iKZ3fly1fjfnlwqadhj30pr0ehwgq.jpg
 006LlTgdly1fjus0ol66nj30qo141tf4.jpg
 006LlTgdly1fjus1e5dy0j328z3dgu0z.jpg
 006LveXAly1fj21b6zljpj317d1zjnpe.jpg
 006LveXAly1fjusm1rqbpj31n32gg7wk.jpg
 006oGXVply1fjx4lxxxuaj31kw11xhdv.jpg
 006oGXVply1fjx5ua97rkj31jk2bc4qs.jpg
 006oGXVply1fkeh8yh7xij31jk2bcqv7.jpg
 006XQbxQgy1fk6c4etc9yj31jk2bcnpf.jpg
'00-Gioi thieu ve khoa hoc-IT1110.pdf'
 00_NEU_PHH_Mucluc_v1.0013104209.pdf
 010-Introduction.pdf
'01a_share_import_prog_courses_2016-11-01-HUST-Vietnam (1).xls'
 01a_share_import_prog_courses_2016-11-0

In [0]:
! pip install pyspark


Collecting pyspark
[?25l  Downloading https://files.pythonhosted.org/packages/9a/5a/271c416c1c2185b6cb0151b29a91fff6fcaed80173c8584ff6d20e46b465/pyspark-2.4.5.tar.gz (217.8MB)
[K     |████████████████████████████████| 217.8MB 65kB/s 
[?25hCollecting py4j==0.10.7
[?25l  Downloading https://files.pythonhosted.org/packages/e3/53/c737818eb9a7dc32a7cd4f1396e787bd94200c3997c72c1dbe028587bd76/py4j-0.10.7-py2.py3-none-any.whl (197kB)
[K     |████████████████████████████████| 204kB 52.4MB/s 
[?25hBuilding wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-2.4.5-py2.py3-none-any.whl size=218257927 sha256=ff21868699ae2a0fc491158847e5456817c73e116ccb8f22fcf0c12de41ceb53
  Stored in directory: /root/.cache/pip/wheels/bf/db/04/61d66a5939364e756eb1c1be4ec5bdce6e04047fc7929a3c3c
Successfully built pyspark
Installing collected packages: py4j, pyspark
Successfully installed py4j-0.10.7 pyspark-2.4.5


In [0]:
! spark-submit --executor-memory 1g movie-similarities-1m.py 260

20/03/31 08:00:01 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  File "/content/movie-similarities-1m.py", line 18
    def makePairs((user, ratings)):
                  ^
SyntaxError: invalid syntax
log4j:WARN No appenders could be found for logger (org.apache.spark.util.ShutdownHookManager).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.


In [0]:
!apt-get install openjdk-8-jdk-headless -qq > /dev/null


In [0]:

!wget -q https://www-us.apache.org/dist/spark/spark-2.4.1/spark-2.4.1-bin-hadoop2.7.tgz


In [0]:
!tar xf spark-2.4.1-bin-hadoop2.7.tgz


tar: spark-2.4.1-bin-hadoop2.7.tgz: Cannot open: No such file or directory
tar: Error is not recoverable: exiting now


In [0]:
!pip install -q findspark

In [0]:
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://www-us.apache.org/dist/spark/spark-2.4.1/spark-2.4.1-bin-hadoop2.7.tgz
!tar xf spark-2.4.1-bin-hadoop2.7.tgz
!pip install -q findspark

tar: spark-2.4.1-bin-hadoop2.7.tgz: Cannot open: No such file or directory
tar: Error is not recoverable: exiting now


In [0]:
#for the most recent update on 02/29/2020

!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://www-us.apache.org/dist/spark/spark-3.0.0-preview2/spark-3.0.0-preview2-bin-hadoop3.2.tgz
!tar -xvf spark-3.0.0-preview2-bin-hadoop3.2.tgz
!pip install -q findspark
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-3.0.0-preview2-bin-hadoop3.2"

ERROR! Session/line number was not unique in database. History logging moved to new session 59
spark-3.0.0-preview2-bin-hadoop3.2/
spark-3.0.0-preview2-bin-hadoop3.2/data/
spark-3.0.0-preview2-bin-hadoop3.2/data/streaming/
spark-3.0.0-preview2-bin-hadoop3.2/data/streaming/AFINN-111.txt
spark-3.0.0-preview2-bin-hadoop3.2/data/mllib/
spark-3.0.0-preview2-bin-hadoop3.2/data/mllib/sample_binary_classification_data.txt
spark-3.0.0-preview2-bin-hadoop3.2/data/mllib/sample_kmeans_data.txt
spark-3.0.0-preview2-bin-hadoop3.2/data/mllib/sample_multiclass_classification_data.txt
spark-3.0.0-preview2-bin-hadoop3.2/data/mllib/sample_lda_libsvm_data.txt
spark-3.0.0-preview2-bin-hadoop3.2/data/mllib/iris_libsvm.txt
spark-3.0.0-preview2-bin-hadoop3.2/data/mllib/pagerank_data.txt
spark-3.0.0-preview2-bin-hadoop3.2/data/mllib/sample_linear_regression_data.txt
spark-3.0.0-preview2-bin-hadoop3.2/data/mllib/pic_data.txt
spark-3.0.0-preview2-bin-hadoop3.2/data/mllib/als/
spark-3.0.0-preview2-bin-hadoop3.2/d

In [0]:
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-2.4.3-bin-hadoop2.7"

In [0]:
! spark-submit --executor-memory 1g movie-similarities-1m.py 260

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
20/03/31 09:06:28 INFO PythonRunner: Times: total = 53, boot = -20203, init = 20210, finish = 46
20/03/31 09:06:28 INFO Executor: Finished task 0.0 in stage 2.0 (TID 102). 1851 bytes result sent to driver
20/03/31 09:06:28 INFO TaskSetManager: Starting task 2.0 in stage 2.0 (TID 104, 71718670bcdf, executor driver, partition 2, NODE_LOCAL, 7143 bytes)
20/03/31 09:06:28 INFO Executor: Running task 2.0 in stage 2.0 (TID 104)
20/03/31 09:06:28 INFO ShuffleBlockFetcherIterator: Getting 100 (21.1 MiB) non-empty blocks including 100 (21.1 MiB) local and 0 (0.0 B) host-local and 0 (0.0 B) remote blocks
20/03/31 09:06:28 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
20/03/31 09:06:28 INFO TaskSetManager: Finished task 0.0 in stage 2.0 (TID 102) in 8215 ms on 71718670bcdf (executor driver) (1/100)
20/03/31 09:06:28 INFO PythonRunner: Times: total = 8283, boot = -20001, init = 20008, finish = 8276
20/03/31 09:06

In [0]:
! spark-submit local.py 260

20/03/31 10:33:23 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
20/03/31 10:33:24 INFO SparkContext: Running Spark version 3.0.0-preview2
20/03/31 10:33:24 INFO ResourceUtils: Resources for spark.driver:

20/03/31 10:33:24 INFO SparkContext: Submitted application: MovieSimilarities
20/03/31 10:33:24 INFO SecurityManager: Changing view acls to: root
20/03/31 10:33:24 INFO SecurityManager: Changing modify acls to: root
20/03/31 10:33:24 INFO SecurityManager: Changing view acls groups to: 
20/03/31 10:33:24 INFO SecurityManager: Changing modify acls groups to: 
20/03/31 10:33:24 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users  with view permissions: Set(root); groups with view permissions: Set(); users  with modify permissions: Set(root); groups with modify permissions: Set()
20/03/31 10:33:25

In [0]:
import os
import io
import numpy
import pandas as pd
with open('true.txt', 'r',encoding='utf8') as f:
#     data = [list( row for row in f.read().split('\n\n'))]
#     data = [list(map(str, row.split())) for row in f.read().split('\n\n')]
    dataa = [ row for row in f.read().split('\n\n')]

data = pd.DataFrame(dataa)
import re
import nltk
import gensim 
import lxml
import xml.etree.ElementTree
from nltk.corpus import stopwords
# nltk.download('stopswords')
REPLACE_BY_SPACE_RE = re.compile('[/(){}\[\]\|@,;]')
# BAD_SYMBOLS_RE = re.compile('[^0-9a-z #+_]')
# STOPWORDS = set(stopwords.words('english'))
STOPWORDS=["cả" ]
data = data.reset_index(drop=True)
def clean_text(text):
    text = text.lower() # lowercase text
    text = REPLACE_BY_SPACE_RE.sub(' ', text) # replace REPLACE_BY_SPACE_RE symbols by space in text. substitute the matched string in REPLACE_BY_SPACE_RE with space.
#     text = BAD_SYMBOLS_RE.sub(' ', text) # remove symbols which are in BAD_SYMBOLS_RE from text. substitute the matched string in BAD_SYMBOLS_RE with nothing. 
#     text = text.replace('x', ' ')
    text = re.sub(r'\W+', ' ', text)
    text = ' '.join(word for word in text.split() if word not in STOPWORDS) # remove stopwors from text
    return text
def remove_tags(text):
    return ''.join(xml.etree.ElementTree.fromstring(text).itertext())
import re
def cleanPunc(sentence): #function to clean the word of any punctuation or special characters
    cleaned = re.sub(r'[?|!|\'|"|#]',r'',sentence)
#     cleaned = re.sub(r'[.|,|)|(|\|/]',r' ',cleaned)
    cleaned = re.sub(r'[,|)|(|\|/]',r' ',cleaned)
    cleaned = cleaned.strip()
    cleaned = cleaned.replace("\n"," ")
    return cleaned
def keepAlpha(sentence):
    alpha_sent = ""
    for word in sentence.split():
        alpha_word = re.sub('[^a-z A-Z]+', ' ', word)
        alpha_sent += alpha_word
        alpha_sent += " "
    alpha_sent = alpha_sent.strip()
    return alpha_sen
def cleanhtml(raw_html):
    cleanr = re.compile('<.*?>')
    cleantext = re.sub(cleanr, '', raw_html)
    cleantext = gensim.utils.simple_preprocess(cleantext) # xoa cac ki tu dac biet 
    return cleantext
data[0]=data[0].astype(str)

data[0] = data[0].str.replace(',',' ')
data[0] = data[0].str.replace('\n',' melllko ')
# data[0] = data[0].str.replace('.',' melllko ')
data[0] = data[0].str.replace('\t',' ')
# data['message'] = data['message'].apply(cleanhtml)
data[0] = data[0].apply(clean_text)
data[0] = data[0].apply(cleanPunc)
# data[0] = data[0].apply(remove_tags)
# data[0] = data[0].str.replace('\d+', ' ')
data

Unnamed: 0,0
0,hiệp bạn muốn mua thứ gì để ăn cùng với tôi kh...
1,hiệp bạn có muốn ăn tối với tôi không melllko ...
2,chào lan melllko chào melllko bạn dự tính làm ...
3,chris bạn đang đi đâu vậy melllko tôi đi cửa h...
4,laura bạn sẽ làm gì hôm nay melllko tôi sẽ đi ...
...,...
225,chiều ở cái quán mới khai trương á đoạn nhà ph...
226,xin chào melllko về việc lịch hẹn cho dự án sắ...
227,do thứ 5 tuần này cô có việc bận nên mình hủy ...
228,xin chào mình xin lịch gặp để phỏng vấn bạn và...


In [0]:
!pip install spacy



In [0]:
!pip install pyvi
!pip install https://github.com/trungtv/vi_spacy/raw/master/packages/vi_spacy_model-0.2.1/dist/vi_spacy_model-0.2.1.tar.gz

Collecting pyvi
[?25l  Downloading https://files.pythonhosted.org/packages/10/e1/0e5bc6b5e3327b9385d6e0f1b0a7c0404f28b74eb6db59a778515b30fd9c/pyvi-0.1-py2.py3-none-any.whl (8.5MB)
[K     |████████████████████████████████| 8.5MB 3.2MB/s 
Collecting sklearn-crfsuite
  Downloading https://files.pythonhosted.org/packages/25/74/5b7befa513482e6dee1f3dd68171a6c9dfc14c0eaa00f885ffeba54fe9b0/sklearn_crfsuite-0.3.6-py2.py3-none-any.whl
Collecting python-crfsuite>=0.8.3
[?25l  Downloading https://files.pythonhosted.org/packages/95/99/869dde6dbf3e0d07a013c8eebfb0a3d30776334e0097f8432b631a9a3a19/python_crfsuite-0.9.7-cp36-cp36m-manylinux1_x86_64.whl (743kB)
[K     |████████████████████████████████| 747kB 41.2MB/s 
[?25hInstalling collected packages: python-crfsuite, sklearn-crfsuite, pyvi
Successfully installed python-crfsuite-0.9.7 pyvi-0.1 sklearn-crfsuite-0.3.6
Collecting https://github.com/trungtv/vi_spacy/raw/master/packages/vi_spacy_model-0.2.1/dist/vi_spacy_model-0.2.1.tar.gz
[?25l  Do

In [0]:
import spacy 
# import vi_spacy_model
# nlp = vi_spacy_model.load()
def token(text):
    nlp=spacy.load('vi_spacy_model')
    doc=nlp(text)
    return doc.text
data[0] = data[0].apply(token)

In [0]:
#ls -la
data.to_pickle('tachtu')

In [0]:
data

In [0]:
data[3]

KeyError: ignored

NeuroNER

In [2]:
!pip install pyneuroner[gpu]


Collecting pyneuroner[gpu]
[?25l  Downloading https://files.pythonhosted.org/packages/19/cb/7fe87cdfe3f969078edebf3f41520efbc0d1d883e2b05a22b6a068593db1/pyneuroner-1.0.8-py2.py3-none-any.whl (26.9MB)
[K     |████████████████████████████████| 26.9MB 112kB/s 
Collecting pycorenlp>=0.3.0
  Downloading https://files.pythonhosted.org/packages/cd/40/e74eb4fc7906d630b73a84c9ae9d824f694bd4c5a1d727b8e18beadff613/pycorenlp-0.3.0.tar.gz
Collecting tensorflow-gpu>=1.0.0; extra == "gpu"
[?25l  Downloading https://files.pythonhosted.org/packages/31/bf/c28971266ca854a64f4b26f07c4112ddd61f30b4d1f18108b954a746f8ea/tensorflow_gpu-2.2.0-cp36-cp36m-manylinux2010_x86_64.whl (516.2MB)
[K     |████████████████████████████████| 516.2MB 30kB/s 
Building wheels for collected packages: pycorenlp
  Building wheel for pycorenlp (setup.py) ... [?25l[?25hdone
  Created wheel for pycorenlp: filename=pycorenlp-0.3.0-cp36-none-any.whl size=2143 sha256=a8bc7736914052335c24041b1bd9d720a10b25ffc666bd619bcee4cc4931a3

In [0]:
!python ./test/test_main.py

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
!pip install tensorflow==1.1.0

Collecting tensorflow==1.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/cd/e4/b2a8bcd1fa689489050386ec70c5c547e4a75d06f2cc2b55f45463cd092c/tensorflow-1.1.0-cp36-cp36m-manylinux1_x86_64.whl (31.4MB)
[K     |████████████████████████████████| 31.4MB 102kB/s 
Installing collected packages: tensorflow
  Found existing installation: tensorflow 2.2.0
    Uninstalling tensorflow-2.2.0:
      Successfully uninstalled tensorflow-2.2.0
Successfully installed tensorflow-1.1.0


In [0]:
# !python ./neuroner/__main__.py


{'character_embedding_dimension': 25,
 'character_lstm_hidden_state_dimension': 25,
 'check_for_digits_replaced_with_zeros': 1,
 'check_for_lowercase': 1,
 'dataset_text_folder': './data/conll2003/en',
 'debug': 0,
 'dropout_rate': 0.5,
 'experiment_name': 'test',
 'fetch_data': '',
 'fetch_trained_model': '',
 'freeze_token_embeddings': 0,
 'gradient_clipping_value': 5.0,
 'learning_rate': 0.005,
 'load_all_pretrained_token_embeddings': 0,
 'load_only_pretrained_token_embeddings': 0,
 'main_evaluation_mode': 'conll',
 'maximum_number_of_epochs': 100,
 'number_of_cpu_threads': 8,
 'number_of_gpus': 0,
 'optimizer': 'sgd',
 'output_folder': './output',
 'output_scores': 0,
 'parameters_filepath': './parameters.ini',
 'patience': 10,
 'plot_format': 'pdf',
 'pretrained_model_folder': './trained_models/conll_2003_en',
 'reload_character_embeddings': 1,
 'reload_character_lstm': 1,
 'reload_crf': 1,
 'reload_feedforward': 1,
 'reload_token_embeddings': 1,
 'reload_token_lstm': 1,
 'remap_u

In [0]:
# !neuroner --train_model=False --use_pretrained_model=True --dataset_text_folder=./data/example_unannotated_texts --pretrained_model_folder=./trained_models/conll_2003_en
!neuroner --output_folder=/content/drive/My\ Drive/NeuroNER-master/output --token_pretrained_embedding_filepath=/content/drive/My\ Drive/NeuroNER-master/data/word_vectors/glove.6B.100d.txt --parameters_filepath=/content/drive/My\ Drive/NeuroNER-master/neuroner/trained_models/conll_2003_en/parameters.ini --train_model=False --use_pretrained_model=True --dataset_text_folder=/content/drive/My\ Drive/NeuroNER-master/neuroner/data/example_unannotated_texts --pretrained_model_folder=/content/drive/My\ Drive/NeuroNER-master/neuroner/trained_models/conll_2003_en


{'character_embedding_dimension': 25,
 'character_lstm_hidden_state_dimension': 25,
 'check_for_digits_replaced_with_zeros': 1,
 'check_for_lowercase': 1,
 'dataset_text_folder': '/content/drive/My '
                        'Drive/NeuroNER-master/neuroner/data/example_unannotated_texts',
 'debug': 0,
 'dropout_rate': 0.5,
 'experiment_name': 'test',
 'fetch_data': '',
 'fetch_trained_model': '',
 'freeze_token_embeddings': 0,
 'gradient_clipping_value': 5.0,
 'learning_rate': 0.01,
 'load_all_pretrained_token_embeddings': 0,
 'load_only_pretrained_token_embeddings': 0,
 'main_evaluation_mode': 'conll',
 'maximum_number_of_epochs': 100,
 'number_of_cpu_threads': 8,
 'number_of_gpus': 0,
 'optimizer': 'sgd',
 'output_folder': '/content/drive/My Drive/NeuroNER-master/output',
 'output_scores': 0,
 'parameters_filepath': '/content/drive/My '
                        'Drive/NeuroNER-master/neuroner/trained_models/conll_2003_en/parameters.ini',
 'patience': 100,
 'plot_format': 'pdf',
 'pretr

In [0]:
!python drive/My\ Drive/NeuroNER-master/test/test_main.py

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [0]:
#############################################################################
# !neuroner --output_folder=/content/drive/My\ Drive/NeuroNER-master/output
#  --train_model=False --train_model=False --use_pretrained_model=True 
#  --dataset_text_folder=/content/drive/My\ Drive/NeuroNER-master/neuroner/data/example_unannotated_texts 
#  --token_embedding_dimension=300 --token_lstm_hidden_state_dimension=300 
#  --pretrained_model_folder=/content/drive/My\ Drive/NeuroNER-master/neuroner/trained_models/conll_2003_en 
#  --token_pretrained_embedding_filepath=/content/drive/My\ Drive/NeuroNER-master/data/word_vectors/glove.6B.100d.txt
# --tagging_format=bio
!neuroner  --learning_rate=0.05 --use_pretrained_model=False --maximum_number_of_epochs=120 --spacylanguage=vi_spacy_model --token_pretrained_embedding_filepath=/content/drive/My\ Drive/NeuroNER-master/data/word_vectors/glove.6B.100d.txt  --train_model=True --dataset_text_folder="/content/drive/My Drive/NeuroNER-master/neuroner/data/example_unannotated_texts" --output_folder="./output"

{'character_embedding_dimension': 25,
 'character_lstm_hidden_state_dimension': 25,
 'check_for_digits_replaced_with_zeros': 1,
 'check_for_lowercase': 1,
 'dataset_text_folder': '/content/drive/My '
                        'Drive/NeuroNER-master/neuroner/data/example_unannotated_texts',
 'debug': 0,
 'dropout_rate': 0.5,
 'experiment_name': 'test',
 'fetch_data': '',
 'fetch_trained_model': '',
 'freeze_token_embeddings': 0,
 'gradient_clipping_value': 5.0,
 'learning_rate': 0.05,
 'load_all_pretrained_token_embeddings': 0,
 'load_only_pretrained_token_embeddings': 0,
 'main_evaluation_mode': 'conll',
 'maximum_number_of_epochs': 120,
 'number_of_cpu_threads': 8,
 'number_of_gpus': 0,
 'optimizer': 'sgd',
 'output_folder': './output',
 'output_scores': 0,
 'parameters_filepath': './parameters.ini',
 'patience': 10,
 'plot_format': 'pdf',
 'pretrained_model_folder': './trained_models/conll_2003_en',
 'reload_character_embeddings': 1,
 'reload_character_lstm': 1,
 'reload_crf': 1,
 'rel

In [0]:
!pip install pyneuroner --upgrade

Requirement already up-to-date: pyneuroner in /usr/local/lib/python3.6/dist-packages (1.0.8)


In [0]:
--maximum_number_of_epochs=20 
--token_pretrained_embedding_filepath=/content/drive/My\ Drive/NeuroNER-master/data/word_vectors/glove.6B.100d.txt 
--parameters_filepath=/content/drive/My\ Drive/NeuroNER-master/neuroner/trained_models/conll_2003_en/parameters.ini 

In [0]:
!neuroner --output_folder=/content/drive/My\ Drive/NeuroNER-master/output --train_model=False --train_model=False --use_pretrained_model=True --dataset_text_folder=/content/drive/My\ Drive/NeuroNER-master/neuroner/data/example_unannotated_texts --token_embedding_dimension=300 --token_lstm_hidden_state_dimension=300 --pretrained_model_folder=/content/drive/My\ Drive/NeuroNER-master/neuroner/trained_models/conll_2003_en --token_pretrained_embedding_filepath=/content/drive/My\ Drive/NeuroNER-master/data/word_vectors/glove.6B.100d.txt
# --parameters_filepath=/content/drive/My\ Drive/NeuroNER-master/neuroner/trained_models/conll_2003_en/parameters.ini 
# 

# --train_model=False
#  --use_pretrained_model=True
#   --dataset_text_folder=../data/example_unannotated_texts 
#   --pretrained_model_folder=../trained_models/conll_2003_en

                        for consistency with the pretrained model
                        for consistency with the pretrained model
{'character_embedding_dimension': 25,
 'character_lstm_hidden_state_dimension': 25,
 'check_for_digits_replaced_with_zeros': 1,
 'check_for_lowercase': 1,
 'dataset_text_folder': '/content/drive/My '
                        'Drive/NeuroNER-master/neuroner/data/example_unannotated_texts',
 'debug': 0,
 'dropout_rate': 0.5,
 'experiment_name': 'test',
 'fetch_data': '',
 'fetch_trained_model': '',
 'freeze_token_embeddings': 0,
 'gradient_clipping_value': 5.0,
 'learning_rate': 0.005,
 'load_all_pretrained_token_embeddings': 0,
 'load_only_pretrained_token_embeddings': 0,
 'main_evaluation_mode': 'conll',
 'maximum_number_of_epochs': 100,
 'number_of_cpu_threads': 8,
 'number_of_gpus': 0,
 'optimizer': 'sgd',
 'output_folder': '/content/drive/My Drive/NeuroNER-master/output',
 'output_scores': 0,
 'parameters_filepath': './parameters.ini',
 'patience': 10,
 

In [0]:
# --spacylanguage=vi_spacy_model
# --tokenizer=ViTokenizer
!neuroner  --use_pretrained_model=False --spacylanguage=en --token_pretrained_embedding_filepath="/content/drive/My Drive/NeuroNER-master/data/word_vectors/glove.6B.100d.txt" --maximum_number_of_epochs=20   --train_model=True --dataset_text_folder="/content/drive/My Drive/NeuroNER-master/neuroner/data/conll2003/en" 
# !neuroner --maximum_number_of_epochs=20 --token_pretrained_embedding_filepath=./data/word_vectors/glove.6B.100d.txt  --train_model=True --output_folder=./output2 --dataset_text_folder="/content/drive/My Drive/NeuroNER-master/neuroner/data/conll2003/en" 


{'character_embedding_dimension': 25,
 'character_lstm_hidden_state_dimension': 25,
 'check_for_digits_replaced_with_zeros': 1,
 'check_for_lowercase': 1,
 'dataset_text_folder': '/content/drive/My '
                        'Drive/NeuroNER-master/neuroner/data/conll2003/en',
 'debug': 0,
 'dropout_rate': 0.5,
 'experiment_name': 'test',
 'fetch_data': '',
 'fetch_trained_model': '',
 'freeze_token_embeddings': 0,
 'gradient_clipping_value': 5.0,
 'learning_rate': 0.005,
 'load_all_pretrained_token_embeddings': 0,
 'load_only_pretrained_token_embeddings': 0,
 'main_evaluation_mode': 'conll',
 'maximum_number_of_epochs': 20,
 'number_of_cpu_threads': 8,
 'number_of_gpus': 0,
 'optimizer': 'sgd',
 'output_folder': './output',
 'output_scores': 0,
 'parameters_filepath': './parameters.ini',
 'patience': 10,
 'plot_format': 'pdf',
 'pretrained_model_folder': './trained_models/conll_2003_en',
 'reload_character_embeddings': 1,
 'reload_character_lstm': 1,
 'reload_crf': 1,
 'reload_feedforwa

In [4]:
!pip install pyvi
!pip install https://github.com/trungtv/vi_spacy/raw/master/packages/vi_spacy_model-0.2.1/dist/vi_spacy_model-0.2.1.tar.gz

Collecting pyvi
[?25l  Downloading https://files.pythonhosted.org/packages/10/e1/0e5bc6b5e3327b9385d6e0f1b0a7c0404f28b74eb6db59a778515b30fd9c/pyvi-0.1-py2.py3-none-any.whl (8.5MB)
[K     |████████████████████████████████| 8.5MB 7.3MB/s 
Collecting sklearn-crfsuite
  Downloading https://files.pythonhosted.org/packages/25/74/5b7befa513482e6dee1f3dd68171a6c9dfc14c0eaa00f885ffeba54fe9b0/sklearn_crfsuite-0.3.6-py2.py3-none-any.whl
Collecting python-crfsuite>=0.8.3
[?25l  Downloading https://files.pythonhosted.org/packages/95/99/869dde6dbf3e0d07a013c8eebfb0a3d30776334e0097f8432b631a9a3a19/python_crfsuite-0.9.7-cp36-cp36m-manylinux1_x86_64.whl (743kB)
[K     |████████████████████████████████| 747kB 56.1MB/s 
Installing collected packages: python-crfsuite, sklearn-crfsuite, pyvi
Successfully installed python-crfsuite-0.9.7 pyvi-0.1 sklearn-crfsuite-0.3.6
Collecting https://github.com/trungtv/vi_spacy/raw/master/packages/vi_spacy_model-0.2.1/dist/vi_spacy_model-0.2.1.tar.gz
[?25l  Download

In [0]:
!neuroner --maximum_number_of_epochs=20 --token_pretrained_embedding_filepath="/content/drive/My Drive/NeuroNER-master/data/word_vectors/glove.6B.100d.txt"  --train_model=True --output_folder=./output2 --dataset_text_folder="/content/drive/My Drive/NeuroNER-master/neuroner/data/conll2003/en" 


{'character_embedding_dimension': 25,
 'character_lstm_hidden_state_dimension': 25,
 'check_for_digits_replaced_with_zeros': 1,
 'check_for_lowercase': 1,
 'dataset_text_folder': '/content/drive/My '
                        'Drive/NeuroNER-master/neuroner/data/conll2003/en',
 'debug': 0,
 'dropout_rate': 0.5,
 'experiment_name': 'test',
 'fetch_data': '',
 'fetch_trained_model': '',
 'freeze_token_embeddings': 0,
 'gradient_clipping_value': 5.0,
 'learning_rate': 0.005,
 'load_all_pretrained_token_embeddings': 0,
 'load_only_pretrained_token_embeddings': 0,
 'main_evaluation_mode': 'conll',
 'maximum_number_of_epochs': 20,
 'number_of_cpu_threads': 8,
 'number_of_gpus': 0,
 'optimizer': 'sgd',
 'output_folder': './output2',
 'output_scores': 0,
 'parameters_filepath': './parameters.ini',
 'patience': 10,
 'plot_format': 'pdf',
 'pretrained_model_folder': './trained_models/conll_2003_en',
 'reload_character_embeddings': 1,
 'reload_character_lstm': 1,
 'reload_crf': 1,
 'reload_feedforw

In [0]:
from pyvi import ViTokenizer, ViPosTagger
import spacy
nlp = spacy.load('vi_spacy_model')
def tokenn(text):
    nlp=spacy.load('vi_spacy_model')
    doc=nlp(text)
    return doc.text

In [0]:
# !python -m spacy.vi.download
!python -m spacy download vi


[38;5;1m✘ No compatible model found for 'vi' (spaCy v2.2.4).[0m



In [0]:
!pip install spacy
# !pip install matplotlib==3.0.2
# !pip install networkx==2.2
# !pip install pycorenlp==0.3.0
# !pip install scikit-learn==0.20.2
# !pip install scipy==1.2.0
# !pip install spacy==2.0.18
# !pip install numpy==1.16.0



In [0]:
# !pip install scikit-learn==0.20.2

In [1]:
!neuroner --output_folder=/content/drive/My\ Drive/NeuroNER-master/output --token_pretrained_embedding_filepath=/content/drive/My\ Drive/NeuroNER-master/data/word_vectors/glove.6B.100d.txt --parameters_filepath=/content/drive/My\ Drive/NeuroNER-master/neuroner/trained_models/event/parameters.ini --train_model=False --use_pretrained_model=True --dataset_text_folder=/content/drive/My\ Drive/NeuroNER-master/neuroner/data/du_doan --pretrained_model_folder=/content/drive/My\ Drive/NeuroNER-master/neuroner/trained_models/event


{'character_embedding_dimension': 25,
 'character_lstm_hidden_state_dimension': 25,
 'check_for_digits_replaced_with_zeros': 1,
 'check_for_lowercase': 1,
 'dataset_text_folder': '/content/drive/My '
                        'Drive/NeuroNER-master/neuroner/data/du_doan',
 'debug': 0,
 'dropout_rate': 0.5,
 'experiment_name': 'test',
 'fetch_data': '',
 'fetch_trained_model': '',
 'freeze_token_embeddings': 0,
 'gradient_clipping_value': 5.0,
 'learning_rate': 0.05,
 'load_all_pretrained_token_embeddings': 0,
 'load_only_pretrained_token_embeddings': 0,
 'main_evaluation_mode': 'conll',
 'maximum_number_of_epochs': 120,
 'number_of_cpu_threads': 8,
 'number_of_gpus': 0,
 'optimizer': 'sgd',
 'output_folder': '/content/drive/My Drive/NeuroNER-master/output',
 'output_scores': 0,
 'parameters_filepath': '/content/drive/My '
                        'Drive/NeuroNER-master/neuroner/trained_models/event/parameters.ini',
 'patience': 10,
 'plot_format': 'pdf',
 'pretrained_model_folder': '/cont

# New Section