# Event detection

In [5]:
# This script trains the BiLSTM-CRF architecture for part-of-speech tagging using
# the universal dependency dataset (http://universaldependencies.org/).
# The code use the embeddings by Komninos et al. (https://www.cs.york.ac.uk/nlp/extvec/)
from __future__ import print_function
import os
import logging
import sys
from neuralnets.BiLSTM import BiLSTM
from util.preprocessing import perpareDataset, loadDatasetPickle

'''
# :: Change into the working dir of the script ::
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)
'''

# :: Logging level ::
loggingLevel = logging.INFO
logger = logging.getLogger()
logger.setLevel(loggingLevel)

ch = logging.StreamHandler(sys.stdout)
ch.setLevel(loggingLevel)
formatter = logging.Formatter('%(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)


######################################################
#
# Data preprocessing
#
######################################################
datasets = {
    'HistoMention':                                   #Name of the dataset
        {'columns': {0:'tokens', 1:'lemma', 2:'POS', 5:'chunk_BIO'},
         'label': 'chunk_BIO',                                #Which column we like to predict
         'evaluate': True,                                  #Should we evaluate on this task? Set true always for single task setups
         'commentSymbol': None} 
}


#Path on your computer to the word embeddings
embeddingsPath = '../HistoGlove.txt'

#Prepares the dataset to be used with the LSTM-network. Creates and stores cPickle files in the pkl/ folder ::
pickleFile = perpareDataset(embeddingsPath, datasets)


######################################################
#
# The training of the network starts here
#
######################################################


#Load the embeddings and the dataset
embeddings, mappings, data = loadDatasetPickle(pickleFile)

# Some network hyperparameters
params = {'classifier': ['Softmax'], 'LSTM-Size': [100], 'dropout': (0.25, 0.25)}

model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)
model.storeResults('./unidep_pos_results.csv') #Path to store performance scores for dev / test
model.modelSavePath = "models/[ModelName]_[DevScore]_[TestScore]_[Epoch].h5" #Path to store models
model.fit(epochs=25)

Generate new embeddings files for a dataset
Generate new embeddings files for a dataset
Read file: ../HistoGlove.txt
Read file: ../HistoGlove.txt
Added words: 0
Added words: 0
:: Transform HistoMention dataset ::
:: Transform HistoMention dataset ::
:: Create Train Matrix ::
:: Create Train Matrix ::
Unknown-Tokens: 1.48%
Unknown-Tokens: 1.48%
:: Create Dev Matrix ::
:: Create Dev Matrix ::
Unknown-Tokens: 1.39%
Unknown-Tokens: 1.39%
:: Create Test Matrix ::
:: Create Test Matrix ::
Unknown-Tokens: 2.17%
Unknown-Tokens: 2.17%


FileNotFoundError: [Errno 2] No such file or directory: 'pkl/HistoMention_../HistoGlove.pkl'