In [19]:
%%html
<style>
    /* Display every <table> element as a block-level box. */
    table {
        display: block;
    }
</style>

# Deep contextualized word representations
## Matthew E. Peters, Mark Neumann, Mohit Iyyer, Matt Gardner, Christopher Clark, Kenton Lee and Luke Zettlemoyer
### Code
- https://github.com/blackbbc/NER
- https://github.com/sarveshsparab/DeepElmoEmbedNer

### Installing the required dependencies

In [None]:
# Use the %pip magic rather than !pip so the packages are installed into the
# environment of the running kernel, not whichever `pip` is first on the shell PATH.
# NOTE(review): versions are unpinned. The documentation further down refers to
# Tensorflow.Session, which is a TensorFlow 1.x API — confirm the required
# TensorFlow version and pin it here.
%pip install tensorflow numpy tqdm gensim

### Adding paths to sys paths

In [None]:
import sys

# Make the parent directory and its `model` sub-directory importable.
sys.path.extend(['../', '../model'])

### Suppressing warning-level messages in output

In [None]:
import warnings

# Install a blanket "ignore" filter so no warning of any category is shown.
warnings.simplefilter('ignore')

### Instantiating an object of the NER parent class implementation 

In [None]:
# Import the NER wrapper class from the project module of the same name.
# NOTE(review): presumably resolved through the '../' and '../model' entries
# appended to sys.path in an earlier cell — confirm the module location.
from DeepElmoEmbedNer import DeepElmoEmbedNer

# Single instance reused by every later cell (read_dataset, train, predict, evaluate).
deen = DeepElmoEmbedNer()

### Instantiating the input files for the model
### 3 files required
- train : For the model to train
- dev : For the model to validate the training
- test : To evaluate the performance of the model

In [None]:
# Input file for each split. All three splits point at the same sample file here.
file_dict = {
    'train': '../data/sample/ner_test_input.txt',
    'test': '../data/sample/ner_test_input.txt',
    'dev': '../data/sample/ner_test_input.txt',
}

### Reading from the dataset
- Description
    * Reads a dataset in preparation for train or test. Returns data in proper format for train or test.
- Returns
    * A dictionary with the same keys as file_dict, whose values are lists of lines; each line is further tokenized on the column delimiter into a list
- Arguments

<table>
    <thead>
        <tr>
            <th>Type</th>
            <th>Name</th>
            <th>Default</th>
            <th>Purpose</th>
            <th>Required</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td rowspan="2">Standard</td>
            <td>file_dict</td>
            <td>-</td>
            <td>A dictionary with input file locations</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>dataset_name</td>
            <td>CoNLL03</td>
            <td>Name of the dataset required for calling appropriate utils, converters</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>args</td>
            <td>N/A</td>
            <td>None</td>
            <td>N/A</td>
            <td>✖</td>
        </tr>
        <tr>
            <td rowspan="2">kwargs</td>
            <td>fileHasHeaders</td>
            <td>True</td>
            <td>Flag to check if input file has headers</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>columnDelimiter</td>
            <td>`space`</td>
            <td>Delimiter in the data input</td>
            <td>✖</td>
        </tr>
    </tbody>
</table>

In [None]:
# Load the files listed in file_dict; the result is indexed by split name
# later on (e.g. data['train'] in the prediction cell).
# The third positional argument fills the `args` slot, which the argument
# table lists as N/A, so None is passed.
data = deen.read_dataset(file_dict, "CoNLL2003", None)

### Extracting the ground truth data
- Description
    * Converts test data into common format for evaluation \[i.e. same format as predict()\] 
    * This added step/layer of abstraction is required due to the refactoring of read_dataset_train() and read_dataset_test() back to the single method of read_dataset() along with the requirement on the format of the output of predict() and therefore the input format requirement of evaluate()
- Returns
    * \[tuple,...\], i.e. list of tuples. \[SAME format as output of predict()\]
    * Each tuple is (start index, span, mention text, mention type)
    * Where:
         - start index: int, the index of the first character of the mention span. None if not applicable.
         - span: int, the length of the mention. None if not applicable.
         - mention text: str, the actual text that was identified as a named entity. Required.
         - mention type: str, the entity/mention type. None if not applicable.
- Arguments

<table>
    <thead>
        <tr>
            <th>Type</th>
            <th>Name</th>
            <th>Default</th>
            <th>Purpose</th>
            <th>Required</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>Standard</td>
            <td>data</td>
            <td>-</td>
            <td>data in proper format for train or test. [i.e. format of output from read_dataset]</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>args</td>
            <td>N/A</td>
            <td>None</td>
            <td>N/A</td>
            <td>✖</td>
        </tr>
        <tr>
            <td rowspan="4">kwargs</td>
            <td>wordPosition</td>
            <td>0</td>
            <td>Column number with the mention word</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>tagPosition</td>
            <td>3</td>
            <td>Column number with the entity tag</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>writeGroundTruthToFile</td>
            <td>True</td>
            <td>Flag to enable writing ground truths to a file</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>groundTruthPath</td>
            <td>../results/groundTruths.txt</td>
            <td>Location to save the ground truths file</td>
            <td>✖</td>
        </tr>
    </tbody>
</table>

In [None]:
# Convert the loaded data into the tuple format that predict() also returns,
# so the two can be compared in the evaluation cell.
# NOTE(review): per the argument table, writeGroundTruthToFile defaults to True,
# so this presumably also writes ../results/groundTruths.txt — confirm.
groundTruth = deen.convert_ground_truth(data, None)

### Training the model
- Description
    * Trains the model on the parsed data
    * Calls the internal save_model method to save the trained model for predictions
- Returns
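    * A tuple `(model, sess, saver)`: the trained model, the TensorFlow session used for training, and the saver. All three are passed on to predict().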
- Arguments

<table>
    <thead>
        <tr>
            <th>Type</th>
            <th>Name</th>
            <th>Default</th>
            <th>Purpose</th>
            <th>Required</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>Standard</td>
            <td>data</td>
            <td>-</td>
            <td>Parsed input data in the format returned by read_dataset method</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>args</td>
            <td>N/A</td>
            <td>None</td>
            <td>N/A</td>
            <td>✖</td>
        </tr>
        <tr>
            <td rowspan="22">kwargs</td>
            <td>parsedDumpPath</td>
            <td>../dev/parsedDataDump.pkl</td>
            <td>Location of the parsed input data-files in the pickled format</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>vocabPath</td>
            <td>../dev/vocab.txt</td>
            <td>Location of the parsed vocab</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>elmoOptionsFile</td>
            <td>../resources/elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json</td>
            <td>ELMo model options parameters file</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>elmoWeightFile</td>
            <td>../resources/elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5</td>
            <td>ELMo model weights file</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>wordEmbeddingSize</td>
            <td>50</td>
            <td>Set the ELMo word embedding size for the model</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>charEmbeddingSize</td>
            <td>16</td>
            <td>Set the ELMo character embedding size for the model</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>LSTMStateSize</td>
            <td>200</td>
            <td>State size of the Multi-LSTM layers</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>filterNum</td>
            <td>128</td>
            <td>Number of filters in the model</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>filterSize</td>
            <td>3</td>
            <td>Filter size</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>learningRate</td>
            <td>0.015</td>
            <td>Model learning rate</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>dropoutRate</td>
            <td>0.5</td>
            <td>Model dropout rate</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>epochWidth</td>
            <td>16</td>
            <td>Batch size within each epoch</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>maxEpoch</td>
            <td>100</td>
            <td>Number of epochs to run for training</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>checkpointPath</td>
            <td>../results/checkpoints</td>
            <td>Location to save intermediate checkpoints</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>bestCheckpointPath</td>
            <td>../results/checkpoints/best</td>
            <td>Location to save the checkpoint with the best F1 score</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>trainWordsPath</td>
            <td>../dev/train.word.vocab</td>
            <td>Location to save the intermediate vocabulary words from the training set</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>trainCharPath</td>
            <td>../dev/train.char.vocab</td>
            <td>Location to save the intermediate vocabulary characters from the training set</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>gloveEmbedPath</td>
            <td>../resources/glove/glove.6B.50d.txt</td>
            <td>Location of the GloVe embedding file</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>fetchPredictData</td>
            <td>False</td>
            <td>Flag to toggle behaviour of the internal data_converter method</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>maxWordLength</td>
            <td>30</td>
            <td>Set maximal word length for the model</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>wordPosition</td>
            <td>0</td>
            <td>Column number with the mention word</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>tagPosition</td>
            <td>3</td>
            <td>Column number with the entity tag</td>
            <td>✖</td>
        </tr>
    </tbody>
</table>

In [None]:
# maxEpoch=1 overrides the default of 100 epochs listed in the argument table.
# NOTE(review): presumably chosen to keep this sample run short — raise it for real training.
# The three returned objects are handed to predict() in a later cell.
model, sess, saver = deen.train(data, None, maxEpoch=1)

### Generating predictions
- Description
    * Parses and converts the input sentence provided in a file for predicting the NER tags
    * Calls the internal load_model method to load the trained model for predictions
- Returns
    * \[tuple,...\], i.e. list of tuples.
    * Each tuple is (start index, span, mention text, mention type)
    * Where:
         - start index: int, the index of the first character of the mention span. None if not applicable.
         - span: int, the length of the mention. None if not applicable.
         - mention text: str, the actual text that was identified as a named entity. Required.
         - mention type: str, the entity/mention type. None if not applicable.

         `NOTE: len(predictions) should equal len(data) AND the ordering should not change [important for evaluation. See note in evaluate() about parallel arrays.]`
- Arguments

<table>
    <thead>
        <tr>
            <th>Type</th>
            <th>Name</th>
            <th>Default</th>
            <th>Purpose</th>
            <th>Required</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>Standard</td>
            <td>data</td>
            <td>-</td>
            <td>The file location with the input text in the common format for prediction</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>args</td>
            <td>N/A</td>
            <td>None</td>
            <td>N/A</td>
            <td>✖</td>
        </tr>
        <tr>
            <td rowspan="11">kwargs</td>
            <td>model</td>
            <td>N/A</td>
            <td>ElmoModel instance to hold the loaded model into</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>sess</td>
            <td>N/A</td>
            <td>Tensorflow.Session instance used to maintain the same session used to train</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>saver</td>
            <td>N/A</td>
            <td>Tensorflow.train.saver instance used to load the trained model</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>trainedData</td>
            <td>N/A</td>
            <td>Parsed trained data</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>fileHasHeaders</td>
            <td>True</td>
            <td>Flag to check if input file has headers</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>parsedDumpPath</td>
            <td>../dev/parsedDataDump.pkl</td>
            <td>Location of the parsed input data-files in the pickled format</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>bestCheckpointPath</td>
            <td>../results/checkpoints/best</td>
            <td>Location of the checkpoint with the best F1 score</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>epochWidth</td>
            <td>16</td>
            <td>Batch size within each epoch</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>writePredsToFile</td>
            <td>True</td>
            <td>Flag to enable writing predictions to file</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>predsPath</td>
            <td>../results/predictions.txt</td>
            <td>Location to write the predictions to</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>writeInputToFile</td>
            <td>False</td>
            <td>Flag to toggle behaviour of the internal data_converter method</td>
            <td>✔</td>
        </tr>
    </tbody>
</table>

In [None]:
# Run prediction on the sample file, passing along the model, session and
# saver returned by train() together with the parsed training split.
predictions = deen.predict(
    '../data/sample/ner_test_input.txt',
    None,
    writeInputToFile=False,
    model=model,
    sess=sess,
    saver=saver,
    trainedData=data['train'],
)

### Evaluate the trained model
- Description
    * Calculates evaluation metrics on chosen benchmark dataset
        - Precision
        - Recall
        - F1 Score
- Returns
    * Tuple with metrics (p,r,f1). Each element is float.
- Arguments

<table>
    <thead>
        <tr>
            <th>Type</th>
            <th>Name</th>
            <th>Default</th>
            <th>Purpose</th>
            <th>Required</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td rowspan="2">Standard</td>
            <td>predictions</td>
            <td>N/A</td>
            <td>List of predicted labels</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>groundTruths</td>
            <td>N/A</td>
            <td>List of ground truth labels</td>
            <td>✔</td>
        </tr>
        <tr>
            <td>args</td>
            <td>N/A</td>
            <td>None</td>
            <td>N/A</td>
            <td>✖</td>
        </tr>
        <tr>
            <td rowspan="2">kwargs</td>
            <td>predsPath</td>
            <td>../results/predictions.txt</td>
            <td>Location from which to read the predictions</td>
            <td>✖</td>
        </tr>
        <tr>
            <td>groundTruthPath</td>
            <td>../results/groundTruths.txt</td>
            <td>Location from which to read the ground truths</td>
            <td>✖</td>
        </tr>
    </tbody>
</table>

In [None]:
# Element 3 of each (start index, span, mention text, mention type) tuple is
# the mention type; only those labels are compared.
predicted_types = [mention[3] for mention in predictions]
true_types = [mention[3] for mention in groundTruth]

# Left as the last expression so the returned metrics are shown as the cell output.
deen.evaluate(predicted_types, true_types, None)