In [1]:
# dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV

import tensorflow as tf

from datetime import datetime
import os
import pathlib
from pathlib import Path
import json

  from ._conv import register_converters as _register_converters


In [2]:
# Graph-visualization helpers below are adapted from Alexander Fred-Ojala's Data-X notebook: https://github.com/ikhlaqsidhu/data-x/blob/master/06b-tools-tensorflow/intro-to-tf_v2_afo.ipynb

# TensorBoard Graph visualizer in notebook
import numpy as np
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Return a copy of ``graph_def`` with large constant payloads replaced.

    Every node of ``graph_def`` is copied into a fresh ``tf.GraphDef``. For
    nodes whose op is ``'Const'`` and whose ``tensor_content`` is longer than
    ``max_const_size``, the content is replaced by a short
    ``<stripped N bytes>`` placeholder. The nodes of the input ``graph_def``
    are not modified; only the copies are.

    Args:
        graph_def: object exposing a ``node`` sequence of protobuf node
            messages (as a ``GraphDef`` does).
        max_const_size: largest ``tensor_content`` length that is kept as is.

    Returns:
        The new ``tf.GraphDef`` holding the (possibly stripped) node copies.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # tensor_content is a protobuf bytes field; under Python 3 it
                # rejects str, so the placeholder has to be encoded first.
                tensor.tensor_content = ("<stripped %d bytes>" % size).encode("utf-8")
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Display a TensorFlow graph inside the notebook.

    Args:
        graph_def: a graph definition, or any object with an
            ``as_graph_def()`` method (it is converted first).
        max_const_size: forwarded to ``strip_consts``; constants with larger
            content are replaced by a placeholder before rendering.

    The stripped graph is embedded as text in a ``<tf-graph-basic>`` element,
    wrapped in an iframe, and shown through ``IPython.display``.
    """
    # Accept graph-like objects as well as plain graph definitions.
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()

    stripped = strip_consts(graph_def, max_const_size=max_const_size)

    # Text form of the graph, quoted so it can be dropped into the script.
    pbtxt_literal = repr(str(stripped))
    # Random suffix so the element id differs from call to call.
    element_id = 'graph' + str(np.random.rand())

    widget_template = """
        <script src="//cdnjs.cloudflare.com/ajax/libs/polymer/0.3.3/platform.js"></script>
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """
    widget_html = widget_template.format(data=pbtxt_literal, id=element_id)

    iframe_template = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """
    # Double quotes must be escaped because the markup lives in srcdoc="...".
    escaped_widget = widget_html.replace('"', '&quot;')
    display(HTML(iframe_template.format(escaped_widget)))



In [3]:
# Read the corpus dictionary from its JSON file.
with open("ner/nyt_data_dict.json") as corpus_file:
    data_dict = json.load(corpus_file)

from ner.ner.corpus import Corpus

# Wrap the raw dictionary in the project's Corpus object.
corp = Corpus(data_dict)

In [None]:
# Build the NER network from a fixed, hand-picked configuration.
from ner.ner.network import NER

model_params = dict(
    use_batch_norm=True,
    use_crf=True,
    net_type='cnn',
    use_capitalization=True,
)

# The corpus is passed positionally; the options above go in as keywords.
net = NER(corp, **model_params)

Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead
Number of parameters: 
Embeddings 18118362
ConvNet 168192
Classifier 1028
transitions:0 16
Total number of parameters equal 18287598


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [None]:
# Fit the network built above for a single epoch and keep what fit() returns.
results = net.fit(epochs=1, batch_size=1000)

Epoch 0


In [None]:
# parameter tuning
# Candidate values for the network options. Entries tagged "if cnn" /
# "if rnn" are marked by the author as applying to that net_type only.
model_params = {"filter_width": np.arange(3, 7), # if cnn
                "embeddings_dropout": [True, False],
                "use_batch_norm": [True, False], # if cnn
                "use_crf": [True, False],
                "net_type": ['cnn', 'rnn', 'cnn_highway'],
                "use_capitalization": [True, False],
                "cell_type": ['lstm', 'gru'] # if rnn
               }

# Candidate values for the training options. Float grids use np.linspace so
# the point count and end value are explicit: np.arange(0.5, 1.1, 0.1) rounds
# up to 7 points and leaks a decay of ~1.1 into the grid.
training_params = {'dropout_rate': np.linspace(0.1, 0.9, 5),
                   'epochs': np.arange(10, 101, 30),
                   'learning_rate': np.linspace(0.0001, 0.001, 10),
                   'batch_size': np.arange(4, 65, 20),
                   'learning_rate_decay': np.linspace(0.5, 1.0, 6)}

# TODO: wire up the search. GridSearchCV requires both `estimator` and
# `param_grid`; a bare GridSearchCV() raises TypeError, so the call is left
# out until an estimator is available.
# NOTE(review): presumably NER needs a scikit-learn style wrapper
# (fit / score / get_params / set_params) before it can be searched - confirm.