In [None]:
import pandas as pd
import json
from model import NER_MODEL

In [None]:
# Configuration consumed by NER_MODEL: model location, text-cleaning rules,
# one dataset description per run mode, and training parameters.
meta_data = {
    # Location of the model.
    "path_model": "NER_MODEL/fasttext",
    # Ordered [pattern, replacement] regex pairs. Patterns are written as raw
    # strings (or with explicit double backslashes) so that no invalid escape
    # sequence such as "\/" or "\-" reaches the Python parser.
    "preprocess_rule": [
        # HTML-like tags (including an unterminated trailing tag) -> space.
        [r"<\/?[^>]+(>|$)", " "],
        # Non-breaking space, ASCII hyphen and Unicode hyphen -> space.
        # "\xa0" must stay a real escape, so this one is not a raw string.
        ["\xa0|\\-|\\‐", " "],
        # Drop apostrophes.
        ["'", ""],
        # Insert a space where a run of letters/underscores is followed by digits.
        [r"([^\d\W]+)(\d+[^\s]*)", r"\1 \2"],
    ],
    # Dataset description for the 'train' mode.
    "train": {
        "path_data": "data/train_data_entity.csv",
        "preprocess_cols": ["lower_lpn", "origin_lpn"],
        "list_cols": ["tag"],
        "feature_col": "origin_lpn",
    },
    # Dataset description for the 'test' mode.
    "test": {
        "path_data": "data/test_data_entity.csv",
        "preprocess_cols": ["lower_lpn", "origin_lpn"],
        "list_cols": ["tag"],
        "feature_col": "origin_lpn",
    },
    # Dataset description for the 'predict' mode (no list-valued columns).
    "predict": {
        "path_data": "data/subbrand_data_processed.csv",
        "preprocess_cols": ["original_name"],
        "list_cols": [],
        "feature_col": "original_name",
    },
    # Keyword arguments unpacked into the training call.
    "params": {
        "drop": 0.4,
    },
}

In [None]:
# Class-level setup call that is given the pretrained embedding vectors file.
# NOTE(review): the meaning of the empty first argument is not visible in this
# notebook — confirm against the NER_MODEL implementation.
NER_MODEL.init_model_pretrain_vect(
    "",
    "models_embed/model_embedding.vec",
)

In [None]:
# Training pipeline: load models, load data, train with spaCy, save the model.
# NOTE(review): nrows=10 and the leading 1 passed to train_spacy look like
# smoke-test settings (presumably row limit and iteration count) — confirm
# before a real training run.
models = (
    NER_MODEL(meta_data, 'train')
    .load_models()
    .load_data(nrows=10)
    .train_spacy(1, **meta_data['params'])
    .save_model()
)

In [None]:
# Evaluation pipeline: load models and the data configured under 'test',
# then run evaluate with verbose=0.
models_test = (
    NER_MODEL(meta_data, 'test')
    .load_models()
    .load_data()
    .evaluate(verbose=0)
)

In [None]:
# Prediction pipeline: load models and the data configured under 'predict'
# (called with nrows=10), then run predict.
models_predict = (
    NER_MODEL(meta_data, 'predict')
    .load_models()
    .load_data(nrows=10)
    .predict()
)

In [None]:
# Grouped bar chart: two series ('poacee', 'sorgho') across three conditions.
import numpy as np
import matplotlib.pyplot as plt

# Width of a single bar; each condition shows two bars side by side.
barWidth = 0.3

# Heights of the first series ('poacee').
bars1 = [10, 9, 2]

# Heights of the second series ('sorgho').
bars2 = [10.8, 9.5, 4.5]

# x positions: the second series sits one bar width to the right of the first.
r1 = np.arange(len(bars1))
r2 = [x + barWidth for x in r1]

# Colours come from matplotlib's built-in tab10 palette so the cell runs on a
# fresh kernel without relying on a variable defined elsewhere.
palette = plt.get_cmap("tab10").colors

fig, ax = plt.subplots()
ax.bar(r1, bars1, width=barWidth, color=palette[0], edgecolor='black', label='poacee')
ax.bar(r2, bars2, width=barWidth, color=palette[5], edgecolor='black', label='sorgho')

# Centre each tick under its pair of bars: the pair spans from r - w/2 to
# r + 3w/2, so its midpoint is r + w/2.
ax.set_xticks(r1 + barWidth / 2)
ax.set_xticklabels(['cond_A', 'cond_B', 'cond_C'])
ax.set_ylabel('height')
ax.legend()

# Show graphic
plt.show()
