In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import pickle as pickle

  from ._conv import register_converters as _register_converters


## Predictor class

In [5]:
class Predictor(object):
    """Predict accusation ids, article ids and an imprisonment term from fact texts.

    Two ``tf.estimator.DNNClassifier`` models (one for accusations, one for
    law articles) are built on top of a shared TF-Hub text embedding column
    and pointed at checkpoint directories under ``path_io``. Class ids are
    mapped back to label strings with pickled label encoders and then to
    numeric ids through the ``accu.csv`` / ``law.csv`` lookup tables.
    """

    # Directory used when no ``path_io`` is given (the original notebook
    # location). Pass another directory, e.g. "predictor/", to relocate.
    DEFAULT_PATH_IO = "/home/aida-zw/Desktop/ws/notebook/io/"

    # Batch size used by the pandas input function at prediction time.
    BATCH_SIZE = 128

    def __init__(self, path_io=DEFAULT_PATH_IO):
        """Load lookup tables and label encoders and build both estimators.

        Args:
            path_io: Directory holding ``accu.csv``, ``law.csv``, the two
                ``label_encoder_*.pickle`` files, the ``model_accu/`` and
                ``model_articles/`` checkpoint directories and the TF-Hub
                module directory. A missing trailing ``/`` is added.
        """
        # File names are appended by plain concatenation, so make sure the
        # directory ends with a separator (an empty string is left alone so
        # that it keeps meaning "current directory").
        if path_io and not path_io.endswith('/'):
            path_io += '/'

        self.df_accu = pd.read_csv(path_io + "accu.csv")
        self.df_articles = pd.read_csv(path_io + "law.csv")

        # Output sizes of the two classifiers; presumably these must match
        # the checkpoints stored in the model directories -- TODO confirm.
        self.n_classes_accu = 118
        self.n_classes_articles = 94

        self.model_dir_accu = path_io + 'model_accu/'
        self.model_dir_articles = path_io + 'model_articles/'

        # NOTE(security): pickle.load can execute arbitrary code; only point
        # ``path_io`` at a directory whose contents you trust.
        with open(path_io + 'label_encoder_accu.pickle', 'rb') as handle:
            self.label_encoder_accu = pickle.load(handle)

        with open(path_io + 'label_encoder_articles.pickle', 'rb') as handle:
            self.label_encoder_articles = pickle.load(handle)

        hub_dir = path_io + 'google_nnlm-zh-dim128-with-normalization_1'
        self.feature_columns = [
            hub.text_embedding_column('fact', hub_dir, trainable=True)
        ]

        self.model_accu = tf.estimator.DNNClassifier(
            [512, 512, 128],
            feature_columns=self.feature_columns,
            n_classes=self.n_classes_accu,
            model_dir=self.model_dir_accu)

        self.model_articles = tf.estimator.DNNClassifier(
            [512, 512, 128],
            feature_columns=self.feature_columns,
            n_classes=self.n_classes_articles,
            model_dir=self.model_dir_articles)

    def predict(self, content):
        """Run all three predictors on ``content``.

        Args:
            content: A list of fact strings, or a single fact string (which
                is treated as a one-element list).

        Returns:
            A list with one dict per input fact, each with the keys
            ``'accusation'``, ``'articles'`` and ``'imprisonment'``. An empty
            input yields an empty list.
        """
        # A bare string would make pandas raise "If using all scalar values,
        # you must pass an index"; treat it as a single document instead.
        if isinstance(content, str):
            content = [content]

        test_data = pd.DataFrame({'fact': content})
        if test_data.empty:
            # Nothing to classify; skip building the TensorFlow graphs.
            return []

        ans_accusation, _ = self.predict_accu(test_data)
        ans_articles, _ = self.predict_articles(test_data)
        ans_imprisonment = self.predict_time(test_data)

        ans = pd.DataFrame({'accusation': ans_accusation,
                            'articles': ans_articles,
                            'imprisonment': ans_imprisonment})
        return ans.to_dict(orient='records')

    def predict_time(self, test_data):
        """Return a constant imprisonment value of 7 for every row.

        No model is involved here: the value is hard-coded.
        """
        return [7] * test_data.shape[0]

    def _predict_labels(self, model, label_encoder, test_data):
        """Classify ``test_data`` with ``model`` and decode the class ids.

        Shared by ``predict_articles`` and ``predict_accu``. Returns whatever
        ``label_encoder.inverse_transform`` yields for the predicted ids.
        """
        test_input_fn = tf.estimator.inputs.pandas_input_fn(
            x=test_data,
            y=None,
            shuffle=False,
            batch_size=self.BATCH_SIZE)
        predictions = list(model.predict(input_fn=test_input_fn))
        # Each prediction carries its class id in the first element of
        # 'classes'; cast to int before decoding.
        result_class = np.array([p['classes'][0] for p in predictions])
        return label_encoder.inverse_transform(result_class.astype(int))

    def predict_articles(self, test_data):
        """Predict law articles for every row of ``test_data``.

        Returns:
            ``(articles_num, prediction_arr)``: the per-row lists of numeric
            article ids and the decoded label array they were derived from.
        """
        prediction_arr = self._predict_labels(
            self.model_articles, self.label_encoder_articles, test_data)
        articles_num = self.get_articles_num(self.df_articles, prediction_arr)
        return articles_num, prediction_arr

    def predict_accu(self, test_data):
        """Predict accusations for every row of ``test_data``.

        Returns:
            ``(accusation_num, prediction_arr)``: the per-row lists of numeric
            accusation ids and the decoded label array they were derived from.
        """
        prediction_arr = self._predict_labels(
            self.model_accu, self.label_encoder_accu, test_data)
        accusation_num = self.get_accusation_num(self.df_accu, prediction_arr)
        return accusation_num, prediction_arr

    @staticmethod
    def _split_labels(label):
        """Split a comma-separated label into tokens without quotes/whitespace."""
        return [tok.replace("'", "").strip() for tok in str(label).split(',')]

    def get_accusation_num(self, df_accu, arr):
        """Map decoded accusation labels to their numeric ids.

        Args:
            df_accu: DataFrame with ``accusation`` and ``accusation_num``
                columns; accusation names are stripped before lookup.
            arr: Iterable of label strings, each possibly holding several
                comma-separated (and optionally single-quoted) names.

        Returns:
            One list per label with the id of every name in it; names that
            are not in the table map to ``None``.
        """
        keys = [name.strip() for name in df_accu.accusation.tolist()]
        dict_accu = dict(zip(keys, df_accu.accusation_num.tolist()))

        return [[dict_accu.get(name) for name in self._split_labels(accu)]
                for accu in arr]

    def get_articles_num(self, df_articles, arr):
        """Map decoded article labels to their numeric ids.

        Args:
            df_articles: DataFrame with ``articles`` and ``articles_num``
                columns.
            arr: Iterable of label strings, each possibly holding several
                comma-separated article numbers. Quotes and surrounding
                whitespace are ignored, as in ``get_accusation_num``.

        Returns:
            One list per label with the id of every article in it; articles
            that are not in the table map to ``None``.

        Raises:
            ValueError: If a token cannot be parsed as an integer.
        """
        dict_articles = dict(zip(df_articles.articles.tolist(),
                                 df_articles.articles_num.tolist()))

        return [[dict_articles.get(int(tok)) for tok in self._split_labels(art)]
                for art in arr]


## Dry run

In [6]:
path = "/home/aida-zw/Desktop/ws/Data/sample/"


def read_data(csv_file, path):
    """Read ``path + csv_file`` and keep only the four columns used here.

    ``path`` is concatenated with ``csv_file`` as-is, so it must already end
    with a path separator. The returned frame has the columns ``fact``,
    ``accusation``, ``relevant_articles`` and ``imprisonment``, in that order.
    """
    wanted_columns = ['fact', 'accusation', 'relevant_articles', 'imprisonment']
    full_frame = pd.read_csv(path + csv_file)
    return full_frame.loc[:, wanted_columns]

# Load the sample test set and pull out the raw fact texts as a plain list.
test_data = read_data(csv_file='data_test.csv', path=path)
content = test_data.loc[:, 'fact'].tolist()

# Build the predictor (loads lookup tables, label encoders and estimators).
model = Predictor()

# One dict per fact with 'accusation', 'articles' and 'imprisonment'.
result = model.predict(content)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/home/aida-zw/Desktop/ws/notebook/io/model_accu/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7faa9069e710>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/home/aida-zw/Desktop/ws/notebook/io/model_articles/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, 

  if diff:


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/fact_hub_module_embedding/module/embeddings/part_0:0 from checkpoint b'/home/aida-zw/Desktop/ws/notebook/io/google_nnlm-zh-dim128-with-normalization_1/variables/variables' with embeddings
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /home/aida-zw/Desktop/ws/notebook/io/model_articles/model.ckpt-3000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


  if diff:


In [None]:
result