/
eval_server.py
executable file
·120 lines (101 loc) · 4.92 KB
/
eval_server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#! /usr/bin/env python
import tensorflow as tf
import numpy as np
import os
import time
import datetime
import data_helpers
from text_cnn import TextCNN
from tensorflow.contrib import learn
import csv
import re
import json
def clean_str(string):
"""
Tokenization/string cleaning for all datasets except for SST.
Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
"""
string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
string = re.sub(r"\'s", " \'s", string)
string = re.sub(r"\'ve", " \'ve", string)
string = re.sub(r"n\'t", " n\'t", string)
string = re.sub(r"\'re", " \'re", string)
string = re.sub(r"\'d", " \'d", string)
string = re.sub(r"\'ll", " \'ll", string)
string = re.sub(r",", " , ", string)
string = re.sub(r"!", " ! ", string)
string = re.sub(r"\(", " \( ", string)
string = re.sub(r"\)", " \) ", string)
string = re.sub(r"\?", " \? ", string)
string = re.sub(r"\s{2,}", " ", string)
return string.strip().lower()
# Parameters
# ==================================================
# Eval Parameters
tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
tf.flags.DEFINE_string("checkpoint_dir", "./prod", "Checkpoint directory from training run")
# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
# Passed input from frontend
tf.flags.DEFINE_string("user_input", "", "Passed input from express server from the front end")
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
x_raw = FLAGS.user_input.split(',')
# x_raw = ["a 401k is included after time", "great knowledge in python is required", "3 years expereince with being a boss", "you must know R programming", "we pride ourselves on our cultural hiring with bachelor's degrees and at least 5 years of experience with benefits", "experience in microsoft word is a plus", "being a noob is preferred", "we want clarkson students with mechanical engineering degrees", "we want greg scholz with 5 years experience electrician"]
x_raw = [clean_str(line) for line in x_raw]
y_test = None # we want to predict
# Map data into vocabulary
vocab_path = os.path.join(FLAGS.checkpoint_dir, "..", "vocab")
vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
x_test = np.array(list(vocab_processor.transform(x_raw)))
# print("\nEvaluating...\n")
# Evaluation
# ==================================================
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
graph = tf.Graph()
with graph.as_default():
session_conf = tf.ConfigProto(
allow_soft_placement=FLAGS.allow_soft_placement,
log_device_placement=FLAGS.log_device_placement)
sess = tf.Session(config=session_conf)
with sess.as_default():
# Load the saved meta graph and restore variables
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
saver.restore(sess, checkpoint_file)
# Get the placeholders from the graph by name
input_x = graph.get_operation_by_name("input_x").outputs[0]
# input_y = graph.get_operation_by_name("input_y").outputs[0]
dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
# Tensors we want to evaluate
predictions = graph.get_operation_by_name("output/predictions").outputs[0]
# scored predictions
# scored_predictions = graph.get_operation_by_name("output/scored_predictions").outputs[0]
# Generate batches for one epoch
batches = data_helpers.batch_iter(list(x_test), FLAGS.batch_size, 1, shuffle=False)
# Collect the predictions here
all_predictions = []
for x_test_batch in batches:
batch_predictions = sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0})
all_predictions = np.concatenate([all_predictions, batch_predictions])
# Save the evaluation to a csv
predictions_human_readable = np.column_stack((np.array(x_raw), all_predictions))
# modifications to the human readable set - the actual names of the classifications
predictions_dict = {}
for prediction_set in predictions_human_readable:
if prediction_set[1] == '0.0':
predictions_dict[prediction_set[0]] = "required skill"
if prediction_set[1] == '1.0':
predictions_dict[prediction_set[0]] = "degree"
if prediction_set[1] == '2.0':
predictions_dict[prediction_set[0]] = "years"
if prediction_set[1] == '3.0':
predictions_dict[prediction_set[0]] = "desired skill"
if prediction_set[1] == '4.0':
predictions_dict[prediction_set[0]] = "benefits"
if prediction_set[1] == '5.0':
predictions_dict[prediction_set[0]] = "culture"
if prediction_set[1] == '6.0':
predictions_dict[prediction_set[0]] = "other / no category found"
# return json object to stdout
print json.dumps(predictions_dict)