Skip to content

Commit

Permalink
[HN] it works but why is it gibberish? ugh
Browse files Browse the repository at this point in the history
  • Loading branch information
Helen Ngo committed Dec 16, 2019
1 parent 66ca891 commit 191e514
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 20 deletions.
55 changes: 36 additions & 19 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,47 +6,64 @@

@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    # The earlier ``home.html`` return was the pre-rename line of this commit;
    # keeping both left the second return unreachable.
    return render_template('index.html')


@app.route('/predict', methods=['POST'])
def predict():

def inference(question):
import tensorflow as tf
from lstm import EncoderDecoder, Encoder, Decoder, inference
import pickle
import numpy as np
import tensorflow as tf
from lstm import EncoderDecoder, Encoder, Decoder, inference
import pickle
import numpy as np

char2idx = pickle.load(open('char2idx.pkl', 'rb'))
idx2char = pickle.load(open('idx2char.pkl', 'rb'))

def output_to_tensor(tokens):
    """Convert ``tokens`` to a tensor, drop its axis 2, and transpose it.

    NOTE(review): presumably ``tokens`` is the per-step token list returned by
    the model, so the result is batch-major — confirm against ``lstm.py``.
    """
    stacked = tf.convert_to_tensor(tokens)
    return tf.transpose(tf.squeeze(stacked, axis=2))

def token_to_text(batch_tensor):
    """Decode each row of ``batch_tensor`` into a string via ``idx2char``.

    ``batch_tensor`` must expose ``.numpy()``; every element of every row is
    looked up in ``idx2char`` and the characters of a row are concatenated.
    Returns a list with one decoded string per row.
    """
    return [
        ''.join(idx2char[token_id] for token_id in row)
        for row in batch_tensor.numpy()
    ]

char2idx = pickle.load(open('char2idx.pkl', 'rb'))
idx2char = pickle.load(open('idx2char.pkl', 'rb'))
def inference(question):
    """Run the encoder-decoder model on one question and return its answer text.

    Args:
        question: Input string; each character is looked up in ``char2idx``.

    Returns:
        The decoded prediction for the single input, cut just before the first
        ``'~'`` stop character. If the prediction contains no ``'~'``, the
        whole decoded sequence is returned.

    Raises:
        KeyError: If ``question`` contains a character missing from
            ``char2idx``.
    """
    # Model dimensions handed to EncoderDecoder below.
    # NOTE(review): presumably these must match the values used when
    # 'model_weights' was trained (see config.py) — confirm.
    VOCAB_SIZE = len(char2idx)
    ANSWER_MAX_LENGTH = 30
    EMBEDDING_SIZE = 512
    LSTM_HIDDEN_SIZE = 512

    # Map characters to ids, then add a leading axis so the model receives a
    # batch containing this one question.
    questions_encoded = np.expand_dims(
        np.array([char2idx[q] for q in question]), axis=0)

    model = EncoderDecoder(input_dim=VOCAB_SIZE, embedding_dim=EMBEDDING_SIZE,
                           hidden_dim=LSTM_HIDDEN_SIZE, output_dim=VOCAB_SIZE,
                           max_len=ANSWER_MAX_LENGTH)
    model.load_weights('model_weights')

    print(questions_encoded)
    _outputs, output_tokens = model(questions_encoded)

    predicted_text = token_to_text(output_to_tensor(output_tokens))[0]
    # str.index raised ValueError whenever the model produced no '~';
    # partition yields the same prefix when '~' is present and the full
    # string when it is absent.
    return predicted_text.partition('~')[0]

if request.method == 'POST':
message = request.form['message']
Expand Down
2 changes: 1 addition & 1 deletion config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

BATCH_SIZE = 128
EMBEDDING_SIZE = 512
LSTM_HIDDEN_SIZE = 1024
LSTM_HIDDEN_SIZE = 512
NUM_EPOCHS = 50
NUM_EXAMPLES = 666666*3
p_test = .2
File renamed without changes.

0 comments on commit 191e514

Please sign in to comment.