# Predictions

In this notebook you can test a pretrained model with real sentences.

In [1]:
import numpy as np
import os
import pandas as pd

import logging
import pickle
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import LabelEncoder
from keras.models import model_from_json

from intentbasedbot import text_features as tf

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Grab the root logger and let DEBUG-and-above records through.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Smoke test: emit one INFO record to confirm log output is visible in the notebook.
logging.info("test")

INFO:root:test


In [3]:
# Company whose artifacts are loaded; passed to get_model_and_encoder to build the file names.
company_name = 'origenes'

In [4]:
def get_model_and_encoder(company_name):
    """Load the trained artifacts of one company from ``<cwd>/dist/models``.

    Four files are read, all named after ``company_name``:
    ``model_<name>.json`` (model definition), ``model_<name>.h5`` (weights),
    ``classes_<name>.npy`` (label encoder classes) and
    ``tokenizer_<name>.pickle`` (pickled tokenizer).

    :param company_name: Name used to build the artifact file names.
    :returns: Tuple ``(model, encoder, tokenizer)``.
    :raises OSError: If the model definition or tokenizer file cannot be opened.
    """
    # base_path = app.root_path
    base_path = os.path.join(os.getcwd(), 'dist')
    # Every artifact lives in the same directory, so build it only once.
    models_dir = os.path.join(base_path, 'models')

    # Model definition
    model_path = os.path.join(models_dir, f'model_{company_name}.json')
    logger.info(f'Loading model from {model_path}')
    with open(model_path, 'r') as json_file:
        model = model_from_json(json_file.read())
    logger.info('Model definition loaded from disk')

    # Model weights
    weights_path = os.path.join(models_dir, f'model_{company_name}.h5')
    logger.info(f'Loading weights from {weights_path}')
    model.load_weights(weights_path)
    logger.info("Model weights loaded from disk")

    # Encoder: only the fitted classes are stored, so rebuild the encoder around them.
    encoder_path = os.path.join(models_dir, f'classes_{company_name}.npy')
    logger.info(f'Loading encoder from {encoder_path}')
    encoder = LabelEncoder()
    encoder.classes_ = np.load(encoder_path)
    logger.info('Encoder definition loaded from disk')

    # Tokenizer (logged like the other artifacts so a failing load is easy to locate)
    tokenizer_path = os.path.join(models_dir, f'tokenizer_{company_name}.pickle')
    logger.info(f'Loading tokenizer from {tokenizer_path}')
    # SECURITY: unpickling can execute arbitrary code; only load tokenizer files
    # that were produced by a trusted training run.
    with open(tokenizer_path, 'rb') as handle:
        tokenizer = pickle.load(handle)
    logger.info('Tokenizer loaded from disk')

    return model, encoder, tokenizer

In [11]:
def preprocessing(sentences, tokenizer, max_len = 152):
    """
        Turn raw sentences into the padded index matrix fed to the model.

        :sentences: List of strings
        :tokenizer: Object providing ``texts_to_sequences``
        :max_len: Value forwarded to ``pad_sequences`` as ``maxlen``
        :returns: Output of ``pad_sequences``, a 2D tensor (sample, max_len)
    """
    # One list of indexes per sentence.
    index_lists = tokenizer.texts_to_sequences(sentences)

    # Bring every list of indexes to the same length.
    padded = pad_sequences(index_lists, maxlen=max_len)
    return padded

In [12]:
# Load the model, label encoder and tokenizer stored for `company_name`.
model, encoder, tokenizer = get_model_and_encoder(company_name)

INFO:root:Loading model from /home/ramiro/AnacondaProjects/Leadaki/intent-based-bot/dist/models/model_origenes.json
INFO:root:Model definition loaded from disk
INFO:root:Loading weights from /home/ramiro/AnacondaProjects/Leadaki/intent-based-bot/dist/models/model_origenes.h5
INFO:root:Model weights loaded from disk
INFO:root:Loading encoder from /home/ramiro/AnacondaProjects/Leadaki/intent-based-bot/dist/models/classes_origenes.npy
INFO:root:Encoder definition loaded from disk


In [13]:
def tokenize_utterance(utterance):
    """Tokenize one utterance and drop its stop words.

    :param utterance: Text passed to ``tf.filter_tokenize``.
    :returns: List of the tokens that are not in ``tf.get_stop_words_es()``.
    """
    # Fetch the stop words once per utterance instead of once per token.
    stop_words = set(tf.get_stop_words_es())
    return [token for token in tf.filter_tokenize(utterance) if token not in stop_words]


def tokenize_batch(utterances):
    """Clean a batch of utterances.

    :param utterances: Iterable of texts.
    :returns: List with one string per utterance: its remaining tokens
        joined by single spaces.
    """
    return [' '.join(tokenize_utterance(utterance)) for utterance in utterances]

In [17]:
# Example utterances (Spanish free text) to run through the model.
utterances = [
    'hola muy buenos dias quiero sacar un seguro de vida para mi mama y para mi',
    'quiero saber si estamos cubiertos',
    'me robaron el celular en constitucion'
]

In [18]:
# Tokenize, filter and pad the utterances before feeding them to the model.
test_data = preprocessing(tokenize_batch(utterances), tokenizer)

# `Sequential.predict_classes` is not available in recent Keras/TensorFlow releases,
# so derive the class indexes from `predict` with the same rule it applied:
# argmax for multi-unit outputs, 0.5 threshold for a single-unit output.
probabilities = model.predict(test_data, batch_size=1, verbose=1)
if probabilities.shape[-1] > 1:
    predictions = probabilities.argmax(axis=-1)
else:
    predictions = (probabilities > 0.5).astype('int32')



In [19]:
# Decode every predicted class index in a single call: `inverse_transform` is
# defined on array-likes, and newer scikit-learn versions may reject scalar input.
predicted_labels = encoder.inverse_transform(predictions)
for utterance, label in zip(utterances, predicted_labels):
    print('Class for {}: {}'.format(utterance, label))

Class for hola muy buenos dias quiero sacar un seguro de vida para mi mama y para mi: seguro vida
Class for quiero saber si estamos cubiertos: solicitud informacion
Class for me robaron el celular en constitucion: robo celular


  if diff:
  if diff:
  if diff:
