In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pickle
import keras

from keras_preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras_preprocessing import sequence
from keras.preprocessing import text

Parameters

In [2]:
# Saved Keras model; load() passes this path to keras.models.load_model.
model_path = '../models/tc_model_1_1680701562.h5'
# Pickled dictionary; load() unpickles it and swaps its keys and values.
dictionary_path = '../data/small_dictionary_1680697004.pickle'
# Pickled tokenizer; load() unpickles it for use in calculate_vector().
tokenizer_path = '../models/tokenizer_model_1_1680697854.pickle'

The function that loads the model, the tokenizer and the dictionary

In [3]:
def load(model_path, tokenizer_path, dictionary_path):
    """Load the model, the tokenizer and the inverted dictionary from disk.

    Args:
        model_path: Path handed to ``keras.models.load_model``.
        tokenizer_path: Path of the pickled tokenizer.
        dictionary_path: Path of the pickled dictionary.

    Returns:
        A ``(model, tokenizer, dictionary)`` tuple, where ``dictionary`` is the
        unpickled mapping with its keys and values swapped.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file, so these paths must only point at trusted artifacts.
    with open(tokenizer_path, 'rb') as tokenizer_file:
        tokenizer = pickle.load(tokenizer_file)

    with open(dictionary_path, 'rb') as dictionary_file:
        stored = pickle.load(dictionary_file)

    # Swap keys and values; if two keys share a value, the later key wins.
    inverted = {}
    for key, value in stored.items():
        inverted[value] = key

    return keras.models.load_model(model_path), tokenizer, inverted

The function takes the model, the tokenizer and the input text, and calculates the interest vector

In [4]:
def calculate_vector(model, tokenizer, text):
    """Compute the model's prediction vector for a single input text.

    Args:
        model: Keras model used for prediction; its expected sequence length
            determines how the tokenized text is padded.
        tokenizer: Object exposing ``texts_to_sequences``.
        text: The input string to evaluate.

    Returns:
        The first row of ``model.predict`` for the padded sequence.
    """
    sequences = tokenizer.texts_to_sequences([text])

    # Prefer the first layer's `input_length`, as before. Not every first layer
    # exposes that attribute (e.g. an input layer, or Embedding in Keras 3), so
    # fall back to the sequence dimension of the model's input shape.
    maxlen = getattr(model.layers[0], 'input_length', None)
    if maxlen is None:
        input_shape = getattr(model, 'input_shape', None)
        if (
            isinstance(input_shape, tuple)
            and len(input_shape) > 1
            and isinstance(input_shape[1], int)
        ):
            maxlen = input_shape[1]

    # With maxlen=None, pad_sequences pads to the longest sequence in the batch.
    x = pad_sequences(sequences, maxlen=maxlen)
    return model.predict(x, verbose=0)[0]

The function that calculates the difference (mean squared error) between the user profile and a place profile

In [5]:
def mse(vector, place_vector):
    """Return the mean squared error between two vectors.

    Args:
        vector: Array-like of numbers (NumPy array, list or tuple).
        place_vector: Array-like of numbers broadcastable against ``vector``.

    Returns:
        The mean of the element-wise squared differences, as a float64 scalar.
    """
    # Cast to float64 before subtracting: plain lists do not support `-`, and
    # unsigned integer arrays would wrap around on negative differences.
    difference = (
        np.asarray(vector, dtype=np.float64)
        - np.asarray(place_vector, dtype=np.float64)
    )
    return np.square(difference).mean()

The function sorts the places from the best fitting to the worst fitting

In [6]:
def fit_places(vector, places, difference_metric=mse):
    """Rank places from the smallest to the largest difference to ``vector``.

    Args:
        vector: The vector every place is compared against.
        places: Iterable of indexable place records; element 7 of each record
            is what gets passed to the metric (presumably the place's profile
            vector -- confirm against the data file).
        difference_metric: Callable ``(vector, place_vector) -> score`` where a
            lower score sorts first. Defaults to ``mse``.

    Returns:
        Two parallel lists: the places in ascending score order, and their
        scores in the same order.
    """
    scored = sorted(
        ((place, difference_metric(vector, place[7])) for place in places),
        key=lambda pair: pair[1],
    )
    ranked_places = [place for place, _ in scored]
    ranked_scores = [score for _, score in scored]
    return ranked_places, ranked_scores

### Predict the result on given input text

Set the text for prediction

In [7]:
# NOTE(review): this rebinds `text`, which the import cell also bound to the
# `keras.preprocessing.text` module; that module is no longer reachable by
# this name after the assignment.
text = 'I really like eating in bars like that. I would like to drink more beer'

Load the model, tokenizer & dictionary

In [8]:
# Uses the three paths from the Parameters cell. In the recorded run this call
# raised FileNotFoundError because the tokenizer pickle was not found.
model, tokenizer, dictionary = load(model_path, tokenizer_path, dictionary_path)

FileNotFoundError: [Errno 2] No such file or directory: '../models/tokenizer_model_1_1680697854.pickle'

Calculate the result vector

In [None]:
# Prediction vector the model produces for `text`.
result = calculate_vector(model, tokenizer, text)

Load places data

In [None]:
# Load the pickled places data.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load data files from a trusted source.
with open('../data/data_krakow_vectorized_1680882410', 'rb') as file:
    places = pickle.load(file)

Sort those places by user interests

In [None]:
# `places_sorted` is ordered by ascending difference to `result`;
# `distances` holds the matching scores in the same order.
places_sorted, distances = fit_places(result, places)

### Visualize the result

The method decodes the result vector to human readable form

In [11]:
def visualize_vector(vector, dictionary, text):
    """Plot the weights of ``vector`` as percentages, labelled by category.

    Args:
        vector: Sequence of weights; position ``i`` is labelled with
            ``dictionary[i]``.
        dictionary: Mapping from vector index to category label.
        text: Input text, shown in the figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Pair each weight with its label and order the pairs by descending weight.
    ranked = sorted(
        ((dictionary[position], weight) for position, weight in enumerate(vector)),
        key=lambda pair: pair[1],
        reverse=True,
    )
    labels, weights = zip(*ranked)
    percentages = np.array(weights)
    percentages *= 100  # in-place scaling keeps the array's dtype

    # Figure height grows with the number of categories so labels stay legible.
    fig, ax = plt.subplots(figsize=(10, 0.25 * len(labels)))
    ax.set_ylabel('Category')
    ax.set_xlabel('Weight in %')
    ax.set_title(f'Result for text "{text}"')

    # Same data drawn twice: red dots for the values, thin blue line joining them.
    ax.plot(percentages, labels, 'r.', label='Category weight')
    ax.plot(percentages, labels, 'b', label='Category weight (line)', linewidth=0.5)
    ax.set_xticks(np.arange(0, 101, 10))

    ax.legend()
    plt.show()

The function presents the sorting results

In [12]:
def visualize_places(places_sorted, distances, dictionary, text):
    """Plot each place's difference score, labelled with its top category.

    Args:
        places_sorted: Sequence of indexable place records. Element 0 is used
            as the display name and element 7 as the vector whose arg-max
            selects the category label.
        distances: Difference scores parallel to ``places_sorted``.
        dictionary: Mapping from vector index to category label.
        text: Input text, shown in the figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    labels = []
    for place in places_sorted:
        top_category = dictionary[np.argmax(place[7])]
        labels.append(f'{place[0]} [{top_category}]')

    # Scores are scaled by 100 to match the "%" axis label.
    percentages = np.multiply(np.array(distances, dtype='float64'), 100)

    # Figure height grows with the number of places so labels stay legible.
    fig, ax = plt.subplots(figsize=(10, 0.25 * len(labels)))
    ax.set_ylabel('Place name with best fitting category')
    ax.set_xlabel('Difference in %')
    ax.set_title(f'Best fitting places for text "{text}"')

    # Same data drawn twice: red dots for the values, thin blue line joining them.
    ax.plot(percentages, labels, 'r.', label='Difference')
    ax.plot(percentages, labels, 'b', label='Difference (line)', linewidth=0.5)
    ax.set_xticks(np.arange(0, 101, 10))

    ax.legend()
    plt.show()

In [13]:
# Needs `result` from the "Calculate the result vector" cell; the recorded run
# raised NameError here because `result` had not been defined.
visualize_vector(result, dictionary, text)

NameError: name 'result' is not defined

In [None]:
# Needs `places_sorted` and `distances` from the fit_places cell above.
visualize_places(places_sorted, distances, dictionary, text)

In [24]:
# NOTE(review): leftover scratch cell (execution count 24 is out of sequence);
# nothing else in the notebook uses it.
print('x')

x
