# Algorithm function

The algorithm function retrieves the trained algorithm and dictionaries, and generates a prediction based on the input.

In [42]:
import json
import numpy as np
import keras
import keras.preprocessing.text as kpt
from keras.preprocessing.text import Tokenizer
from keras.models import model_from_json
import tensorflow as tf

In [43]:
# upper bound on vocabulary size handed to the Tokenizer below
max_words = 5000

# class names, for human-friendly printing
labels = ['negative', 'positive']

# a Tokenizer is still needed here, but it is never fitted in this notebook
tokenizer = Tokenizer(num_words=max_words)

# load the saved dictionary from disk
with open('dictionary.json', 'r') as dictionary_file:
    dictionary = json.loads(dictionary_file.read())

In [44]:
def convert_text_to_index_array(text):
    """Translate text into the list of dictionary entries for its words.

    The text is split with ``kpt.text_to_word_sequence``. Every resulting
    word that is a key of the module-level ``dictionary`` contributes its
    stored value, in order of appearance. Words missing from the dictionary
    are reported on stdout and left out of the result.
    """
    indices = []
    for token in kpt.text_to_word_sequence(text):
        if token not in dictionary:
            # unknown word: report it and move on
            print("'%s' not in training corpus; ignoring." %(token))
            continue
        indices.append(dictionary[token])
    return indices

# read in your saved model structure; the context manager closes the file
# handle even if reading raises, which the manual open()/close() pair did not
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
# and create a model from that
model = model_from_json(loaded_model_json)
# and weight your nodes with your saved values
model.load_weights('model.h5')

In [45]:
def algorithm(evalSentence):
    """Run the loaded model on a piece of text.

    Returns the tuple ``(-1, -1)`` when ``evalSentence`` has length zero.
    Otherwise returns a two-item list: the position of the largest value in
    the model's prediction, followed by that value.
    """
    if len(evalSentence) == 0:
        return (-1,-1)

    word_indices = convert_text_to_index_array(evalSentence)
    features = tokenizer.sequences_to_matrix([word_indices], mode='binary')
    scores = model.predict(features)

    # argmax is taken over the whole prediction array, then used to index
    # into its first row
    best = np.argmax(scores)
    return [best, scores[0][best]]

Testing our function with a news article.

In [46]:
algorithm("""Comme partout en France, la main-mise de la franc-maçonnerie sur la politique persiste en Corse. Le 1er mars 2018, Paul Scaglia, Grand Maître Provincial de la GLNF depuis 2010, a été élu président du Conseil Economique, Social, Environnemental et Culturel de Corse (CESEC) par 30 voix contre 28.

 

Pour Henri Franceschi, qui était également candidat à la présidence du CESEC, il ne fait aucun doute que ce sont les réseaux maçonniques qui ont accordé le poste à Paul Scaglia.

Auparavant, Paul Scaglia avait déjà mobilisé les mêmes réseaux maçonniques pour prendre la présidence du Tribunal de Commerce d’Ajaccio.""")

'scaglia' not in training corpus; ignoring.
'economique' not in training corpus; ignoring.
'cesec' not in training corpus; ignoring.
'franceschi' not in training corpus; ignoring.
'cesec' not in training corpus; ignoring.
'scaglia' not in training corpus; ignoring.
'scaglia' not in training corpus; ignoring.


[0, 0.99993765]