# Prototype d'un réseau de neurones pour la désambiguïsation du corpus termITH

Je me suis basé sur le dépôt Git ci-dessous pour concevoir ce prototype :

- Author: Aymeric Damien
- Project: https://github.com/aymericdamien/TensorFlow-Examples/

In [1]:
from __future__ import print_function
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import json
import os
import shutil


In [2]:
# Corpus loader: reads the vocabulary file plus the training and evaluation
# JSON files of one context from the directory `path`.
path = './resources/test/'
context = '1.json'
# Every line of the "corpus" file is stripped and kept as one vocabulary
# entry; the `with` block closes the file as soon as it has been read.
with open(path + "corpus") as corpus_file:
    corpus_words = [line.strip() for line in corpus_file.readlines()]
print("corpus words : ", corpus_words)
with open(path + "training/" + context) as train_file:
    train = json.load(train_file)
print("train corpus : ", train)
with open(path + "evaluation/" + context) as evaluation_file:
    evaluation = json.load(evaluation_file)
print("evaluation corpus : ", evaluation)

# Parameters
learning_rate = 0.1  # learning rate handed to the gradient-descent optimizer
num_steps = 1000  # number of steps requested from model.train
batch_size = 128
display_step = 100  # NOTE(review): not referenced in the visible code

# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
num_input = len(corpus_words) # one input feature per word of the corpus vocabulary
num_classes = 2 # number of output classes (label 1 for 'lexOn', 0 for anything else)

corpus words :  ['patate', 'banane', 'biche', 'ouaf', 'carotte', 'chien', 'girafe']
train corpus :  [{'context': {'banane': 0.254, 'biche': 0.4521, 'carotte': 0.3545, 'patate': 0.8454, 'ouaf': 0.84}, 'type': 'lexOn'}, {'context': {'banane': 0.254, 'carotte': 0.354, 'patate': 0.7854, 'girafe': 0.84, 'chien': 0.12145}, 'type': 'lexOff'}, {'context': {'banane': 0.254, 'carotte': 0.354, 'patate': 0.7854, 'girafe': 0.84, 'chien': 0.12145}, 'type': 'lexOff'}, {'context': {'banane': 0.254, 'carotte': 0.354, 'patate': 0.7854, 'girafe': 0.84, 'chien': 0.12145}, 'type': 'lexOn'}]
evaluation corpus :  [{'context': {'banane': 0.254, 'carotte': 0.354, 'patate': 0.7854, 'girafe': 0.84, 'chien': 0.12145}, 'type': 'DM1'}, {'context': {'banane': 0.254, 'carotte': 0.354, 'girafe': 0.84, 'chien': 0.12145}, 'type': 'DM1'}, {'context': {'banane': 0.254, 'carotte': 0.354, 'patate': 0.7854, 'girafe': 0.84, 'chat': 0.12145}, 'type': 'DM1'}, {'context': {'carotte': 0.354, 'girafe': 0.2, 'chien': 0.12145}, 'typ

In [3]:
# Turn serialized corpus entries into label and feature arrays
def deserialize(corpus, corpus_words):
    """Convert corpus entries into a label vector and a score matrix.

    Each entry of ``corpus`` is read through its ``'context'`` mapping
    (word -> score) and its ``'type'`` string.

    Returns a pair ``(labels, scores)`` of NumPy arrays: ``labels`` holds 1
    for entries whose type is ``'lexOn'`` and 0 for any other type;
    ``scores`` has one row per entry and one column per word of
    ``corpus_words`` (in that order), with 0 where the word is absent from
    the entry's context.
    """
    score_rows = []
    flags = []
    for entry in corpus:
        # Words missing from this entry's context fall back to a score of 0
        score_rows.append(
            [entry['context'].get(word, 0) for word in corpus_words])
        flags.append(1 if entry['type'] == 'lexOn' else 0)
    return np.array(flags), np.array(score_rows)

In [4]:
# Define the input function for training
# `l` receives the 0/1 labels and `w_s` the per-word score matrix.
l, w_s = deserialize(train, corpus_words)
# The batch size comes from the `batch_size` parameter instead of a repeated
# literal. num_epochs=None places no epoch limit on the input, so training
# length is set by the `steps` argument given to model.train.
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'spec_scores': w_s}, y=l,
    batch_size=batch_size, num_epochs=None, shuffle=True)

In [5]:
# Define the neural network
def neural_net(x_dict):
    """Build the classifier graph and return its output logits.

    ``x_dict`` is the feature dict received from the model function; only
    its ``'spec_scores'`` entry is read. The input goes through two dense
    layers of ``n_hidden_1`` and ``n_hidden_2`` units, then through a final
    dense layer of ``num_classes`` units. No activation argument is passed
    to any of the layers.
    """
    hidden = x_dict['spec_scores']
    # Stack the hidden fully connected layers, in order
    for units in (n_hidden_1, n_hidden_2):
        hidden = tf.layers.dense(hidden, units)
    # Output fully connected layer with a neuron for each class
    return tf.layers.dense(hidden, num_classes)

In [6]:
# Define the model function (following TF Estimator Template)
def model_fn(features, labels, mode):
    """Build the EstimatorSpec for the requested mode.

    Args:
        features: dict of input tensors, forwarded as-is to ``neural_net``.
        labels: class labels; cast to int32 for the loss. Not read in
            prediction mode.
        mode: one of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec``. In prediction mode it carries only
        the predicted class indices; otherwise it also carries the loss, the
        training op and an ``'accuracy'`` evaluation metric.
    """
    # Build the neural network
    logits = neural_net(features)

    # Predicted class is the index of the largest logit. The softmax
    # probabilities were computed here before but never used, so that op
    # is no longer built.
    pred_classes = tf.argmax(logits, axis=1)

    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)

    # Define loss and optimizer
    loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.cast(labels, dtype=tf.int32)))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())

    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)

    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=pred_classes,
      loss=loss_op,
      train_op=train_op,
      eval_metric_ops={'accuracy': acc_op})

    return estim_specs

In [7]:
# Build the Estimator
# Only the model function is supplied; no other Estimator argument (such as
# a model directory or a run config) is passed here.
model = tf.estimator.Estimator(model_fn)

INFO:tensorflow:Using default config.


In [8]:
# Train the Model
# Requests `num_steps` training steps, feeding the model from `input_fn`.
model.train(input_fn, steps=num_steps)

INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmp9e03t0tg/model.ckpt.


INFO:tensorflow:step = 1, loss = 0.6595628733382045


INFO:tensorflow:global_step/sec: 206.573


INFO:tensorflow:step = 101, loss = 0.3996654142131645 (0.486 sec)


INFO:tensorflow:global_step/sec: 260.28


INFO:tensorflow:step = 201, loss = 0.495567385612934 (0.383 sec)


INFO:tensorflow:global_step/sec: 263.384


INFO:tensorflow:step = 301, loss = 0.4937328977651102 (0.380 sec)


INFO:tensorflow:global_step/sec: 276.845


INFO:tensorflow:step = 401, loss = 0.49586190679069303 (0.362 sec)


INFO:tensorflow:global_step/sec: 262.791


INFO:tensorflow:step = 501, loss = 0.5078851129348199 (0.379 sec)


INFO:tensorflow:global_step/sec: 262.561


INFO:tensorflow:step = 601, loss = 0.4632469484187212 (0.381 sec)


INFO:tensorflow:global_step/sec: 255.141


INFO:tensorflow:step = 701, loss = 0.48306813861943715 (0.392 sec)


INFO:tensorflow:global_step/sec: 260.646


INFO:tensorflow:step = 801, loss = 0.5746175148434394 (0.384 sec)


INFO:tensorflow:global_step/sec: 269.484


INFO:tensorflow:step = 901, loss = 0.40085046968455007 (0.371 sec)


INFO:tensorflow:Saving checkpoints for 1000 into /tmp/tmp9e03t0tg/model.ckpt.


INFO:tensorflow:Loss for final step: 0.49959980627557254.


<tensorflow.python.estimator.estimator.Estimator at 0x7fc09c1dcd68>

In [9]:
# Evaluate the Model
# Define the input function for evaluating
# input_fn = tf.estimator.inputs.numpy_input_fn(
#     x={'images': mnist.test.images}, y=mnist.test.labels,
#     batch_size=batch_size, shuffle=False)
# Use the Estimator 'evaluate' method
# model.evaluate(input_fn)

In [10]:
# Build the score matrix of the evaluation corpus; the labels in `e_l` are
# not fed to the prediction input function below.
e_l, e_w_s = deserialize(evaluation, corpus_words)
# Prepare the input data
# shuffle=False so that predictions can be matched to `evaluation` entries
# by position afterwards.
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'spec_scores': e_w_s}, shuffle=False)
# Use the model to predict the class of each evaluation context
preds = list(model.predict(input_fn))
# Display
for pred in preds:
    print("Model prediction:", pred)


INFO:tensorflow:Restoring parameters from /tmp/tmp9e03t0tg/model.ckpt-1000


Model prediction: 0
Model prediction: 0
Model prediction: 0
Model prediction: 1


In [11]:
# write results on json
# Tag every evaluation entry with the decision predicted at the same index
for idx, e in enumerate(evaluation):
    if preds[idx] == 1:
        e['result'] = 'DaOn'
    else:
        e['result'] = 'DaOff'
# Recreate the result directory from scratch: an existing one is removed
# together with its content before the new file is written.
result_dir = path + 'result/'
if os.path.exists(result_dir):
    shutil.rmtree(result_dir)
os.makedirs(result_dir)
with open(result_dir + context, 'w') as outfile:
    json.dump(evaluation, outfile, indent=4)
