### Imports

In [1]:
import json
import keras
import requests
import numpy as np
import pandas as pd

from gensim.models import Word2Vec
from gensim.models.callbacks import CallbackAny2Vec

from keras import backend as K
from keras.utils.data_utils import pad_sequences
from sklearn.model_selection import train_test_split

### Functions

In [2]:
import json
import io
import shutil

# Read list to memory
def read_list(url):
    """Download the JSON document at ``url`` and return the decoded object.

    Raises ``requests.HTTPError`` (via ``raise_for_status``) when the server
    answers with an error status code.
    """
    response = requests.get(url)
    response.raise_for_status()
    # json.loads accepts the raw bytes directly, so no file-like wrapper is needed.
    return json.loads(response.content)

def read_labels(url):
    """Download the file at ``url``, read it with ``np.load`` and return it as an array.

    Raises ``requests.HTTPError`` (via ``raise_for_status``) when the server
    answers with an error status code.
    """
    reply = requests.get(url)
    reply.raise_for_status()
    # np.load needs a seekable file-like object, so wrap the downloaded bytes.
    buffer = io.BytesIO(reply.content)
    loaded = np.load(buffer)
    return np.array(loaded)

def get_model(filename, url):
    """Download the file at ``url`` and save it locally as ``filename``.

    The body is streamed to disk rather than loaded into memory first.

    Args:
        filename: Destination path; an existing file is overwritten.
        url: Address of the file to download.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
            The destination file is not opened in that case.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True) as response:
        # Fail fast instead of saving an HTML error page as if it were a model.
        response.raise_for_status()
        # response.raw skips requests' content decoding; ask urllib3 to undo
        # any gzip/deflate Content-Encoding so the bytes on disk are the file.
        response.raw.decode_content = True
        with open(filename, 'wb') as fout:
            shutil.copyfileobj(response.raw, fout)

In [3]:
def recall_m(y_true, y_pred):
    """Recall over the given tensors: true positives / (actual positives + epsilon)."""
    # Clip to [0, 1] and round so every element counts as a hard 0/1 decision.
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    # K.epsilon() keeps the division defined when there are no positive labels.
    return K.sum(hit_mask) / (K.sum(actual_mask) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the given tensors: true positives / (predicted positives + epsilon)."""
    # Clip to [0, 1] and round so every element counts as a hard 0/1 decision.
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    # K.epsilon() keeps the division defined when nothing is predicted positive.
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score built from ``precision_m`` and ``recall_m``.

    Computes 2 * (p * r) / (p + r + epsilon); the epsilon term keeps the
    division defined when both precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

### Load pre-processed dataset

In [4]:
# Locations of the pre-processed sentences (JSON) and their labels (.npy).
SENTENCES_URL = "https://github.com/chap0lin/PPF-MCTI/blob/master/Pre-processamento/xp8_list.json?raw=true"
LABELS_URL = "https://github.com/chap0lin/PPF-MCTI/blob/master/Pre-processamento/labels.npy?raw=true"

# Sentences are consumed later as an iterable of word sequences.
sentencesMCTIList = read_list(SENTENCES_URL)
labels = read_labels(LABELS_URL)

### Load Word2Vec model

In [5]:
#initialise callback class
class callback(CallbackAny2Vec):
  """
  Training callback that prints the loss of an epoch on every 100th epoch.
  """
  def __init__(self):
    # The loss reported by gensim is cumulative, so the previous reading is
    # kept in order to print only the increment of the current epoch.
    self.loss_previous_step = 0
    self.epoch = 0

  def on_epoch_end(self, model):
    """Record the latest cumulative loss and report it when the epoch index is a multiple of 100."""
    cumulative_loss = model.get_latest_training_loss()
    should_report = self.epoch % 100 == 0
    if should_report:
      epoch_loss = cumulative_loss - self.loss_previous_step
      print('Loss after epoch {}: {}'.format(self.epoch, epoch_loss))

    self.loss_previous_step = cumulative_loss
    self.epoch += 1

In [6]:
# Download the trained Word2Vec model into the working directory.
W2V_MODEL_FILE = "word2vec_xp8.model"
W2V_MODEL_URL = "https://github.com/chap0lin/PPF-MCTI/blob/master/Meta10/pesos/word2vec/word2vec_xp8.model?raw=true"
get_model(W2V_MODEL_FILE, W2V_MODEL_URL)

In [7]:
# Load the Word2Vec model stored as word2vec_xp8.model.
# NOTE(review): gensim's load() unpickles the file - only load models from a trusted source.
reloaded_w2v_model = Word2Vec.load('word2vec_xp8.model')

# gensim >= 4.0 removed KeyedVectors.vocab in favour of key_to_index;
# fall back to .vocab so the cell keeps working on gensim 3.x.
_w2v_vocab = getattr(reloaded_w2v_model.wv, 'key_to_index', None)
if _w2v_vocab is None:
  _w2v_vocab = reloaded_w2v_model.wv.vocab

# Every word known to the model.
words = list(_w2v_vocab)

In [8]:
# Embedding vector of every vocabulary word, in the same order as `words`.
mapped_words = [reloaded_w2v_model.wv[word] for word in words]

In [9]:
# Build the padded model inputs and the train/test split.

# Replace every word with its Word2Vec embedding (one list of vectors per sentence).
MCTIinput_vector = [
  [reloaded_w2v_model.wv[word] for word in sentence]
  for sentence in sentencesMCTIList
]

# Length of the longest sentence; 0 when there are no sentences at all.
max_sizeMCTI = max((len(vectors) for vectors in MCTIinput_vector), default=0)
lengthMCTI = max_sizeMCTI

# Pad shorter sentences at the front so all of them share the same length.
# NOTE(review): pad_sequences is documented with dtype='int32' as its default,
# which would truncate float embeddings to integers - confirm this matches how
# the saved models were trained before changing it.
MCTIinput_vector = pad_sequences(MCTIinput_vector, padding='pre', maxlen=lengthMCTI)

#MCTI
# 80/20 split with a fixed seed so the test set is reproducible.
x_trainMCTI, x_testMCTI, y_trainMCTI, y_testMCTI = train_test_split(
  MCTIinput_vector,
  labels,
  test_size=0.20,
  random_state=20,
)

### Evaluate

In [21]:
# Download the saved LSTM model into the working directory.
LSTM_WEIGHTS_FILE = "best weights LSTM.h5"
LSTM_WEIGHTS_URL = "https://github.com/chap0lin/PPF-MCTI/blob/master/Meta10/pesos/word2vec/best%20weights%20LSTM.h5?raw=true"
get_model(LSTM_WEIGHTS_FILE, LSTM_WEIGHTS_URL)

In [22]:
# custom_objects maps the metric names to the functions defined in this notebook.
lstm_custom_objects = {'f1_m': f1_m, "precision_m": precision_m, "recall_m": recall_m}
reconstructed_model_LSTM = keras.models.load_model("best weights LSTM.h5",
                                                   custom_objects=lstm_custom_objects)

# Evaluate the LSTM on the held-out test split.
# NOTE(review): the unpacking assumes evaluate() returns loss, accuracy, f1,
# precision, recall in this order - confirm against the model's compile() call.
lstm_scores = reconstructed_model_LSTM.evaluate(x_testMCTI, y_testMCTI, verbose=0)
loss, accuracy, f1_score, precision, recall = lstm_scores

print('Accuracy LSTM: %f' % (accuracy*100))
print('f1_score LSTM: %f' % (f1_score*100))
print('precision LSTM: %f' % (precision*100))
print('recall LSTM: %f' % (recall*100))

Accuracy LSTM: 89.784944
f1_score LSTM: 85.067540
precision LSTM: 97.095960
recall LSTM: 75.905579


In [17]:
# Download the saved CNN model into the working directory.
CNN_WEIGHTS_FILE = "best weights CNN.h5"
CNN_WEIGHTS_URL = "https://github.com/chap0lin/PPF-MCTI/blob/master/Meta10/pesos/word2vec/best%20weights%20CNN.h5?raw=true"
get_model(CNN_WEIGHTS_FILE, CNN_WEIGHTS_URL)

In [18]:
# custom_objects maps the metric names to the functions defined in this notebook.
cnn_custom_objects = {'f1_m': f1_m, "precision_m": precision_m, "recall_m": recall_m}
reconstructed_model_CNN = keras.models.load_model("best weights CNN.h5",
                                                  custom_objects=cnn_custom_objects)

# Evaluate the CNN on the held-out test split.
# NOTE(review): the unpacking assumes evaluate() returns loss, accuracy, f1,
# precision, recall in this order - confirm against the model's compile() call.
cnn_scores = reconstructed_model_CNN.evaluate(x_testMCTI, y_testMCTI, verbose=0)
loss, accuracy, f1_score, precision, recall = cnn_scores

print('Accuracy CNN: %f' % (accuracy*100))
print('f1_score CNN: %f' % (f1_score*100))
print('precision CNN: %f' % (precision*100))
print('recall CNN: %f' % (recall*100))

Accuracy CNN: 92.473119
f1_score CNN: 88.419127
precision CNN: 98.717946
recall CNN: 80.850625


In [19]:
# Download the saved DNN model into the working directory.
DNN_WEIGHTS_FILE = "best weights DNN.h5"
DNN_WEIGHTS_URL = "https://github.com/chap0lin/PPF-MCTI/blob/master/Meta10/pesos/word2vec/best%20weights%20DNN.h5?raw=true"
get_model(DNN_WEIGHTS_FILE, DNN_WEIGHTS_URL)

In [20]:
# custom_objects maps the metric names to the functions defined in this notebook.
dnn_custom_objects = {'f1_m': f1_m, "precision_m": precision_m, "recall_m": recall_m}
reconstructed_model_DNN = keras.models.load_model("best weights DNN.h5",
                                                  custom_objects=dnn_custom_objects)

# Evaluate the DNN on the held-out test split.
# NOTE(review): the unpacking assumes evaluate() returns loss, accuracy, f1,
# precision, recall in this order - confirm against the model's compile() call.
dnn_scores = reconstructed_model_DNN.evaluate(x_testMCTI, y_testMCTI, verbose=0)
loss, accuracy, f1_score, precision, recall = dnn_scores

print('Accuracy DNN: %f' % (accuracy*100))
print('f1_score DNN: %f' % (f1_score*100))
print('precision DNN: %f' % (precision*100))
print('recall DNN: %f' % (recall*100))

Accuracy DNN: 90.322578
f1_score DNN: 86.522371
precision DNN: 88.696486
recall DNN: 85.175008


In [21]:
# Download the saved SNN model into the working directory.
SNN_WEIGHTS_FILE = "best weights SNN.h5"
SNN_WEIGHTS_URL = "https://github.com/chap0lin/PPF-MCTI/blob/master/Meta10/pesos/word2vec/best%20weights%20SNN.h5?raw=true"
get_model(SNN_WEIGHTS_FILE, SNN_WEIGHTS_URL)

In [22]:
# custom_objects maps the metric names to the functions defined in this notebook.
snn_custom_objects = {'f1_m': f1_m, "precision_m": precision_m, "recall_m": recall_m}
reconstructed_model_SNN = keras.models.load_model("best weights SNN.h5",
                                                  custom_objects=snn_custom_objects)

# Evaluate the SNN on the held-out test split.
# NOTE(review): the unpacking assumes evaluate() returns loss, accuracy, f1,
# precision, recall in this order - confirm against the model's compile() call.
snn_scores = reconstructed_model_SNN.evaluate(x_testMCTI, y_testMCTI, verbose=0)
loss, accuracy, f1_score, precision, recall = snn_scores

print('Accuracy SNN: %f' % (accuracy*100))
print('f1_score SNN: %f' % (f1_score*100))
print('precision SNN: %f' % (precision*100))
print('recall SNN: %f' % (recall*100))

Accuracy SNN: 89.247310
f1_score SNN: 83.824784
precision SNN: 97.095960
recall SNN: 74.145293
