# Introdução

In [2]:
import math
import time
import requests
import unicodedata

import numpy as np
from numpy import array
import tensorflow as tf 
from tensorflow import keras

import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Input
from keras.layers import Activation
from keras.layers import GlobalMaxPool1D
from keras.layers import Dropout, Conv1D, GlobalMaxPooling1D
from keras.layers import LSTM
from keras.layers import Embedding
from keras.preprocessing.text import one_hot
from keras.utils import pad_sequences


from sklearn.model_selection import train_test_split

In [3]:
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall metric: true positives / (actual positives + epsilon).

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` are
    clipped to [0, 1] and rounded before being summed, so every element
    contributes either 0 or 1 to its count.

    Args:
        y_true: Ground-truth tensor (presumably binary labels -- verify
            against the caller).
        y_pred: Prediction tensor that multiplies element-wise with
            ``y_true``.

    Returns:
        A scalar backend tensor. ``K.epsilon()`` is added to the
        denominator so an input without positives does not divide by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    positive_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(positive_mask) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision metric: true positives / (predicted positives + epsilon).

    Both the element-wise product ``y_true * y_pred`` and ``y_pred`` are
    clipped to [0, 1] and rounded before being summed, so every element
    contributes either 0 or 1 to its count.

    Args:
        y_true: Ground-truth tensor (presumably binary labels -- verify
            against the caller).
        y_pred: Prediction tensor that multiplies element-wise with
            ``y_true``.

    Returns:
        A scalar backend tensor. ``K.epsilon()`` is added to the
        denominator so an input without predicted positives does not
        divide by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 metric: harmonic mean of ``precision_m`` and ``recall_m``.

    Args:
        y_true: Ground-truth tensor, forwarded unchanged to both helpers.
        y_pred: Prediction tensor, forwarded unchanged to both helpers.

    Returns:
        ``2 * p * r / (p + r + epsilon)`` as a scalar backend tensor; the
        epsilon keeps the result finite when precision and recall are
        both zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p*r)/(p+r+K.epsilon())
    return 2*harmonic_half

# Dataset

In [None]:
import json
import io
import shutil

# Read list to memory
def read_list(url, timeout=30):
    """Download a JSON document and return it as Python objects.

    Args:
        url: Address of the JSON resource.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.
            Without one, a stalled server would block the cell
            indefinitely.

    Returns:
        Whatever the JSON payload decodes to.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
        requests.exceptions.RequestException: The request failed or timed
            out.
        json.JSONDecodeError: The body is not valid JSON.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # json.loads accepts the raw bytes directly; no file-like wrapper needed.
    return json.loads(response.content)

def read_labels(url, timeout=30):
    """Download a NumPy ``.npy`` file and return its contents as an array.

    Args:
        url: Address of the serialized array.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.
            Without one, a stalled server would block the cell
            indefinitely.

    Returns:
        numpy.ndarray: The array stored in the downloaded file.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
        requests.exceptions.RequestException: The request failed or timed
            out.
        ValueError: The payload cannot be loaded without unpickling.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # The bytes come from the network, so keep pickle loading explicitly off:
    # unpickling untrusted data can execute arbitrary code.
    data = np.load(io.BytesIO(response.content), allow_pickle=False)
    return np.array(data)

def get_model(filename, url, timeout=60):
    """Download the file at ``url`` and save it as ``filename``.

    The body is streamed to disk chunk by chunk, so the whole file is never
    held in memory at once.

    Args:
        filename: Destination path; created or overwritten.
        url: Address of the file to download.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.
            Without one, a stalled server would block the cell
            indefinitely.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status. Nothing is written in that case, so an HTML error page
            can no longer end up on disk under the model's name.
        requests.exceptions.RequestException: The request failed or timed
            out.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before opening the destination file.
        response.raise_for_status()
        with open(filename, 'wb') as fout:
            # iter_content undoes gzip/deflate content encoding, which
            # copying from response.raw would have written verbatim.
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                fout.write(chunk)

In [None]:
# Sentences (JSON) and their labels (.npy), both downloaded from the
# PPF-MCTI repository. Presumably each sentence is already a list of
# tokens -- the cells below iterate it word by word.
sentencesMCTIList = read_list(
    "https://github.com/chap0lin/PPF-MCTI/blob/master/Pre-processamento/xp8_list.json?raw=true"
)
labels = read_labels(
    "https://github.com/chap0lin/PPF-MCTI/blob/master/Pre-processamento/labels.npy?raw=true"
)

In [49]:
# Vocabulary: every distinct token, in first-seen order. dict.fromkeys
# de-duplicates in one pass; the previous `word not in vocab` list scan was
# quadratic in the number of tokens.
vocab = list(dict.fromkeys(
    word for sentence in sentencesMCTIList for word in sentence
))
vocab_size = len(vocab)

# Encode every token with Keras' hashing-based one_hot (filters='' keeps
# punctuation untouched).
# NOTE(review): one_hot hashes tokens into [1, vocab_size), so distinct words
# can collide, and by default it relies on Python's built-in hash(), which the
# Keras docs describe as not stable across runs. Confirm the downloaded
# weights really match the indices produced in this session.
input_vector = [
    [one_hot(word, vocab_size, filters='') for word in sentence]
    for sentence in sentencesMCTIList
]

# Left-pad ('pre') all sentences to the length of the longest one.
# default=0 mirrors the previous behaviour for an empty dataset.
max_size = max((len(sentence) for sentence in input_vector), default=0)

input_vector = pad_sequences(input_vector, maxlen=max_size, padding='pre')

# Split dataset into train and test data (80/20, fixed seed for repeatability)
x_train, x_test, y_train, y_test = train_test_split(input_vector, labels, test_size=0.20, random_state=20)


## SNN

In [None]:
# Download the saved SNN model file next to the notebook.
get_model(
    "best weights SNN.h5",
    "https://github.com/chap0lin/PPF-MCTI/blob/master/Pre-processamento/Pesos/best%20weights%20SNN.h5?raw=true",
)

In [None]:
path = "best weights SNN.h5"

# The custom metric functions are handed to load_model by name so Keras can
# resolve them while deserializing the saved model.
reconstructed_model_NN = keras.models.load_model(
    path,
    custom_objects={
        "f1_m": f1_m,
        "precision_m": precision_m,
        "recall_m": recall_m,
    },
)

# evaluate the model
# Five scalars are unpacked here; the order loss, accuracy, f1, precision,
# recall is assumed to match how the model was compiled -- TODO confirm.
(loss, accuracy, f1_score, precision, recall) = reconstructed_model_NN.evaluate(
    x_test, y_test, verbose=0
)

# Report each metric as a percentage.
print("XP8:")
print('Accuracy NN: %f' % (accuracy*100))
print('f1_score NN: %f' % (f1_score*100))
print('precision NN: %f' % (precision*100))
print('recall NN: %f' % (recall*100))

XP8:
Accuracy NN: 92.473119
f1_score NN: 88.460702
precision NN: 100.000000
recall NN: 79.660153


## DNN

In [None]:
# Download the saved DNN model file next to the notebook.
get_model(
    "best weights DNN.h5",
    "https://github.com/chap0lin/PPF-MCTI/blob/master/Pre-processamento/Pesos/best%20weights%20DNN.h5?raw=true",
)

In [None]:
path = "best weights DNN.h5"

# The custom metric functions are handed to load_model by name so Keras can
# resolve them while deserializing the saved model.
reconstructed_model_DNN = keras.models.load_model(
    path,
    custom_objects={
        "f1_m": f1_m,
        "precision_m": precision_m,
        "recall_m": recall_m,
    },
)

# evaluate the model
# Five scalars are unpacked here; the order loss, accuracy, f1, precision,
# recall is assumed to match how the model was compiled -- TODO confirm.
(loss, accuracy, f1_score, precision, recall) = reconstructed_model_DNN.evaluate(
    x_test, y_test, verbose=0
)

# Report each metric as a percentage.
print('Base:')
print('Accuracy DNN: %f' % (accuracy*100))
print('f1_score DNN: %f' % (f1_score*100))
print('precision DNN: %f' % (precision*100))
print('recall DNN: %f' % (recall*100))

Base:
Accuracy DNN: 89.784944
f1_score DNN: 84.410125
precision DNN: 92.567152
recall DNN: 77.808303


## CNN

In [None]:
# Download the saved CNN model file next to the notebook.
get_model(
    "best weights CNN.h5",
    "https://github.com/chap0lin/PPF-MCTI/blob/master/Pre-processamento/Pesos/best%20weights%20CNN.h5?raw=true",
)

In [None]:
path = "best weights CNN.h5"

# The custom metric functions are handed to load_model by name so Keras can
# resolve them while deserializing the saved model.
reconstructed_model_cnn = keras.models.load_model(
    path,
    custom_objects={
        "f1_m": f1_m,
        "precision_m": precision_m,
        "recall_m": recall_m,
    },
)

# evaluate the model
# Five scalars are unpacked here; the order loss, accuracy, f1, precision,
# recall is assumed to match how the model was compiled -- TODO confirm.
(loss, accuracy, f1_score, precision, recall) = reconstructed_model_cnn.evaluate(
    x_test, y_test, verbose=0
)

# Report each metric as a percentage.
print('Base:')
print('Accuracy cnn: %f' % (accuracy*100))
print('f1_score cnn: %f' % (f1_score*100))
print('precision cnn: %f' % (precision*100))
print('recall cnn: %f' % (recall*100))

Base:
Accuracy cnn: 93.010753
f1_score cnn: 89.913744
precision cnn: 95.694447
recall cnn: 85.175008


## LSTM

In [None]:
# Download the saved LSTM model file next to the notebook.
get_model(
    "best weights LSTM.h5",
    "https://github.com/chap0lin/PPF-MCTI/blob/master/Pre-processamento/Pesos/best%20weights%20LSTM.h5?raw=true",
)

In [None]:
path = "best weights LSTM.h5"

# The custom metric functions are handed to load_model by name so Keras can
# resolve them while deserializing the saved model.
reconstructed_model_lstm = keras.models.load_model(
    path,
    custom_objects={
        "f1_m": f1_m,
        "precision_m": precision_m,
        "recall_m": recall_m,
    },
)

# evaluate the model
# Five scalars are unpacked here; the order loss, accuracy, f1, precision,
# recall is assumed to match how the model was compiled -- TODO confirm.
(loss, accuracy, f1_score, precision, recall) = reconstructed_model_lstm.evaluate(
    x_test, y_test, verbose=0
)

# Report each metric as a percentage.
print('Base:')
print('Accuracy lstm: %f' % (accuracy*100))
print('f1_score lstm: %f' % (f1_score*100))
print('precision lstm: %f' % (precision*100))
print('recall lstm: %f' % (recall*100))

Base:
Accuracy lstm: 93.010753
f1_score lstm: 88.937265
precision lstm: 95.535713
recall lstm: 83.323151


In [69]:
# Show the TensorFlow version in use, for reproducibility of the results.
print(tf.__version__)

2.9.2


In [70]:
!pip show keras

Name: keras
Version: 2.9.0
Summary: Deep learning for humans.
Home-page: https://keras.io/
Author: Keras team
Author-email: keras-users@googlegroups.com
License: Apache 2.0
Location: /usr/local/lib/python3.7/dist-packages
Requires: 
Required-by: tensorflow, keras-vis
