### Imports ###

In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.23.1-py3-none-any.whl (5.3 MB)
[K     |████████████████████████████████| 5.3 MB 15.0 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 56.9 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 65.8 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.1 tokenizers-0.13.1 transformers-4.23.1


In [None]:
import json
import keras
import requests
import numpy as np
import pandas as pd

from transformers import LongformerTokenizerFast, TFLongformerModel

from keras import backend as K
from keras.utils.data_utils import pad_sequences
from sklearn.model_selection import train_test_split

### Functions ###

In [None]:
import json
import io
import shutil

# Read list to memory
def read_list(url):
    """Download the JSON document at ``url`` and return the decoded object.

    The request goes through ``requests.get``; ``raise_for_status()`` is called
    before decoding, so an unsuccessful HTTP status raises instead of being
    parsed as JSON.
    """
    response = requests.get(url)
    response.raise_for_status()
    payload = response.content
    return json.loads(payload)

def read_labels(url):
    """Download a NumPy file from ``url`` and return its contents as an array.

    ``raise_for_status()`` is called first, so an unsuccessful HTTP status
    raises before ``np.load`` sees the body. The loaded data is passed through
    ``np.array`` before being returned.
    """
    reply = requests.get(url)
    reply.raise_for_status()
    buffer = io.BytesIO(reply.content)
    return np.array(np.load(buffer))

def get_model(filename, url):
    """Download ``url`` and write the response body to ``filename``.

    Args:
        filename: Path of the local file to create (opened in binary write mode).
        url: Address to fetch with a streamed ``requests.get``.

    Returns:
        None. The only effect is the file written to ``filename``.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an unsuccessful
        HTTP status. The check runs before the output file is opened, so a
        failed download does not leave an error page saved as a model file.
    """
    # Size of each piece pulled from the stream; keeps memory use bounded
    # regardless of how large the downloaded file is.
    chunk_bytes = 1024 * 1024

    # The context manager releases the streamed connection even on errors.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(filename, 'wb') as out_file:
            # iter_content applies any transfer decoding, unlike reading
            # response.raw directly.
            for chunk in response.iter_content(chunk_size=chunk_bytes):
                out_file.write(chunk)
In [None]:
def recall_m(y_true, y_pred):
    """Recall computed with Keras backend ops over the tensors passed in.

    Counts rounded, clipped ``y_true * y_pred`` entries as true positives and
    rounded, clipped ``y_true`` entries as possible positives, then divides the
    former by the latter plus ``K.epsilon()`` to avoid division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision computed with Keras backend ops over the tensors passed in.

    Counts rounded, clipped ``y_true * y_pred`` entries as true positives and
    rounded, clipped ``y_pred`` entries as predicted positives, then divides
    the former by the latter plus ``K.epsilon()`` to avoid division by zero.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(flagged) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score built from ``precision_m`` and ``recall_m``.

    Returns twice the product of precision and recall divided by their sum,
    with ``K.epsilon()`` added to the denominator to avoid division by zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

### Load pre-processed Dataset ###

In [None]:
# Pull the pre-processed corpus from the project repository: the sentences are
# stored as a JSON list and the labels as a NumPy file.
sentencesMCTI = read_list(
    url="https://github.com/chap0lin/PPF-MCTI/blob/master/Pre-processamento/xp8_sent.json?raw=true"
)
labels = read_labels(
    url="https://github.com/chap0lin/PPF-MCTI/blob/master/Pre-processamento/labels.npy?raw=true"
)

### Load Longformer model ###

In [None]:
# Load the pretrained Longformer encoder; the two keyword arguments are passed
# through to from_pretrained as overrides for this checkpoint.
model = TFLongformerModel.from_pretrained(
    'allenai/longformer-base-4096',
    gradient_checkpointing=False,
    attention_window=512,
)

# `model_max_length` is the tokenizer option that bounds padding='max_length'
# and truncation=True in later tokenizer calls. A bare `max_length` passed to
# from_pretrained is not applied as that limit, so set the intended 4096 here.
tokenizer = LongformerTokenizerFast.from_pretrained(
    'allenai/longformer-base-4096',
    model_max_length=4096,
)

Downloading:   0%|          | 0.00/694 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/765M [00:00<?, ?B/s]

Some layers from the model checkpoint at allenai/longformer-base-4096 were not used when initializing TFLongformerModel: ['lm_head']
- This IS expected if you are initializing TFLongformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFLongformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFLongformerModel were initialized from the model checkpoint at allenai/longformer-base-4096.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFLongformerModel for predictions without further training.


Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
# Tokenize each document on its own, returning TensorFlow tensors that are
# padded to the tokenizer's maximum length and truncated when longer.
encodings = [
    tokenizer(text, return_tensors="tf", padding='max_length', truncation=True)
    for text in sentencesMCTI
]

In [None]:
# Run every encoding through the Longformer model using only its input ids
# (no attention mask is passed), keep the first element of each model output
# as a NumPy array, and stack the results into one array.
model_outputs = np.array([
    model(item["input_ids"])[0].numpy()
    for item in encodings
])

In [None]:
# Merge all leading axes into a single sample axis and keep the last two
# dimensions (presumably sequence length and hidden size) as they are.
# Deriving the shape from the array avoids hard-coding (928, 4096, 768), so the
# cell keeps working if the corpus size changes and is safe to re-run.
model_outputs = model_outputs.reshape((-1,) + model_outputs.shape[-2:])

In [None]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split reproducible between runs.
split_parts = train_test_split(
    model_outputs,
    labels,
    test_size=0.20,
    random_state=20,
)
x_trainMCTI, x_testMCTI, y_trainMCTI, y_testMCTI = split_parts

In [None]:
# Drop the notebook's reference to the full feature array now that the
# train/test arrays have been created from it.
del model_outputs

### Evaluate ###

In [None]:
# Download the saved LSTM model file next to the notebook.
get_model(
    filename="best weights LSTM.h5",
    url="https://github.com/chap0lin/PPF-MCTI/blob/master/Meta10/pesos/longformer/best%20weights%20LSTM.h5?raw=true",
)

In [None]:
# Rebuild the saved LSTM model. The custom metric functions are handed to
# load_model so their names can be resolved during loading.
metric_functions = {'f1_m': f1_m, "precision_m": precision_m, "recall_m": recall_m}
reconstructed_model_LSTM = keras.models.load_model(
    "best weights LSTM.h5",
    custom_objects=metric_functions,
)

# evaluate the model
# NOTE(review): the unpacking assumes evaluate() returns loss, accuracy, f1,
# precision, recall in this order - confirm against how the model was compiled.
evaluation = reconstructed_model_LSTM.evaluate(x_testMCTI, y_testMCTI, verbose=0)
loss, accuracy, f1_score, precision, recall = evaluation

# NOTE(review): the recorded run shows f1/precision/recall of 0 for this model,
# which is consistent with it predicting no positives on the test split.
for template, score in (
    ('Accuracy LSTM: %f', accuracy),
    ('f1_score LSTM: %f', f1_score),
    ('precision LSTM: %f', precision),
    ('recall LSTM: %f', recall),
):
    print(template % (score*100))

Accuracy LSTM: 61.290324
f1_score LSTM: 0.000000
precision LSTM: 0.000000
recall LSTM: 0.000000


In [None]:
# Download the saved CNN model file next to the notebook.
get_model(
    filename="best weights CNN.h5",
    url="https://github.com/chap0lin/PPF-MCTI/blob/master/Meta10/pesos/longformer/best%20weights%20CNN.h5?raw=true",
)

In [None]:
# Rebuild the saved CNN model. The custom metric functions are handed to
# load_model so their names can be resolved during loading.
metric_functions = {'f1_m': f1_m, "precision_m": precision_m, "recall_m": recall_m}
reconstructed_model_CNN = keras.models.load_model(
    "best weights CNN.h5",
    custom_objects=metric_functions,
)

# evaluate the model
# NOTE(review): the unpacking assumes evaluate() returns loss, accuracy, f1,
# precision, recall in this order - confirm against how the model was compiled.
evaluation = reconstructed_model_CNN.evaluate(x_testMCTI, y_testMCTI, verbose=0)
loss, accuracy, f1_score, precision, recall = evaluation

for template, score in (
    ('Accuracy CNN: %f', accuracy),
    ('f1_score CNN: %f', f1_score),
    ('precision CNN: %f', precision),
    ('recall CNN: %f', recall),
):
    print(template % (score*100))

Accuracy CNN: 94.086021
f1_score CNN: 90.685892
precision CNN: 100.000000
recall CNN: 83.414727


In [None]:
# Download the saved DNN model file next to the notebook.
get_model(
    filename="best weights DNN.h5",
    url="https://github.com/chap0lin/PPF-MCTI/blob/master/Meta10/pesos/longformer/best%20weights%20DNN.h5?raw=true",
)

In [None]:
# Rebuild the saved DNN model. The custom metric functions are handed to
# load_model so their names can be resolved during loading.
metric_functions = {'f1_m': f1_m, "precision_m": precision_m, "recall_m": recall_m}
reconstructed_model_DNN = keras.models.load_model(
    "best weights DNN.h5",
    custom_objects=metric_functions,
)

# evaluate the model
# NOTE(review): the unpacking assumes evaluate() returns loss, accuracy, f1,
# precision, recall in this order - confirm against how the model was compiled.
evaluation = reconstructed_model_DNN.evaluate(x_testMCTI, y_testMCTI, verbose=0)
loss, accuracy, f1_score, precision, recall = evaluation

for template, score in (
    ('Accuracy DNN: %f', accuracy),
    ('f1_score DNN: %f', f1_score),
    ('precision DNN: %f', precision),
    ('recall DNN: %f', recall),
):
    print(template % (score*100))

Accuracy DNN: 91.935486
f1_score DNN: 87.619048
precision DNN: 97.619051
recall DNN: 80.372405


In [None]:
# Download the saved SNN model file next to the notebook.
get_model(
    filename="best weights SNN.h5",
    url="https://github.com/chap0lin/PPF-MCTI/blob/master/Meta10/pesos/longformer/best%20weights%20SNN.h5?raw=true",
)

In [None]:
# Rebuild the saved SNN model. The custom metric functions are handed to
# load_model so their names can be resolved during loading.
metric_functions = {'f1_m': f1_m, "precision_m": precision_m, "recall_m": recall_m}
reconstructed_model_SNN = keras.models.load_model(
    "best weights SNN.h5",
    custom_objects=metric_functions,
)

# evaluate the model
# NOTE(review): the unpacking assumes evaluate() returns loss, accuracy, f1,
# precision, recall in this order - confirm against how the model was compiled.
evaluation = reconstructed_model_SNN.evaluate(x_testMCTI, y_testMCTI, verbose=0)
loss, accuracy, f1_score, precision, recall = evaluation

# NOTE(review): the recorded run shows f1/precision/recall of 0 for this model,
# which is consistent with it predicting no positives on the test split.
for template, score in (
    ('Accuracy SNN: %f', accuracy),
    ('f1_score SNN: %f', f1_score),
    ('precision SNN: %f', precision),
    ('recall SNN: %f', recall),
):
    print(template % (score*100))

Accuracy SNN: 61.290324
f1_score SNN: 0.000000
precision SNN: 0.000000
recall SNN: 0.000000
