<a href="https://colab.research.google.com/github/loyoladesa/qoeprediction/blob/main/notebooks/Experimentos_Pytorch_Dados_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Configuração Ambiente

In [None]:
!pip install torch-geometric
!pip install torch-sparse
!pip install torch-scatter
!pip install torch-cluster
!pip install torch-spline-conv

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install 'scipy>=1.8'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install 'networkx<2.7'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip freeze

absl-py==1.4.0
alabaster==0.7.13
albumentations==1.2.1
altair==4.2.2
anyio==3.6.2
appdirs==1.4.4
argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
arviz==0.15.1
astropy==5.2.2
astunparse==1.6.3
attrs==22.2.0
audioread==3.0.0
autograd==1.5
Babel==2.12.1
backcall==0.2.0
beautifulsoup4==4.11.2
bleach==6.0.0
blis==0.7.9
bokeh==2.4.3
branca==0.6.0
CacheControl==0.12.11
cached-property==1.5.2
cachetools==5.3.0
catalogue==2.0.8
certifi==2022.12.7
cffi==1.15.1
chardet==4.0.0
charset-normalizer==2.0.12
chex==0.1.7
click==8.1.3
cloudpickle==2.2.1
cmake==3.25.2
cmdstanpy==1.1.0
colorcet==3.0.1
colorlover==0.3.0
community==1.0.0b1
confection==0.0.4
cons==0.4.5
contextlib2==0.6.0.post1
contourpy==1.0.7
convertdate==2.4.0
cryptography==40.0.1
cufflinks==0.17.3
cvxopt==1.3.0
cvxpy==1.3.1
cycler==0.11.0
cymem==2.0.7
Cython==0.29.33
dask==2022.12.1
datascience==0.17.6
db-dtypes==1.1.1
dbus-python==1.2.16
debugpy==1.6.6
decorator==4.4.2
defusedxml==0.7.1
distributed==2022.12.1
dlib==19.24.1
dm-tree==0.1.

## Graph Attention Networks


In [None]:
# We assume that PyTorch is already installed
import torch
torchversion = torch.__version__

# Install PyTorch Scatter, PyTorch Sparse, and PyTorch Geometric
#!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-{torchversion}.html
#!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-{torchversion}.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

# Numpy for matrices
import numpy as np
np.random.seed(0)

# Visualization
import networkx as nx
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for torch_geometric (pyproject.toml) ... [?25l[?25hdone


## Implement GAT vs. GCN

In [None]:
import torch.nn.functional as F
from torch.nn import Linear, Dropout
from torch_geometric.nn import GCNConv, GATv2Conv


class GCN(torch.nn.Module):
  """Graph Convolutional Network"""
  def __init__(self, dim_in, dim_h, dim_out):
    super().__init__()
    self.gcn1 = GCNConv(dim_in, dim_h)
    self.gcn2 = GCNConv(dim_h, dim_out)
    self.optimizer = torch.optim.Adam(self.parameters(),
                                      lr=0.01,
                                      weight_decay=5e-4)

  def forward(self, x, edge_index):
    h = F.dropout(x, p=0.5, training=self.training)
    h = self.gcn1(h, edge_index)
    h = torch.relu(h)
    h = F.dropout(h, p=0.5, training=self.training)
    h = self.gcn2(h, edge_index)
    return h, F.log_softmax(h, dim=1)


class GAT(torch.nn.Module):
  """Graph Attention Network"""
  def __init__(self, dim_in, dim_h, dim_out, heads=8):
    super().__init__()
    self.gat1 = GATv2Conv(dim_in, dim_h, heads=heads)
    self.gat2 = GATv2Conv(dim_h*heads, dim_out, heads=1)
    self.optimizer = torch.optim.Adam(self.parameters(),
                                      lr=0.005,
                                      weight_decay=5e-4)

  def forward(self, x, edge_index):
    h = F.dropout(x, p=0.6, training=self.training)
    h = self.gat1(x, edge_index)
    h = F.elu(h)
    h = F.dropout(h, p=0.6, training=self.training)
    h = self.gat2(h, edge_index)
    return h, F.log_softmax(h, dim=1)

def accuracy(pred_y, y):
    """Calculate accuracy."""
    return ((pred_y == y).sum() / len(y)).item()

def precision(pred_y, y):
    """Calculate accuracy."""
    pred_positivos = (pred_y == 1)  
    real_positivos = (y == 1)
    tp = torch.bitwise_and(pred_positivos,real_positivos).sum()      
       
    return (tp/pred_positivos.sum())

def recall(pred_y, y):
    """Calculate accuracy."""
    pred_positivos = (pred_y == 1)  
    real_positivos = (y == 1)
    tp = torch.bitwise_and(pred_positivos,real_positivos).sum()

    pred_negativos = (pred_y == 0)  
    real_negativos = (y == 0)
    tn = torch.bitwise_and(pred_negativos,real_negativos).sum()
    fn = pred_negativos.sum() - tn

    return (tp/(tp+fn))

def f1_score(pred_y, y):
    """Calculate accuracy."""
    prec = precision(pred_y,y)
    rec = recall(pred_y,y)
    
    return ((2*rec*prec)/(rec+prec))

def train(model, data):
    """Train a GNN model and return the trained model."""
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = model.optimizer
    epochs = 200

    model.train()
    for epoch in range(epochs+1):
        # Training
        optimizer.zero_grad()
        _, out = model(data.x, data.edge_index)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        acc = accuracy(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        # Validation
        val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
        val_acc = accuracy(out[data.val_mask].argmax(dim=1), data.y[data.val_mask])

        # Print metrics every 10 epochs
        if(epoch % 10 == 0):
            print(f'Epoch {epoch:>3} | Train Loss: {loss:.3f} | Train Acc: '
                  f'{acc*100:>6.2f}% | Val Loss: {val_loss:.2f} | '
                  f'Val Acc: {val_acc*100:.2f}%')
          
    return model

def test(model, data):
    """Evaluate the model on test set and print the accuracy score."""
    model.eval()
    _, out = model(data.x, data.edge_index)
    acc = accuracy(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
    prec = precision(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
    rec = recall(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
    f1 = f1_score(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
    return acc,prec,rec,f1

# Mount Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Experimento Classificação Dataset Completo

## Dataset RNP

### Carregar Vértices

In [None]:
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

In [None]:
path_dataset_metricas_rnp_transposto = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_1.csv'
path_dataset_metricas_rnp_transposto_final_semana = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_final_semana.csv'
path_dataset_metricas_rnp_normalizado = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_3_normalizado.csv'
path_dataset_rnp_3 = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_testes_2.csv'

In [None]:
dados = pd.read_csv(path_dataset_metricas_rnp_transposto, delimiter=",")
dados = dados.T
dados

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,215,216,217,218,219,220,221,222,223,224
id,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,215.0,216.0,217.0,218.0,219.0,220.0,221.0,222.0,223.0,224.0
f_0,845572.0,820199.0,112000.0,7840729.0,866019.0,858103.0,884162.0,839173.0,4102432.0,869847.0,...,58.0,53.0,58.0,0.0,0.0,58.0,56.0,54.0,58.0,58.0
f_1,847565.0,787375.0,112000.0,3424706.0,864759.0,878133.0,890030.0,869218.0,3399700.0,860043.0,...,58.0,53.0,58.0,0.0,0.0,58.0,56.0,54.0,58.0,58.0
f_2,803419.0,831836.0,112000.0,3847770.0,861982.0,694367.0,904923.0,852103.0,3679127.0,817417.0,...,58.0,53.0,58.0,0.0,0.0,58.0,56.0,54.0,58.0,58.0
f_3,355415.0,808513.0,112000.0,7283799.0,854601.0,873149.0,932813.0,894016.0,1751930.0,871788.0,...,58.0,53.0,58.0,0.0,0.0,58.0,56.0,54.0,58.0,58.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_24316,0.0,0.0,8830.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
f_24317,0.0,0.0,9080.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
f_24318,0.0,0.0,52830.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
f_24319,0.0,0.0,46200.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
df_max_scaled = dados.copy() 
for column in df_max_scaled.columns: 
    df_max_scaled[column] = df_max_scaled[column]  / df_max_scaled[column].max() 
      
display(df_max_scaled)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,215,216,217,218,219,220,221,222,223,224
id,0.000000,0.000001,0.000001,3.619861e-07,0.000004,0.000006,0.000006,0.000008,0.000002,0.000010,...,1.000000,1.00000,1.000000,1.0,1.0,1.000000,1.000000,1.000000,1.00000,1.000000
f_0,0.927910,0.911870,0.072964,9.460782e-01,0.965740,0.974980,0.929866,0.938655,0.807083,0.957579,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_1,0.930097,0.875378,0.072964,4.132319e-01,0.964335,0.997738,0.936037,0.972262,0.668833,0.946787,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_2,0.881652,0.924808,0.072964,4.642797e-01,0.961239,0.788942,0.951700,0.953118,0.723805,0.899861,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_3,0.390024,0.898878,0.072964,8.788779e-01,0.953008,0.992075,0.981032,1.000000,0.344662,0.959716,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_24316,0.000000,0.000000,0.005752,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24317,0.000000,0.000000,0.005915,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24318,0.000000,0.000000,0.034417,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24319,0.000000,0.000000,0.030098,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000


In [None]:
df_max_scaled.to_csv(path_dataset_metricas_rnp_normalizado,index=False)

In [None]:
mediana_RTT = df_max_scaled[2].median()
print(mediana_RTT)

0.005921824104234527


In [None]:
def classificar_alvo(x):
  if x < mediana_RTT:
    return 0
  return 1

In [None]:
dados = df_max_scaled

### Obter Target

In [None]:
target =dados[2].apply(classificar_alvo)

In [None]:
target

id         0
f_0        1
f_1        1
f_2        1
f_3        1
          ..
f_24316    0
f_24317    0
f_24318    1
f_24319    1
f_24320    1
Name: 2, Length: 24322, dtype: int64

In [None]:
target.value_counts()

1    12197
0    12125
Name: 2, dtype: int64

In [None]:
dados.to_csv(path_dataset_rnp_3,index='False')

### Constantes

In [None]:
path_adj = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/grafo.adj'
path_gexf = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/grafo_causalidade_rnp_1.gexf'
path_nodes = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto.csv'
path_dataset_rnp_final = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_1.csv'
path_dataset_metricas_rnp_transposto = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_1.csv'
path_gat_model = '/content/drive/MyDrive/Seagate/RNP/gat_model_1.csv'

### Carregar Arestas

In [None]:
X = nx.read_gexf(path_gexf)

In [None]:
n_vertices = X.number_of_nodes()
n_arestas = X.number_of_edges()
print(n_arestas)
print(n_vertices)

9085
225


In [None]:
df_edge = nx.to_pandas_edgelist(X)

In [None]:
df_edge

Unnamed: 0,source,target,id
0,0,1,0
1,0,3,1
2,0,4,2
3,0,5,3
4,0,6,4
...,...,...,...
9080,224,88,9080
9081,224,96,9081
9082,224,223,9082
9083,224,78,9083


In [None]:
colunas = list(df_edge.columns)

In [None]:
convert_dict = {}
for coluna in colunas:
  convert_dict[coluna] = float

In [None]:
df_edge = df_edge.astype(convert_dict)  
print(df_edge.dtypes)

source    float64
target    float64
id        float64
dtype: object


In [None]:
df_edge

Unnamed: 0,source,target,id
0,0.0,1.0,0.0
1,0.0,3.0,1.0
2,0.0,4.0,2.0
3,0.0,5.0,3.0
4,0.0,6.0,4.0
...,...,...,...
9080,224.0,88.0,9080.0
9081,224.0,96.0,9081.0
9082,224.0,223.0,9082.0
9083,224.0,78.0,9083.0


In [None]:
df_edge = df_edge.drop(columns=['id'])
df_edge

Unnamed: 0,source,target
0,0.0,1.0
1,0.0,3.0
2,0.0,4.0
3,0.0,5.0
4,0.0,6.0
...,...,...
9080,224.0,88.0
9081,224.0,96.0
9082,224.0,223.0
9083,224.0,78.0


In [None]:
df_edge = df_edge.T
df_edge

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9075,9076,9077,9078,9079,9080,9081,9082,9083,9084
source,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0
target,1.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,...,67.0,221.0,217.0,104.0,212.0,88.0,96.0,223.0,78.0,216.0


In [None]:
df_edge.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, source to target
Columns: 9085 entries, 0 to 9084
dtypes: float64(9085)
memory usage: 142.1+ KB


In [None]:
type(df_edge.values)

numpy.ndarray

### Transformar em Tensores

In [None]:
dados

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,215,216,217,218,219,220,221,222,223,224
id,0.000000,0.000001,0.000001,3.619861e-07,0.000004,0.000006,0.000006,0.000008,0.000002,0.000010,...,1.000000,1.00000,1.000000,1.0,1.0,1.000000,1.000000,1.000000,1.00000,1.000000
f_0,0.927910,0.911870,0.072964,9.460782e-01,0.965740,0.974980,0.929866,0.938655,0.807083,0.957579,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_1,0.930097,0.875378,0.072964,4.132319e-01,0.964335,0.997738,0.936037,0.972262,0.668833,0.946787,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_2,0.881652,0.924808,0.072964,4.642797e-01,0.961239,0.788942,0.951700,0.953118,0.723805,0.899861,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_3,0.390024,0.898878,0.072964,8.788779e-01,0.953008,0.992075,0.981032,1.000000,0.344662,0.959716,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_24316,0.000000,0.000000,0.005752,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24317,0.000000,0.000000,0.005915,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24318,0.000000,0.000000,0.034417,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24319,0.000000,0.000000,0.030098,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000


In [None]:
dados = dados.drop(2,axis='columns')
dados

Unnamed: 0,0,1,3,4,5,6,7,8,9,10,...,215,216,217,218,219,220,221,222,223,224
id,0.000000,0.000001,3.619861e-07,0.000004,0.000006,0.000006,0.000008,0.000002,0.000010,0.000011,...,1.000000,1.00000,1.000000,1.0,1.0,1.000000,1.000000,1.000000,1.00000,1.000000
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.974980,0.929866,0.938655,0.807083,0.957579,0.944346,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.997738,0.936037,0.972262,0.668833,0.946787,0.966271,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.788942,0.951700,0.953118,0.723805,0.899861,0.890265,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.992075,0.981032,1.000000,0.344662,0.959716,0.961735,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_24316,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24317,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24318,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24319,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000


In [None]:
x = torch.tensor(dados.values, dtype=torch.float)

In [None]:
edge_index = torch.tensor(df_edge.values, dtype=torch.long)

In [None]:
y = target.to_numpy()

In [None]:
y = torch.tensor(target.values, dtype=torch.long)

## Criar Data

In [None]:
dados

Unnamed: 0,0,1,3,4,5,6,7,8,9,10,...,215,216,217,218,219,220,221,222,223,224
id,0.000000,0.000001,3.619861e-07,0.000004,0.000006,0.000006,0.000008,0.000002,0.000010,0.000011,...,1.000000,1.00000,1.000000,1.0,1.0,1.000000,1.000000,1.000000,1.00000,1.000000
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.974980,0.929866,0.938655,0.807083,0.957579,0.944346,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.997738,0.936037,0.972262,0.668833,0.946787,0.966271,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.788942,0.951700,0.953118,0.723805,0.899861,0.890265,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.992075,0.981032,1.000000,0.344662,0.959716,0.961735,...,0.269767,0.24537,0.267281,0.0,0.0,0.263636,0.253394,0.243243,0.26009,0.258929
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_24316,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24317,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24318,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000
f_24319,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.00000,0.000000


### Validação Cruzada

#### K-fold

In [None]:
def kfold(k,random_indices):  
  print(random_indices)
  intervalo = len(random_indices)/k
  print('Intervalo = ',intervalo)
  folds = []
  for fold in range(k):
    if((fold+1)==k):
      dados_fold = random_indices[int(fold*intervalo):]
      folds.append(dados_fold)    
    else:
      dados_fold = random_indices[int(fold*intervalo):(int((fold+1)*intervalo))]
      folds.append(dados_fold)    
  return folds

In [None]:
random_indices = np.random.permutation(range(dados.shape[0]))

In [None]:
folds = kfold(10,random_indices)

[18617 10231 20130 ... 11980  5164 15778]
Intervalo =  2432.2


In [None]:
type(folds[2])

numpy.ndarray

#### CrossVal

In [None]:
from numpy.core.multiarray import concatenate
soma_acc=0
soma_prec=0
soma_rec=0
soma_f1=0
for rodada in range(len(folds)):
  teste = rodada
  val = 0  
  if(rodada!=9):
    val = teste + 1  
  test_data = folds[teste]
  val_data = folds[val]
  train_data = []
  for treino in range(len(folds)):
    if(treino!=teste and treino!=val):
      train_data = train_data + (folds[treino].tolist())
  train_mask = []
  test_mask = []
  val_mask = []

  for cont in range(len(random_indices)):
    if(cont in train_data):
      train_mask.append(True)
      test_mask.append(False)
      val_mask.append(False)
    else:
      train_mask.append(False)
      test_mask.append(True)
      val_mask.append(True)
  train_mask = torch.tensor(train_mask, dtype=torch.bool)
  test_mask = torch.tensor(test_mask, dtype=torch.bool)
  val_mask = torch.tensor(val_mask, dtype=torch.bool)
  data = Data(x=x, edge_index=edge_index, y=y)
  data.train_mask = train_mask
  data.test_mask = test_mask
  data.val_mask = val_mask
  

    # Create GAT model
  gat = GAT(dados.shape[1], 8, 2)
  print(gat)

# Train
  train(gat, data)

# Test
  acc,prec,rec,f1 = test(gat, data)
  print(f'\nGAT test precision: {prec*100:.2f}%\n')
  print(f'\nGAT test recall: {rec*100:.2f}%\n')
  print(f'\nGAT test f1 score: {f1*100:.2f}%\n')
  print(f'\nGAT test accuracy: {acc*100:.2f}%\n')
  soma_acc = soma_acc + acc
  soma_prec = soma_prec + prec
  soma_rec = soma_rec + rec
  soma_f1 = soma_f1 + f1


GAT(
  (gat1): GATv2Conv(224, 8, heads=8)
  (gat2): GATv2Conv(64, 2, heads=1)
)
Epoch   0 | Train Loss: 0.797 | Train Acc:  49.80% | Val Loss: 0.79 | Val Acc: 50.68%
Epoch  10 | Train Loss: 0.693 | Train Acc:  52.34% | Val Loss: 0.70 | Val Acc: 51.05%
Epoch  20 | Train Loss: 0.653 | Train Acc:  59.65% | Val Loss: 0.66 | Val Acc: 58.61%
Epoch  30 | Train Loss: 0.627 | Train Acc:  69.35% | Val Loss: 0.63 | Val Acc: 69.45%
Epoch  40 | Train Loss: 0.605 | Train Acc:  68.35% | Val Loss: 0.60 | Val Acc: 68.85%
Epoch  50 | Train Loss: 0.593 | Train Acc:  69.10% | Val Loss: 0.59 | Val Acc: 69.76%
Epoch  60 | Train Loss: 0.584 | Train Acc:  69.29% | Val Loss: 0.58 | Val Acc: 70.35%
Epoch  70 | Train Loss: 0.575 | Train Acc:  70.02% | Val Loss: 0.57 | Val Acc: 71.07%
Epoch  80 | Train Loss: 0.567 | Train Acc:  70.84% | Val Loss: 0.56 | Val Acc: 71.24%
Epoch  90 | Train Loss: 0.558 | Train Acc:  71.35% | Val Loss: 0.56 | Val Acc: 71.79%
Epoch 100 | Train Loss: 0.552 | Train Acc:  71.83% | Val Los

In [None]:
print(f'\nGAT test precision: {(soma_prec/10)*100:.2f}%\n')
print(f'\nGAT test recall: {(soma_rec/10)*100:.2f}%\n')
print(f'\nGAT test f1 score: {(soma_f1/10)*100:.2f}%\n')
print(f'\nGAT test accuracy: {(soma_acc/10)*100:.2f}%\n')


GAT test precision: 78.42%


GAT test recall: 67.71%


GAT test f1 score: 72.32%


GAT test accuracy: 74.21%



# Experimento Classificação Dataset Final de Semana

## Dataset RNP

### Carregar Vértices

In [None]:
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

In [None]:
path_dataset_metricas_rnp_transposto = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_1.csv'
path_dataset_metricas_rnp_transposto_final_semana = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_final_semana.csv'

path_dataset_rnp_3 = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_testes_2_final_semana.csv'



In [None]:
nos_retirados = [4, 14, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 52, 56, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 148, 158, 166, 176, 188, 189, 190, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215]

In [None]:
len(nos_retirados)

84

In [None]:
dados = pd.read_csv(path_dataset_metricas_rnp_transposto_final_semana, delimiter=",")
dados = dados.T
dados

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,302,303,304,305,306,307,308,309,310,311
id,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,302.0,303.0,304.0,305.0,306.0,307.0,308.0,309.0,310.0,311.0
f_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,70560.0,16820.0,43740.0,57940.0,132430.0,38210.0,18670.0,26910.0,38390.0,168540.0
f_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,68000.0,16300.0,34800.0,57900.0,90000.0,38100.0,18600.0,26600.0,38300.0,8440.0
f_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,93200.0,20500.0,124000.0,58100.0,514000.0,39000.0,18800.0,28800.0,38500.0,1535000.0
f_3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,68080.0,15280.0,34880.0,59120.0,101120.0,25300.0,50270.0,26720.0,38470.0,9210.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_6739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47500.0,14700.0,56400.0,57600.0,115000.0,23500.0,19500.0,32200.0,28100.0,8880.0
f_6740,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47600.0,18500.0,56600.0,57700.0,115000.0,23700.0,32800.0,32300.0,28300.0,9040.0
f_6741,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47570.0,14710.0,56450.0,57660.0,115000.0,23650.0,22090.0,32270.0,28190.0,10068.0
f_6742,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47500.0,14600.0,56400.0,57600.0,115000.0,23600.0,19500.0,32200.0,28100.0,8930.0


In [None]:
dados = dados.drop(columns=nos_retirados)

In [None]:
dados

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,302,303,304,305,306,307,308,309,310,311
id,0.0,1.0,2.0,3.0,5.0,6.0,7.0,8.0,9.0,10.0,...,302.0,303.0,304.0,305.0,306.0,307.0,308.0,309.0,310.0,311.0
f_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,70560.0,16820.0,43740.0,57940.0,132430.0,38210.0,18670.0,26910.0,38390.0,168540.0
f_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,68000.0,16300.0,34800.0,57900.0,90000.0,38100.0,18600.0,26600.0,38300.0,8440.0
f_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,93200.0,20500.0,124000.0,58100.0,514000.0,39000.0,18800.0,28800.0,38500.0,1535000.0
f_3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,68080.0,15280.0,34880.0,59120.0,101120.0,25300.0,50270.0,26720.0,38470.0,9210.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_6739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47500.0,14700.0,56400.0,57600.0,115000.0,23500.0,19500.0,32200.0,28100.0,8880.0
f_6740,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47600.0,18500.0,56600.0,57700.0,115000.0,23700.0,32800.0,32300.0,28300.0,9040.0
f_6741,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47570.0,14710.0,56450.0,57660.0,115000.0,23650.0,22090.0,32270.0,28190.0,10068.0
f_6742,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47500.0,14600.0,56400.0,57600.0,115000.0,23600.0,19500.0,32200.0,28100.0,8930.0


In [None]:
df_max_scaled = dados.copy() 
for column in df_max_scaled.columns: 
    df_max_scaled[column] = df_max_scaled[column]  / df_max_scaled[column].max() 
      
display(df_max_scaled) 

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,302,303,304,305,306,307,308,309,310,311
id,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.000462,0.000393,0.000357,0.000343,0.000190,0.000319,0.000410,0.000145,0.000108,0.000203
f_0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.107890,0.021816,0.051398,0.065174,0.082204,0.039719,0.024827,0.012664,0.013409,0.109798
f_1,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.103976,0.021141,0.040893,0.065129,0.055866,0.039605,0.024734,0.012518,0.013378,0.005498
f_2,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.142508,0.026589,0.145711,0.065354,0.319056,0.040541,0.025000,0.013553,0.013447,1.000000
f_3,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.104098,0.019818,0.040987,0.066502,0.062768,0.026299,0.066848,0.012574,0.013437,0.006000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_6739,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072630,0.019066,0.066275,0.064792,0.071384,0.024428,0.025931,0.015153,0.009815,0.005785
f_6740,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072783,0.023995,0.066510,0.064904,0.071384,0.024636,0.043617,0.015200,0.009885,0.005889
f_6741,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072737,0.019079,0.066334,0.064859,0.071384,0.024584,0.029375,0.015186,0.009846,0.006559
f_6742,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072630,0.018936,0.066275,0.064792,0.071384,0.024532,0.025931,0.015153,0.009815,0.005818


In [None]:
dados = df_max_scaled

In [None]:
dados = dados.fillna(0)
dados

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,302,303,304,305,306,307,308,309,310,311
id,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.000462,0.000393,0.000357,0.000343,0.000190,0.000319,0.000410,0.000145,0.000108,0.000203
f_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.107890,0.021816,0.051398,0.065174,0.082204,0.039719,0.024827,0.012664,0.013409,0.109798
f_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.103976,0.021141,0.040893,0.065129,0.055866,0.039605,0.024734,0.012518,0.013378,0.005498
f_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.142508,0.026589,0.145711,0.065354,0.319056,0.040541,0.025000,0.013553,0.013447,1.000000
f_3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.104098,0.019818,0.040987,0.066502,0.062768,0.026299,0.066848,0.012574,0.013437,0.006000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_6739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072630,0.019066,0.066275,0.064792,0.071384,0.024428,0.025931,0.015153,0.009815,0.005785
f_6740,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072783,0.023995,0.066510,0.064904,0.071384,0.024636,0.043617,0.015200,0.009885,0.005889
f_6741,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072737,0.019079,0.066334,0.064859,0.071384,0.024584,0.029375,0.015186,0.009846,0.006559
f_6742,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072630,0.018936,0.066275,0.064792,0.071384,0.024532,0.025931,0.015153,0.009815,0.005818


In [None]:
média_RTT = dados[311].median()
print(média_RTT)

0.005563517915309446


In [None]:
def classificar_alvo(x):
  if x < média_RTT:
    return 0
  return 1

### Obter Target

In [None]:
target =dados[311].apply(classificar_alvo)

In [None]:
target

id        0
f_0       1
f_1       0
f_2       1
f_3       1
         ..
f_6739    1
f_6740    1
f_6741    1
f_6742    1
f_6743    1
Name: 311, Length: 6745, dtype: int64

In [None]:
target.value_counts()

1    3399
0    3346
Name: 311, dtype: int64

In [None]:
dados.to_csv(path_dataset_rnp_3,index='False')

### Constantes

In [None]:
path_adj = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/grafo.adj'
path_gexf = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/grafo_causalidade_rnp_1.gexf'
path_nodes = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto.csv'
path_dataset_rnp_final = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_1.csv'
path_dataset_metricas_rnp_transposto = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_1.csv'
path_gat_model = '/content/drive/MyDrive/Seagate/RNP/gat_model_1.csv'

### Carregar Arestas

In [None]:
X = nx.read_gexf(path_gexf)

In [None]:
n_vertices = X.number_of_nodes()
n_arestas = X.number_of_edges()
print(n_arestas)
print(n_vertices)

9085
225


In [None]:
df_edge = nx.to_pandas_edgelist(X)

In [None]:
df_edge

Unnamed: 0,source,target,id
0,0,1,0
1,0,3,1
2,0,4,2
3,0,5,3
4,0,6,4
...,...,...,...
9080,224,88,9080
9081,224,96,9081
9082,224,223,9082
9083,224,78,9083


In [None]:
colunas = list(df_edge.columns)

In [None]:
convert_dict = {}
for coluna in colunas:
  convert_dict[coluna] = float

In [None]:
df_edge = df_edge.astype(convert_dict)  
print(df_edge.dtypes)

source    float64
target    float64
id        float64
dtype: object


In [None]:
df_edge

Unnamed: 0,source,target,id
0,0.0,1.0,0.0
1,0.0,3.0,1.0
2,0.0,4.0,2.0
3,0.0,5.0,3.0
4,0.0,6.0,4.0
...,...,...,...
9080,224.0,88.0,9080.0
9081,224.0,96.0,9081.0
9082,224.0,223.0,9082.0
9083,224.0,78.0,9083.0


In [None]:
df_edge = df_edge.drop(columns=['id'])
df_edge

Unnamed: 0,source,target
0,0.0,1.0
1,0.0,3.0
2,0.0,4.0
3,0.0,5.0
4,0.0,6.0
...,...,...
9080,224.0,88.0
9081,224.0,96.0
9082,224.0,223.0
9083,224.0,78.0


### Transformar em Tensores

In [None]:
dados

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,302,303,304,305,306,307,308,309,310,311
id,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.000462,0.000393,0.000357,0.000343,0.000190,0.000319,0.000410,0.000145,0.000108,0.000203
f_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.107890,0.021816,0.051398,0.065174,0.082204,0.039719,0.024827,0.012664,0.013409,0.109798
f_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.103976,0.021141,0.040893,0.065129,0.055866,0.039605,0.024734,0.012518,0.013378,0.005498
f_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.142508,0.026589,0.145711,0.065354,0.319056,0.040541,0.025000,0.013553,0.013447,1.000000
f_3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.104098,0.019818,0.040987,0.066502,0.062768,0.026299,0.066848,0.012574,0.013437,0.006000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_6739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072630,0.019066,0.066275,0.064792,0.071384,0.024428,0.025931,0.015153,0.009815,0.005785
f_6740,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072783,0.023995,0.066510,0.064904,0.071384,0.024636,0.043617,0.015200,0.009885,0.005889
f_6741,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072737,0.019079,0.066334,0.064859,0.071384,0.024584,0.029375,0.015186,0.009846,0.006559
f_6742,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.072630,0.018936,0.066275,0.064792,0.071384,0.024532,0.025931,0.015153,0.009815,0.005818


In [None]:
dados = dados.drop(311,axis='columns')
dados

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,301,302,303,304,305,306,307,308,309,310
id,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.000258,0.000462,0.000393,0.000357,0.000343,0.000190,0.000319,0.000410,0.000145,0.000108
f_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.030060,0.107890,0.021816,0.051398,0.065174,0.082204,0.039719,0.024827,0.012664,0.013409
f_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029452,0.103976,0.021141,0.040893,0.065129,0.055866,0.039605,0.024734,0.012518,0.013378
f_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.034760,0.142508,0.026589,0.145711,0.065354,0.319056,0.040541,0.025000,0.013553,0.013447
f_3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029538,0.104098,0.019818,0.040987,0.066502,0.062768,0.026299,0.066848,0.012574,0.013437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_6739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029195,0.072630,0.019066,0.066275,0.064792,0.071384,0.024428,0.025931,0.015153,0.009815
f_6740,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029281,0.072783,0.023995,0.066510,0.064904,0.071384,0.024636,0.043617,0.015200,0.009885
f_6741,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029555,0.072737,0.019079,0.066334,0.064859,0.071384,0.024584,0.029375,0.015186,0.009846
f_6742,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029538,0.072630,0.018936,0.066275,0.064792,0.071384,0.024532,0.025931,0.015153,0.009815


In [None]:
x = torch.tensor(dados.values, dtype=torch.float)

In [None]:
edge_index = torch.tensor(df_edge.values, dtype=torch.long)

In [None]:
y = target.to_numpy()

In [None]:
y = torch.tensor(target.values, dtype=torch.long)

## Criar Data

In [None]:
dados

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,301,302,303,304,305,306,307,308,309,310
id,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.000258,0.000462,0.000393,0.000357,0.000343,0.000190,0.000319,0.000410,0.000145,0.000108
f_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.030060,0.107890,0.021816,0.051398,0.065174,0.082204,0.039719,0.024827,0.012664,0.013409
f_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029452,0.103976,0.021141,0.040893,0.065129,0.055866,0.039605,0.024734,0.012518,0.013378
f_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.034760,0.142508,0.026589,0.145711,0.065354,0.319056,0.040541,0.025000,0.013553,0.013447
f_3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029538,0.104098,0.019818,0.040987,0.066502,0.062768,0.026299,0.066848,0.012574,0.013437
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_6739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029195,0.072630,0.019066,0.066275,0.064792,0.071384,0.024428,0.025931,0.015153,0.009815
f_6740,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029281,0.072783,0.023995,0.066510,0.064904,0.071384,0.024636,0.043617,0.015200,0.009885
f_6741,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029555,0.072737,0.019079,0.066334,0.064859,0.071384,0.024584,0.029375,0.015186,0.009846
f_6742,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.029538,0.072630,0.018936,0.066275,0.064792,0.071384,0.024532,0.025931,0.015153,0.009815


### Validação Cruzada

#### K-fold

In [None]:
def kfold(k,random_indices):  
  print(random_indices)
  intervalo = len(random_indices)/k
  print('Intervalo = ',intervalo)
  folds = []
  for fold in range(k):
    if((fold+1)==k):
      dados_fold = random_indices[int(fold*intervalo):]
      folds.append(dados_fold)    
    else:
      dados_fold = random_indices[int(fold*intervalo):(int((fold+1)*intervalo))]
      folds.append(dados_fold)    
  return folds

In [None]:
random_indices = np.random.permutation(range(dados.shape[0]))

In [None]:
folds = kfold(10,random_indices)

[1397 1450 4994 ... 3692 1795 4702]
Intervalo =  674.5


#### CrossVal

In [None]:
from numpy.core.multiarray import concatenate
soma_acc=0
soma_prec=0
soma_rec=0
soma_f1=0
for rodada in range(len(folds)):
  teste = rodada
  val = 0  
  if(rodada!=9):
    val = teste + 1  
  test_data = folds[teste]
  val_data = folds[val]
  train_data = []
  for treino in range(len(folds)):
    if(treino!=teste and treino!=val):
      train_data = train_data + (folds[treino].tolist())
  train_mask = []
  test_mask = []
  val_mask = []

  for cont in range(len(random_indices)):
    if(cont in train_data):
      train_mask.append(True)
      test_mask.append(False)
      val_mask.append(False)
    else:
      train_mask.append(False)
      test_mask.append(True)
      val_mask.append(True)
  train_mask = torch.tensor(train_mask, dtype=torch.bool)
  test_mask = torch.tensor(test_mask, dtype=torch.bool)
  val_mask = torch.tensor(val_mask, dtype=torch.bool)
  data = Data(x=x, edge_index=edge_index, y=y)
  data.train_mask = train_mask
  data.test_mask = test_mask
  data.val_mask = val_mask
  

    # Create GAT model
  gat = GAT(dados.shape[1], 8, 2)
  print(gat)

# Train
  train(gat, data)

# Test
  acc,prec,rec,f1 = test(gat, data)
  print(f'\nGAT test precision: {prec*100:.2f}%\n')
  print(f'\nGAT test recall: {rec*100:.2f}%\n')
  print(f'\nGAT test f1 score: {f1*100:.2f}%\n')
  print(f'\nGAT test accuracy: {acc*100:.2f}%\n')
  soma_acc = soma_acc + acc
  soma_prec = soma_prec + prec
  soma_rec = soma_rec + rec
  soma_f1 = soma_f1 + f1


GAT(
  (gat1): GATv2Conv(227, 8, heads=8)
  (gat2): GATv2Conv(64, 2, heads=1)
)
Epoch   0 | Train Loss: 0.730 | Train Acc:  49.05% | Val Loss: 0.73 | Val Acc: 48.78%
Epoch  10 | Train Loss: 0.693 | Train Acc:  53.28% | Val Loss: 0.69 | Val Acc: 54.11%
Epoch  20 | Train Loss: 0.675 | Train Acc:  58.65% | Val Loss: 0.67 | Val Acc: 59.67%
Epoch  30 | Train Loss: 0.658 | Train Acc:  63.25% | Val Loss: 0.65 | Val Acc: 64.05%
Epoch  40 | Train Loss: 0.641 | Train Acc:  68.12% | Val Loss: 0.63 | Val Acc: 70.72%
Epoch  50 | Train Loss: 0.621 | Train Acc:  68.68% | Val Loss: 0.61 | Val Acc: 72.72%
Epoch  60 | Train Loss: 0.605 | Train Acc:  70.07% | Val Loss: 0.59 | Val Acc: 72.57%
Epoch  70 | Train Loss: 0.589 | Train Acc:  70.33% | Val Loss: 0.57 | Val Acc: 73.09%
Epoch  80 | Train Loss: 0.581 | Train Acc:  70.29% | Val Loss: 0.56 | Val Acc: 73.98%
Epoch  90 | Train Loss: 0.572 | Train Acc:  71.15% | Val Loss: 0.55 | Val Acc: 73.61%
Epoch 100 | Train Loss: 0.569 | Train Acc:  71.26% | Val Los

In [None]:
print(f'\nGAT test precision: {(soma_prec/10)*100:.2f}%\n')
print(f'\nGAT test recall: {(soma_rec/10)*100:.2f}%\n')
print(f'\nGAT test f1 score: {(soma_f1/10)*100:.2f}%\n')
print(f'\nGAT test accuracy: {(soma_acc/10)*100:.2f}%\n')


GAT test precision: 76.18%


GAT test recall: 67.81%


GAT test f1 score: 71.32%


GAT test accuracy: 72.71%



# Experimento Classificação Dataset Dias de Semana

## Dataset RNP

### Carregar Vértices

In [None]:
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

In [None]:
path_dataset_metricas_rnp_transposto = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_1.csv'
path_dataset_metricas_rnp_transposto_final_semana = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_final_semana.csv'
path_dataset_metricas_rnp_transposto_dia_semana = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_dia_semana.csv'
path_dataset_rnp_3 = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_testes_2_dias_semana.csv'

In [None]:
nos_retirados = [4, 14, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 52, 56, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 148, 158, 166, 176, 188, 189, 190, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215]

In [None]:
len(nos_retirados)

84

In [None]:
dados = pd.read_csv(path_dataset_metricas_rnp_transposto_dia_semana, delimiter=",")
dados = dados.T
dados

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,302,303,304,305,306,307,308,309,310,311
id,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,302.0,303.000000,304.0,305.0,306.000000,307.0,308.0,309.00000,310.000000,311.0
f_0,845572.0,820199.0,7840729.0,866019.0,0.0,858103.0,884162.0,839173.0,4102432.0,869847.0,...,82010.0,16320.000000,51430.0,86720.0,101000.000000,105500.0,66540.0,43470.00000,28140.000000,112000.0
f_1,847565.0,787375.0,3424706.0,864759.0,0.0,878133.0,890030.0,869218.0,3399700.0,860043.0,...,81900.0,16300.000000,51400.0,86200.0,101000.000000,105000.0,59800.0,43400.00000,28100.000000,112000.0
f_2,803419.0,831836.0,3847770.0,861982.0,0.0,694367.0,904923.0,852103.0,3679127.0,817417.0,...,82200.0,16400.000000,51500.0,89800.0,101000.000000,106000.0,127000.0,43600.00000,28400.000000,112000.0
f_3,355415.0,808513.0,7283799.0,854601.0,0.0,873149.0,932813.0,894016.0,1751930.0,871788.0,...,82030.0,16290.000000,51440.0,86290.0,111300.000000,105100.0,59820.0,44430.00000,29670.000000,112000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_17572,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47500.0,40351.896512,37500.0,49200.0,101313.714174,23600.0,19600.0,41237.69003,53190.357948,8830.0
f_17573,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47500.0,40351.896512,37700.0,49400.0,101313.714174,24800.0,19800.0,41237.69003,53190.357948,9080.0
f_17574,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47520.0,40351.896512,37590.0,49863.0,101313.714174,23620.0,19630.0,41237.69003,53190.357948,52830.0
f_17575,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47400.0,40351.896512,37500.0,49300.0,101313.714174,23500.0,19600.0,41237.69003,53190.357948,46200.0


In [None]:
dados = dados.drop(columns=nos_retirados)

In [None]:
dados

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,302,303,304,305,306,307,308,309,310,311
id,0.0,1.0,2.0,3.0,5.0,6.0,7.0,8.0,9.0,10.0,...,302.0,303.000000,304.0,305.0,306.000000,307.0,308.0,309.00000,310.000000,311.0
f_0,845572.0,820199.0,7840729.0,866019.0,858103.0,884162.0,839173.0,4102432.0,869847.0,851390.0,...,82010.0,16320.000000,51430.0,86720.0,101000.000000,105500.0,66540.0,43470.00000,28140.000000,112000.0
f_1,847565.0,787375.0,3424706.0,864759.0,878133.0,890030.0,869218.0,3399700.0,860043.0,871157.0,...,81900.0,16300.000000,51400.0,86200.0,101000.000000,105000.0,59800.0,43400.00000,28100.000000,112000.0
f_2,803419.0,831836.0,3847770.0,861982.0,694367.0,904923.0,852103.0,3679127.0,817417.0,802633.0,...,82200.0,16400.000000,51500.0,89800.0,101000.000000,106000.0,127000.0,43600.00000,28400.000000,112000.0
f_3,355415.0,808513.0,7283799.0,854601.0,873149.0,932813.0,894016.0,1751930.0,871788.0,867068.0,...,82030.0,16290.000000,51440.0,86290.0,111300.000000,105100.0,59820.0,44430.00000,29670.000000,112000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_17572,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47500.0,40351.896512,37500.0,49200.0,101313.714174,23600.0,19600.0,41237.69003,53190.357948,8830.0
f_17573,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47500.0,40351.896512,37700.0,49400.0,101313.714174,24800.0,19800.0,41237.69003,53190.357948,9080.0
f_17574,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47520.0,40351.896512,37590.0,49863.0,101313.714174,23620.0,19630.0,41237.69003,53190.357948,52830.0
f_17575,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47400.0,40351.896512,37500.0,49300.0,101313.714174,23500.0,19600.0,41237.69003,53190.357948,46200.0


In [None]:
df_max_scaled = dados.copy() 
for column in df_max_scaled.columns: 
    df_max_scaled[column] = df_max_scaled[column]  / df_max_scaled[column].max() 
      
display(df_max_scaled) 

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,302,303,304,305,306,307,308,309,310,311
id,0.000000,0.000001,2.413240e-07,0.000003,0.000006,0.000006,0.000008,0.000002,0.000010,0.000011,...,0.000209,0.000154,0.000220,0.000251,0.000149,0.000044,0.000166,0.000167,0.000096,0.000222
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.974980,0.929866,0.938655,0.807083,0.957579,0.944346,...,0.056833,0.008284,0.037214,0.071492,0.049316,0.014994,0.035948,0.023485,0.008723,0.080000
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.997738,0.936037,0.972262,0.668833,0.946787,0.966271,...,0.056757,0.008274,0.037192,0.071063,0.049316,0.014923,0.032307,0.023447,0.008710,0.080000
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.788942,0.951700,0.953118,0.723805,0.899861,0.890265,...,0.056965,0.008325,0.037265,0.074031,0.049316,0.015065,0.068612,0.023555,0.008803,0.080000
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.992075,0.981032,1.000000,0.344662,0.959716,0.961735,...,0.056847,0.008269,0.037221,0.071138,0.054346,0.014937,0.032318,0.024003,0.009197,0.080000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_17572,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027135,0.040561,0.049470,0.003354,0.010589,0.022279,0.016488,0.006307
f_17573,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027279,0.040725,0.049470,0.003525,0.010697,0.022279,0.016488,0.006486
f_17574,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032931,0.020483,0.027200,0.041107,0.049470,0.003357,0.010605,0.022279,0.016488,0.037736
f_17575,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032848,0.020483,0.027135,0.040643,0.049470,0.003340,0.010589,0.022279,0.016488,0.033000


In [None]:
dados = df_max_scaled

In [None]:
dados = dados.fillna(0)
dados

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,302,303,304,305,306,307,308,309,310,311
id,0.000000,0.000001,2.413240e-07,0.000003,0.000006,0.000006,0.000008,0.000002,0.000010,0.000011,...,0.000209,0.000154,0.000220,0.000251,0.000149,0.000044,0.000166,0.000167,0.000096,0.000222
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.974980,0.929866,0.938655,0.807083,0.957579,0.944346,...,0.056833,0.008284,0.037214,0.071492,0.049316,0.014994,0.035948,0.023485,0.008723,0.080000
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.997738,0.936037,0.972262,0.668833,0.946787,0.966271,...,0.056757,0.008274,0.037192,0.071063,0.049316,0.014923,0.032307,0.023447,0.008710,0.080000
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.788942,0.951700,0.953118,0.723805,0.899861,0.890265,...,0.056965,0.008325,0.037265,0.074031,0.049316,0.015065,0.068612,0.023555,0.008803,0.080000
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.992075,0.981032,1.000000,0.344662,0.959716,0.961735,...,0.056847,0.008269,0.037221,0.071138,0.054346,0.014937,0.032318,0.024003,0.009197,0.080000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_17572,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027135,0.040561,0.049470,0.003354,0.010589,0.022279,0.016488,0.006307
f_17573,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027279,0.040725,0.049470,0.003525,0.010697,0.022279,0.016488,0.006486
f_17574,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032931,0.020483,0.027200,0.041107,0.049470,0.003357,0.010605,0.022279,0.016488,0.037736
f_17575,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032848,0.020483,0.027135,0.040643,0.049470,0.003340,0.010589,0.022279,0.016488,0.033000


In [None]:
média_RTT = dados[311].median()
print(média_RTT)

0.012785714285714286


In [None]:
def classificar_alvo(x):
  if x < média_RTT:
    return 0
  return 1

### Obter Target

In [None]:
target =dados[311].apply(classificar_alvo)

In [None]:
target

id         0
f_0        1
f_1        1
f_2        1
f_3        1
          ..
f_17572    0
f_17573    0
f_17574    1
f_17575    1
f_17576    1
Name: 311, Length: 17578, dtype: int64

In [None]:
target.value_counts()

1    8808
0    8770
Name: 311, dtype: int64

### Constantes

In [None]:
path_adj = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/grafo.adj'
path_gexf = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/grafo_causalidade_rnp_1.gexf'
path_nodes = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto.csv'
path_dataset_rnp_final = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_1.csv'
path_dataset_metricas_rnp_transposto = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_1.csv'
path_gat_model = '/content/drive/MyDrive/Seagate/RNP/gat_model_1.csv'

### Carregar Arestas

In [None]:
X = nx.read_gexf(path_gexf)

In [None]:
n_vertices = X.number_of_nodes()
n_arestas = X.number_of_edges()
print(n_arestas)
print(n_vertices)

9085
225


In [None]:
df_edge = nx.to_pandas_edgelist(X)

In [None]:
df_edge

Unnamed: 0,source,target,id
0,0,1,0
1,0,3,1
2,0,4,2
3,0,5,3
4,0,6,4
...,...,...,...
9080,224,88,9080
9081,224,96,9081
9082,224,223,9082
9083,224,78,9083


In [None]:
colunas = list(df_edge.columns)

In [None]:
convert_dict = {}
for coluna in colunas:
  convert_dict[coluna] = float

In [None]:
df_edge = df_edge.astype(convert_dict)  
print(df_edge.dtypes)

source    float64
target    float64
id        float64
dtype: object


In [None]:
df_edge

Unnamed: 0,source,target,id
0,0.0,1.0,0.0
1,0.0,3.0,1.0
2,0.0,4.0,2.0
3,0.0,5.0,3.0
4,0.0,6.0,4.0
...,...,...,...
9080,224.0,88.0,9080.0
9081,224.0,96.0,9081.0
9082,224.0,223.0,9082.0
9083,224.0,78.0,9083.0


In [None]:
df_edge = df_edge.drop(columns=['id'])
df_edge

Unnamed: 0,source,target
0,0.0,1.0
1,0.0,3.0
2,0.0,4.0
3,0.0,5.0
4,0.0,6.0
...,...,...
9080,224.0,88.0
9081,224.0,96.0
9082,224.0,223.0
9083,224.0,78.0


In [None]:
df_edge = df_edge.T
df_edge

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9075,9076,9077,9078,9079,9080,9081,9082,9083,9084
source,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0,224.0
target,1.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,...,67.0,221.0,217.0,104.0,212.0,88.0,96.0,223.0,78.0,216.0


### Transformar em Tensores

In [None]:
dados

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,302,303,304,305,306,307,308,309,310,311
id,0.000000,0.000001,2.413240e-07,0.000003,0.000006,0.000006,0.000008,0.000002,0.000010,0.000011,...,0.000209,0.000154,0.000220,0.000251,0.000149,0.000044,0.000166,0.000167,0.000096,0.000222
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.974980,0.929866,0.938655,0.807083,0.957579,0.944346,...,0.056833,0.008284,0.037214,0.071492,0.049316,0.014994,0.035948,0.023485,0.008723,0.080000
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.997738,0.936037,0.972262,0.668833,0.946787,0.966271,...,0.056757,0.008274,0.037192,0.071063,0.049316,0.014923,0.032307,0.023447,0.008710,0.080000
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.788942,0.951700,0.953118,0.723805,0.899861,0.890265,...,0.056965,0.008325,0.037265,0.074031,0.049316,0.015065,0.068612,0.023555,0.008803,0.080000
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.992075,0.981032,1.000000,0.344662,0.959716,0.961735,...,0.056847,0.008269,0.037221,0.071138,0.054346,0.014937,0.032318,0.024003,0.009197,0.080000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_17572,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027135,0.040561,0.049470,0.003354,0.010589,0.022279,0.016488,0.006307
f_17573,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027279,0.040725,0.049470,0.003525,0.010697,0.022279,0.016488,0.006486
f_17574,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032931,0.020483,0.027200,0.041107,0.049470,0.003357,0.010605,0.022279,0.016488,0.037736
f_17575,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032848,0.020483,0.027135,0.040643,0.049470,0.003340,0.010589,0.022279,0.016488,0.033000


In [None]:
dados = dados.drop(311,axis='columns')
dados

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,301,302,303,304,305,306,307,308,309,310
id,0.000000,0.000001,2.413240e-07,0.000003,0.000006,0.000006,0.000008,0.000002,0.000010,0.000011,...,0.000190,0.000209,0.000154,0.000220,0.000251,0.000149,0.000044,0.000166,0.000167,0.000096
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.974980,0.929866,0.938655,0.807083,0.957579,0.944346,...,0.032353,0.056833,0.008284,0.037214,0.071492,0.049316,0.014994,0.035948,0.023485,0.008723
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.997738,0.936037,0.972262,0.668833,0.946787,0.966271,...,0.032321,0.056757,0.008274,0.037192,0.071063,0.049316,0.014923,0.032307,0.023447,0.008710
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.788942,0.951700,0.953118,0.723805,0.899861,0.890265,...,0.032448,0.056965,0.008325,0.037265,0.074031,0.049316,0.015065,0.068612,0.023555,0.008803
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.992075,0.981032,1.000000,0.344662,0.959716,0.961735,...,0.032366,0.056847,0.008269,0.037221,0.071138,0.054346,0.014937,0.032318,0.024003,0.009197
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_17572,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.023340,0.032918,0.020483,0.027135,0.040561,0.049470,0.003354,0.010589,0.022279,0.016488
f_17573,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.023466,0.032918,0.020483,0.027279,0.040725,0.049470,0.003525,0.010697,0.022279,0.016488
f_17574,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.022707,0.032931,0.020483,0.027200,0.041107,0.049470,0.003357,0.010605,0.022279,0.016488
f_17575,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.021505,0.032848,0.020483,0.027135,0.040643,0.049470,0.003340,0.010589,0.022279,0.016488


In [None]:
x = torch.tensor(dados.values, dtype=torch.float)

In [None]:
edge_index = torch.tensor(df_edge.values, dtype=torch.long)

In [None]:
y = target.to_numpy()

In [None]:
y = torch.tensor(target.values, dtype=torch.long)

## Criar Data

In [None]:
dados

Unnamed: 0,0,1,2,3,5,6,7,8,9,10,...,301,302,303,304,305,306,307,308,309,310
id,0.000000,0.000001,2.413240e-07,0.000003,0.000006,0.000006,0.000008,0.000002,0.000010,0.000011,...,0.000190,0.000209,0.000154,0.000220,0.000251,0.000149,0.000044,0.000166,0.000167,0.000096
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.974980,0.929866,0.938655,0.807083,0.957579,0.944346,...,0.032353,0.056833,0.008284,0.037214,0.071492,0.049316,0.014994,0.035948,0.023485,0.008723
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.997738,0.936037,0.972262,0.668833,0.946787,0.966271,...,0.032321,0.056757,0.008274,0.037192,0.071063,0.049316,0.014923,0.032307,0.023447,0.008710
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.788942,0.951700,0.953118,0.723805,0.899861,0.890265,...,0.032448,0.056965,0.008325,0.037265,0.074031,0.049316,0.015065,0.068612,0.023555,0.008803
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.992075,0.981032,1.000000,0.344662,0.959716,0.961735,...,0.032366,0.056847,0.008269,0.037221,0.071138,0.054346,0.014937,0.032318,0.024003,0.009197
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_17572,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.023340,0.032918,0.020483,0.027135,0.040561,0.049470,0.003354,0.010589,0.022279,0.016488
f_17573,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.023466,0.032918,0.020483,0.027279,0.040725,0.049470,0.003525,0.010697,0.022279,0.016488
f_17574,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.022707,0.032931,0.020483,0.027200,0.041107,0.049470,0.003357,0.010605,0.022279,0.016488
f_17575,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.021505,0.032848,0.020483,0.027135,0.040643,0.049470,0.003340,0.010589,0.022279,0.016488


### Validação Cruzada

#### K-fold

In [None]:
def kfold(k,random_indices):  
  print(random_indices)
  intervalo = len(random_indices)/k
  print('Intervalo = ',intervalo)
  folds = []
  for fold in range(k):
    if((fold+1)==k):
      dados_fold = random_indices[int(fold*intervalo):]
      folds.append(dados_fold)    
    else:
      dados_fold = random_indices[int(fold*intervalo):(int((fold+1)*intervalo))]
      folds.append(dados_fold)    
  return folds

In [None]:
random_indices = np.random.permutation(range(dados.shape[0]))

In [None]:
folds = kfold(10,random_indices)

[ 5792 11355 17487 ... 17571  5010 17151]
Intervalo =  1757.8


#### CrossVal

In [None]:
from numpy.core.multiarray import concatenate
soma_acc=0
soma_prec=0
soma_rec=0
soma_f1=0
for rodada in range(len(folds)):
  teste = rodada
  val = 0  
  if(rodada!=9):
    val = teste + 1  
  test_data = folds[teste]
  val_data = folds[val]
  train_data = []
  for treino in range(len(folds)):
    if(treino!=teste and treino!=val):
      train_data = train_data + (folds[treino].tolist())
  train_mask = []
  test_mask = []
  val_mask = []

  for cont in range(len(random_indices)):
    if(cont in train_data):
      train_mask.append(True)
      test_mask.append(False)
      val_mask.append(False)
    else:
      train_mask.append(False)
      test_mask.append(True)
      val_mask.append(True)
  train_mask = torch.tensor(train_mask, dtype=torch.bool)
  test_mask = torch.tensor(test_mask, dtype=torch.bool)
  val_mask = torch.tensor(val_mask, dtype=torch.bool)
  data = Data(x=x, edge_index=edge_index, y=y)
  data.train_mask = train_mask
  data.test_mask = test_mask
  data.val_mask = val_mask
  

    # Create GAT model
  gat = GAT(dados.shape[1], 8, 2)
  print(gat)

# Train
  train(gat, data)

# Test
  acc,prec,rec,f1 = test(gat, data)
  print(f'\nGAT test precision: {prec*100:.2f}%\n')
  print(f'\nGAT test recall: {rec*100:.2f}%\n')
  print(f'\nGAT test f1 score: {f1*100:.2f}%\n')
  print(f'\nGAT test accuracy: {acc*100:.2f}%\n')
  soma_acc = soma_acc + acc
  soma_prec = soma_prec + prec
  soma_rec = soma_rec + rec
  soma_f1 = soma_f1 + f1


GAT(
  (gat1): GATv2Conv(227, 8, heads=8)
  (gat2): GATv2Conv(64, 2, heads=1)
)
Epoch   0 | Train Loss: 0.732 | Train Acc:  51.72% | Val Loss: 0.74 | Val Acc: 50.07%
Epoch  10 | Train Loss: 0.674 | Train Acc:  53.07% | Val Loss: 0.67 | Val Acc: 54.37%
Epoch  20 | Train Loss: 0.644 | Train Acc:  63.27% | Val Loss: 0.65 | Val Acc: 63.67%
Epoch  30 | Train Loss: 0.619 | Train Acc:  66.60% | Val Loss: 0.63 | Val Acc: 65.86%
Epoch  40 | Train Loss: 0.601 | Train Acc:  66.78% | Val Loss: 0.61 | Val Acc: 65.55%
Epoch  50 | Train Loss: 0.586 | Train Acc:  68.08% | Val Loss: 0.60 | Val Acc: 66.77%
Epoch  60 | Train Loss: 0.570 | Train Acc:  70.17% | Val Loss: 0.58 | Val Acc: 68.48%
Epoch  70 | Train Loss: 0.558 | Train Acc:  71.59% | Val Loss: 0.57 | Val Acc: 70.75%
Epoch  80 | Train Loss: 0.549 | Train Acc:  72.42% | Val Loss: 0.56 | Val Acc: 72.12%
Epoch  90 | Train Loss: 0.539 | Train Acc:  73.26% | Val Loss: 0.55 | Val Acc: 71.95%
Epoch 100 | Train Loss: 0.532 | Train Acc:  73.73% | Val Los

In [None]:
print(f'\nGAT test precision: {(soma_prec/10)*100:.2f}%\n')
print(f'\nGAT test recall: {(soma_rec/10)*100:.2f}%\n')
print(f'\nGAT test f1 score: {(soma_f1/10)*100:.2f}%\n')
print(f'\nGAT test accuracy: {(soma_acc/10)*100:.2f}%\n')


GAT test precision: 81.72%


GAT test recall: 68.21%


GAT test f1 score: 74.07%


GAT test accuracy: 76.23%



# Experimento Classificação Dataset Sem Causalidade

## Dataset RNP

### Carregar Vértices

In [None]:
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

In [None]:
path_dataset_metricas_rnp_transposto = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto.csv'

path_dataset_metricas_rnp_transposto_final_semana = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_final_semana.csv'
path_dataset_metricas_rnp_transposto_dia_semana = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_dia_semana.csv'
path_dataset_rnp_3 = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_testes_2_sem_causalidade.csv'

In [None]:
dados = pd.read_csv(path_dataset_metricas_rnp_transposto, delimiter=",")
dados = dados.T
dados

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,302,303,304,305,306,307,308,309,310,311
id,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,302.0,303.000000,304.0,305.0,306.000000,307.0,308.0,309.00000,310.000000,311.0
f_0,845572.0,820199.0,7840729.0,866019.0,0.0,858103.0,884162.0,839173.0,4102432.0,869847.0,...,82010.0,16320.000000,51430.0,86720.0,101000.000000,105500.0,66540.0,43470.00000,28140.000000,112000.0
f_1,847565.0,787375.0,3424706.0,864759.0,0.0,878133.0,890030.0,869218.0,3399700.0,860043.0,...,81900.0,16300.000000,51400.0,86200.0,101000.000000,105000.0,59800.0,43400.00000,28100.000000,112000.0
f_2,803419.0,831836.0,3847770.0,861982.0,0.0,694367.0,904923.0,852103.0,3679127.0,817417.0,...,82200.0,16400.000000,51500.0,89800.0,101000.000000,106000.0,127000.0,43600.00000,28400.000000,112000.0
f_3,355415.0,808513.0,7283799.0,854601.0,0.0,873149.0,932813.0,894016.0,1751930.0,871788.0,...,82030.0,16290.000000,51440.0,86290.0,111300.000000,105100.0,59820.0,44430.00000,29670.000000,112000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_24316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47500.0,40351.896512,37500.0,49200.0,101313.714174,23600.0,19600.0,41237.69003,53190.357948,8830.0
f_24317,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47500.0,40351.896512,37700.0,49400.0,101313.714174,24800.0,19800.0,41237.69003,53190.357948,9080.0
f_24318,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47520.0,40351.896512,37590.0,49863.0,101313.714174,23620.0,19630.0,41237.69003,53190.357948,52830.0
f_24319,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,47400.0,40351.896512,37500.0,49300.0,101313.714174,23500.0,19600.0,41237.69003,53190.357948,46200.0


In [None]:
df_max_scaled = dados.copy() 
for column in df_max_scaled.columns: 
    df_max_scaled[column] = df_max_scaled[column]  / df_max_scaled[column].max() 
      
display(df_max_scaled) 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,302,303,304,305,306,307,308,309,310,311
id,0.000000,0.000001,2.413240e-07,0.000003,1.0,0.000006,0.000006,0.000008,0.000002,0.000010,...,0.000209,0.000154,0.000220,0.000251,0.000149,0.000044,0.000166,0.000145,0.000096,0.000203
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.0,0.974980,0.929866,0.938655,0.807083,0.957579,...,0.056833,0.008284,0.037214,0.071492,0.049316,0.014994,0.035948,0.020456,0.008723,0.072964
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.0,0.997738,0.936037,0.972262,0.668833,0.946787,...,0.056757,0.008274,0.037192,0.071063,0.049316,0.014923,0.032307,0.020424,0.008710,0.072964
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.0,0.788942,0.951700,0.953118,0.723805,0.899861,...,0.056965,0.008325,0.037265,0.074031,0.049316,0.015065,0.068612,0.020518,0.008803,0.072964
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.0,0.992075,0.981032,1.000000,0.344662,0.959716,...,0.056847,0.008269,0.037221,0.071138,0.054346,0.014937,0.032318,0.020908,0.009197,0.072964
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_24316,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027135,0.040561,0.049470,0.003354,0.010589,0.019406,0.016488,0.005752
f_24317,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027279,0.040725,0.049470,0.003525,0.010697,0.019406,0.016488,0.005915
f_24318,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032931,0.020483,0.027200,0.041107,0.049470,0.003357,0.010605,0.019406,0.016488,0.034417
f_24319,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032848,0.020483,0.027135,0.040643,0.049470,0.003340,0.010589,0.019406,0.016488,0.030098


In [None]:
dados = df_max_scaled

In [None]:
dados = dados.fillna(0)
dados

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,302,303,304,305,306,307,308,309,310,311
id,0.000000,0.000001,2.413240e-07,0.000003,1.0,0.000006,0.000006,0.000008,0.000002,0.000010,...,0.000209,0.000154,0.000220,0.000251,0.000149,0.000044,0.000166,0.000145,0.000096,0.000203
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.0,0.974980,0.929866,0.938655,0.807083,0.957579,...,0.056833,0.008284,0.037214,0.071492,0.049316,0.014994,0.035948,0.020456,0.008723,0.072964
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.0,0.997738,0.936037,0.972262,0.668833,0.946787,...,0.056757,0.008274,0.037192,0.071063,0.049316,0.014923,0.032307,0.020424,0.008710,0.072964
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.0,0.788942,0.951700,0.953118,0.723805,0.899861,...,0.056965,0.008325,0.037265,0.074031,0.049316,0.015065,0.068612,0.020518,0.008803,0.072964
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.0,0.992075,0.981032,1.000000,0.344662,0.959716,...,0.056847,0.008269,0.037221,0.071138,0.054346,0.014937,0.032318,0.020908,0.009197,0.072964
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_24316,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027135,0.040561,0.049470,0.003354,0.010589,0.019406,0.016488,0.005752
f_24317,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027279,0.040725,0.049470,0.003525,0.010697,0.019406,0.016488,0.005915
f_24318,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032931,0.020483,0.027200,0.041107,0.049470,0.003357,0.010605,0.019406,0.016488,0.034417
f_24319,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032848,0.020483,0.027135,0.040643,0.049470,0.003340,0.010589,0.019406,0.016488,0.030098


In [None]:
média_RTT = dados[311].median()
print(média_RTT)

0.005921824104234527


In [None]:
def classificar_alvo(x):
  if x < média_RTT:
    return 0
  return 1

### Obter Target

In [None]:
target =dados[311].apply(classificar_alvo)

In [None]:
target

id         0
f_0        1
f_1        1
f_2        1
f_3        1
          ..
f_24316    0
f_24317    0
f_24318    1
f_24319    1
f_24320    1
Name: 311, Length: 24322, dtype: int64

In [None]:
target.value_counts()

1    12197
0    12125
Name: 311, dtype: int64

### Constantes

In [None]:
path_adj = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/grafo.adj'
path_gexf = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/grafo_causalidade_rnp_1.gexf'
path_nodes = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto.csv'
path_dataset_rnp_final = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_1.csv'
path_dataset_metricas_rnp_transposto = '/content/drive/MyDrive/Seagate/RNP/dataset_set_2022/dataset/dataset_rnp_transposto_1.csv'
path_gat_model = '/content/drive/MyDrive/Seagate/RNP/gat_model_1.csv'

### Carregar Arestas

In [None]:
X = nx.read_gexf(path_gexf)

In [None]:
n_vertices = X.number_of_nodes()
n_arestas = X.number_of_edges()
print(n_arestas)
print(n_vertices)

9085
225


In [None]:
df_edge = nx.to_pandas_edgelist(X)

In [None]:
df_edge

Unnamed: 0,source,target,id
0,0,1,0
1,0,3,1
2,0,4,2
3,0,5,3
4,0,6,4
...,...,...,...
9080,224,88,9080
9081,224,96,9081
9082,224,223,9082
9083,224,78,9083


In [None]:
colunas = list(df_edge.columns)

In [None]:
convert_dict = {}
for coluna in colunas:
  convert_dict[coluna] = float

In [None]:
df_edge = df_edge.astype(convert_dict)  
print(df_edge.dtypes)

source    float64
target    float64
id        float64
dtype: object


In [None]:
df_edge

Unnamed: 0,source,target,id
0,0.0,1.0,0.0
1,0.0,3.0,1.0
2,0.0,4.0,2.0
3,0.0,5.0,3.0
4,0.0,6.0,4.0
...,...,...,...
9080,224.0,88.0,9080.0
9081,224.0,96.0,9081.0
9082,224.0,223.0,9082.0
9083,224.0,78.0,9083.0


In [None]:
df_edge = df_edge.drop(columns=['id'])
df_edge

Unnamed: 0,source,target
0,0.0,1.0
1,0.0,3.0
2,0.0,4.0
3,0.0,5.0
4,0.0,6.0
...,...,...
9080,224.0,88.0
9081,224.0,96.0
9082,224.0,223.0
9083,224.0,78.0


In [None]:
for indice in range(9083):
  if(indice >= 2):
    df_edge = df_edge.drop(indice)

In [None]:
df_edge = df_edge.drop(9083)
df_edge = df_edge.drop(9084)

In [None]:
df_edge

Unnamed: 0,source,target
0,0.0,1.0
1,0.0,3.0


In [None]:
df_edge = df_edge.T
df_edge

Unnamed: 0,0,1
source,0.0,0.0
target,1.0,3.0


In [None]:
df_edge.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, source to target
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       2 non-null      float64
 1   1       2 non-null      float64
dtypes: float64(2)
memory usage: 156.0+ bytes


In [None]:
type(df_edge.values)

numpy.ndarray

### Transformar em Tensores

In [None]:
dados

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,302,303,304,305,306,307,308,309,310,311
id,0.000000,0.000001,2.413240e-07,0.000003,1.0,0.000006,0.000006,0.000008,0.000002,0.000010,...,0.000209,0.000154,0.000220,0.000251,0.000149,0.000044,0.000166,0.000145,0.000096,0.000203
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.0,0.974980,0.929866,0.938655,0.807083,0.957579,...,0.056833,0.008284,0.037214,0.071492,0.049316,0.014994,0.035948,0.020456,0.008723,0.072964
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.0,0.997738,0.936037,0.972262,0.668833,0.946787,...,0.056757,0.008274,0.037192,0.071063,0.049316,0.014923,0.032307,0.020424,0.008710,0.072964
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.0,0.788942,0.951700,0.953118,0.723805,0.899861,...,0.056965,0.008325,0.037265,0.074031,0.049316,0.015065,0.068612,0.020518,0.008803,0.072964
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.0,0.992075,0.981032,1.000000,0.344662,0.959716,...,0.056847,0.008269,0.037221,0.071138,0.054346,0.014937,0.032318,0.020908,0.009197,0.072964
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_24316,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027135,0.040561,0.049470,0.003354,0.010589,0.019406,0.016488,0.005752
f_24317,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032918,0.020483,0.027279,0.040725,0.049470,0.003525,0.010697,0.019406,0.016488,0.005915
f_24318,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032931,0.020483,0.027200,0.041107,0.049470,0.003357,0.010605,0.019406,0.016488,0.034417
f_24319,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.032848,0.020483,0.027135,0.040643,0.049470,0.003340,0.010589,0.019406,0.016488,0.030098


In [None]:
dados = dados.drop(311,axis='columns')
dados

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,301,302,303,304,305,306,307,308,309,310
id,0.000000,0.000001,2.413240e-07,0.000003,1.0,0.000006,0.000006,0.000008,0.000002,0.000010,...,0.000190,0.000209,0.000154,0.000220,0.000251,0.000149,0.000044,0.000166,0.000145,0.000096
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.0,0.974980,0.929866,0.938655,0.807083,0.957579,...,0.032353,0.056833,0.008284,0.037214,0.071492,0.049316,0.014994,0.035948,0.020456,0.008723
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.0,0.997738,0.936037,0.972262,0.668833,0.946787,...,0.032321,0.056757,0.008274,0.037192,0.071063,0.049316,0.014923,0.032307,0.020424,0.008710
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.0,0.788942,0.951700,0.953118,0.723805,0.899861,...,0.032448,0.056965,0.008325,0.037265,0.074031,0.049316,0.015065,0.068612,0.020518,0.008803
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.0,0.992075,0.981032,1.000000,0.344662,0.959716,...,0.032366,0.056847,0.008269,0.037221,0.071138,0.054346,0.014937,0.032318,0.020908,0.009197
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_24316,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.023340,0.032918,0.020483,0.027135,0.040561,0.049470,0.003354,0.010589,0.019406,0.016488
f_24317,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.023466,0.032918,0.020483,0.027279,0.040725,0.049470,0.003525,0.010697,0.019406,0.016488
f_24318,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.022707,0.032931,0.020483,0.027200,0.041107,0.049470,0.003357,0.010605,0.019406,0.016488
f_24319,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.021505,0.032848,0.020483,0.027135,0.040643,0.049470,0.003340,0.010589,0.019406,0.016488


In [None]:
x = torch.tensor(dados.values, dtype=torch.float)

In [None]:
edge_index = torch.tensor(df_edge.values, dtype=torch.long)

In [None]:
y = target.to_numpy()

In [None]:
y = torch.tensor(target.values, dtype=torch.long)

## Criar Data

In [None]:
dados

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,301,302,303,304,305,306,307,308,309,310
id,0.000000,0.000001,2.413240e-07,0.000003,1.0,0.000006,0.000006,0.000008,0.000002,0.000010,...,0.000190,0.000209,0.000154,0.000220,0.000251,0.000149,0.000044,0.000166,0.000145,0.000096
f_0,0.927910,0.911870,9.460782e-01,0.965740,0.0,0.974980,0.929866,0.938655,0.807083,0.957579,...,0.032353,0.056833,0.008284,0.037214,0.071492,0.049316,0.014994,0.035948,0.020456,0.008723
f_1,0.930097,0.875378,4.132319e-01,0.964335,0.0,0.997738,0.936037,0.972262,0.668833,0.946787,...,0.032321,0.056757,0.008274,0.037192,0.071063,0.049316,0.014923,0.032307,0.020424,0.008710
f_2,0.881652,0.924808,4.642797e-01,0.961239,0.0,0.788942,0.951700,0.953118,0.723805,0.899861,...,0.032448,0.056965,0.008325,0.037265,0.074031,0.049316,0.015065,0.068612,0.020518,0.008803
f_3,0.390024,0.898878,8.788779e-01,0.953008,0.0,0.992075,0.981032,1.000000,0.344662,0.959716,...,0.032366,0.056847,0.008269,0.037221,0.071138,0.054346,0.014937,0.032318,0.020908,0.009197
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
f_24316,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.023340,0.032918,0.020483,0.027135,0.040561,0.049470,0.003354,0.010589,0.019406,0.016488
f_24317,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.023466,0.032918,0.020483,0.027279,0.040725,0.049470,0.003525,0.010697,0.019406,0.016488
f_24318,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.022707,0.032931,0.020483,0.027200,0.041107,0.049470,0.003357,0.010605,0.019406,0.016488
f_24319,0.000000,0.000000,0.000000e+00,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.021505,0.032848,0.020483,0.027135,0.040643,0.049470,0.003340,0.010589,0.019406,0.016488


### Validação Cruzada

#### K-fold

In [None]:
def kfold(k,random_indices):  
  print(random_indices)
  intervalo = len(random_indices)/k
  print('Intervalo = ',intervalo)
  folds = []
  for fold in range(k):
    if((fold+1)==k):
      dados_fold = random_indices[int(fold*intervalo):]
      folds.append(dados_fold)    
    else:
      dados_fold = random_indices[int(fold*intervalo):(int((fold+1)*intervalo))]
      folds.append(dados_fold)    
  return folds

In [None]:
random_indices = np.random.permutation(range(dados.shape[0]))

In [None]:
folds = kfold(10,random_indices)

[18185 18238  8389 ... 18368 17400 13081]
Intervalo =  2432.2


#### CrossVal

In [None]:
from numpy.core.multiarray import concatenate
soma_acc=0
soma_prec=0
soma_rec=0
soma_f1=0
for rodada in range(len(folds)):
  teste = rodada
  val = 0  
  if(rodada!=9):
    val = teste + 1  
  test_data = folds[teste]
  val_data = folds[val]
  train_data = []
  for treino in range(len(folds)):
    if(treino!=teste and treino!=val):
      train_data = train_data + (folds[treino].tolist())
  train_mask = []
  test_mask = []
  val_mask = []

  for cont in range(len(random_indices)):
    if(cont in train_data):
      train_mask.append(True)
      test_mask.append(False)
      val_mask.append(False)
    else:
      train_mask.append(False)
      test_mask.append(True)
      val_mask.append(True)
  train_mask = torch.tensor(train_mask, dtype=torch.bool)
  test_mask = torch.tensor(test_mask, dtype=torch.bool)
  val_mask = torch.tensor(val_mask, dtype=torch.bool)
  data = Data(x=x, edge_index=edge_index, y=y)
  data.train_mask = train_mask
  data.test_mask = test_mask
  data.val_mask = val_mask
  

    # Create GAT model
  gat = GAT(dados.shape[1], 8, 2)
  print(gat)

# Train
  train(gat, data)

# Test
  acc,prec,rec,f1 = test(gat, data)
  print(f'\nGAT test precision: {prec*100:.2f}%\n')
  print(f'\nGAT test recall: {rec*100:.2f}%\n')
  print(f'\nGAT test f1 score: {f1*100:.2f}%\n')
  print(f'\nGAT test accuracy: {acc*100:.2f}%\n')
  soma_acc = soma_acc + acc
  soma_prec = soma_prec + prec
  soma_rec = soma_rec + rec
  soma_f1 = soma_f1 + f1


GAT(
  (gat1): GATv2Conv(311, 8, heads=8)
  (gat2): GATv2Conv(64, 2, heads=1)
)
Epoch   0 | Train Loss: 0.736 | Train Acc:  50.12% | Val Loss: 0.75 | Val Acc: 48.81%
Epoch  10 | Train Loss: 0.684 | Train Acc:  51.65% | Val Loss: 0.68 | Val Acc: 51.89%
Epoch  20 | Train Loss: 0.647 | Train Acc:  65.62% | Val Loss: 0.65 | Val Acc: 65.95%
Epoch  30 | Train Loss: 0.621 | Train Acc:  69.70% | Val Loss: 0.62 | Val Acc: 69.80%
Epoch  40 | Train Loss: 0.602 | Train Acc:  68.61% | Val Loss: 0.60 | Val Acc: 68.22%
Epoch  50 | Train Loss: 0.589 | Train Acc:  69.91% | Val Loss: 0.59 | Val Acc: 69.55%
Epoch  60 | Train Loss: 0.577 | Train Acc:  70.25% | Val Loss: 0.58 | Val Acc: 69.88%
Epoch  70 | Train Loss: 0.567 | Train Acc:  71.05% | Val Loss: 0.57 | Val Acc: 70.35%
Epoch  80 | Train Loss: 0.557 | Train Acc:  71.46% | Val Loss: 0.56 | Val Acc: 71.38%
Epoch  90 | Train Loss: 0.550 | Train Acc:  72.01% | Val Loss: 0.55 | Val Acc: 72.25%
Epoch 100 | Train Loss: 0.544 | Train Acc:  72.38% | Val Los

In [None]:
print(f'\nGAT test precision: {(soma_prec/10)*100:.2f}%\n')
print(f'\nGAT test recall: {(soma_rec/10)*100:.2f}%\n')
print(f'\nGAT test f1 score: {(soma_f1/10)*100:.2f}%\n')
print(f'\nGAT test accuracy: {(soma_acc/10)*100:.2f}%\n')


GAT test precision: 80.18%


GAT test recall: 65.39%


GAT test f1 score: 71.82%


GAT test accuracy: 74.37%

