<a href="https://colab.research.google.com/github/loyoladesa/qoeprediction/blob/main/jan_2025/Predi%C3%A7%C3%A3o_de_Features_de_um_N%C3%B3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Instalação

In [2]:
!pip install torch-geometric
!pip install torch-sparse
!pip install torch-scatter
!pip install torch-cluster
!pip install torch-spline-conv

Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m44.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-geometric
Successfully installed torch-geometric-2.6.1
Collecting torch-sparse
  Downloading torch_sparse-0.6.18.tar.gz (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m210.0/210.0 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: torch-sparse
  Building wheel for torch-sparse (setup.py) ... [?25l[?25hdone
  Created wheel for torch-sparse: filename=torch_sparse-0.6.18-cp311-cp311-linux_x86_64.whl size=1122943 sha256=4a9affc3134e23c1b6ed3

In [3]:
!pip install 'networkx<2.7'

Collecting networkx<2.7
  Downloading networkx-2.6.3-py3-none-any.whl.metadata (5.0 kB)
Downloading networkx-2.6.3-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m37.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: networkx
  Attempting uninstall: networkx
    Found existing installation: networkx 3.4.2
    Uninstalling networkx-3.4.2:
      Successfully uninstalled networkx-3.4.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
scikit-image 0.25.1 requires networkx>=3.0, but you have networkx 2.6.3 which is incompatible.
torch 2.5.1+cu124 requires nvidia-cublas-cu12==12.4.5.8; platform_system == "Linux" and platform_machine == "x86_64", but you have nvidia-cublas-cu12 12.5.3.2 which is incompatible.
torch 2.5.1+cu124 requires nvidia-cuda-cupti-cu12==12.4.127; platform_system ==

In [4]:
!pip install 'scipy>=1.8'



# Graph Attention Networks

In [142]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.data import Data
import networkx as nx
import matplotlib.pyplot as plt

## Criação de um grafo

In [51]:
# 1️⃣ Criando um Grafo Simples para Teste (NetworkX + PyG)
def create_toy_graph():
    edge_index = torch.tensor([
        [0, 1, 1, 2, 2, 3, 3, 4, 4, 5],
        [1, 0, 2, 1, 3, 2, 4, 3, 5, 4]
    ], dtype=torch.long)  # Lista de arestas

    x = torch.tensor([
        [1, 0], [0, 1], [1, 1], [0, 0], [1, 0], [0, 1]
    ], dtype=torch.float)  # Features dos nós

    return Data(x=x, edge_index=edge_index)

## Definição de GAT

In [247]:
# 2️⃣ Definição do Modelo GAT para Predição de Features
class GAT(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, heads=1):
        super(GAT, self).__init__()
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads, dropout=0.6,concat=True)
        self.conv2 = GATConv(hidden_channels * heads, out_channels, heads=1, dropout=0.6,concat=False)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.elu(x)  # Função de ativação
        x = self.conv2(x, edge_index)
        return x  # Retorna as features preditas para cada nó

In [248]:
# 3️⃣ Treinamento do Modelo
def train_model(data, epochs=400):
    model = GAT(in_channels=data.x.shape[1], hidden_channels=8, out_channels=data.x.shape[1])  # Modelo GAT

    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)  # Predição das features
        loss = F.mse_loss(out, data.x)  # Erro de reconstrução das features
        loss.backward()
        optimizer.step()

        if epoch % 20 == 0:
            print(f"Época {epoch}: Loss = {loss.item():.4f}")

    return model

### Funções de Grafos

#### Imprimir Nós e Arestas

In [56]:
def imprimirNósArestas(G):
  print("Nodes:", G.nodes())
  print("Length Nodes:", len(G.nodes()))
  print("Edges:", G.edges())
  print("Length Edges:", len(G.edges()))

#### Desenhar Grafo

In [121]:
def desenhar_grafo(G):
  import matplotlib.pyplot as plt
  import networkx as nx

  # Draw the graph
  plt.figure(figsize=(12, 8))  # Adjust figure size as needed
  pos = nx.spring_layout(G,k=5)  # You can try different layouts like 'circular', 'spectral',
  #nx.draw_networkx(G, with_labels=True)
  nx.draw_networkx(G, node_size=1000, node_color="skyblue", font_size=10, font_color="black", font_weight="bold", edge_color="gray")
  plt.title("Representação do Grafo")
  plt.show()

#### Retirar Arestas

In [58]:
def retirar_arestas_alvo(graph, alvo):
  nodes = list(graph.nodes())
  print(nodes)
  n = len(nodes)
  for i in range(n):
    for j in range(n):
        if graph.has_edge(nodes[i], alvo):
          if graph.has_edge(nodes[j], alvo):
            if graph.has_edge(nodes[i], nodes[j]):
              graph.remove_edge(nodes[i],alvo)
              print(f"Aresta ({i}, C) removida.")
  return graph


#### Renomear um Nó

In [95]:
# Função para renomear um nó
def rename_node(G, old_name, new_name):
    if old_name in G:
        # Adicionar novo nó com o novo nome
        G = nx.relabel_nodes(G, {old_name: new_name})
    else:
        print(f"Nó {old_name} não encontrado.")
    return G

### Criar Grafo Exemplo

In [None]:
data = create_toy_graph()

In [59]:
# 6️⃣ Visualização do Grafo
G = nx.DiGraph()
edges = data.edge_index.numpy().T.tolist()
G.add_edges_from(edges)

In [60]:
imprimirNósArestas(G)

Nodes: [0, 1, 2, 3, 4, 5]
Length Nodes: 6
Edges: [(0, 1), (1, 0), (1, 2), (2, 1), (2, 3), (3, 2), (3, 4), (4, 3), (4, 5), (5, 4)]
Length Edges: 10


## Preparação

#### Constantes

In [145]:
radical = '/content/drive/MyDrive/Seagate/RNP/dataset_'

mes = 'nov'
ano = '2024'
exp = 'exp_final'

# Datasets de entrada
dataset_transposto = 'dataset_trasnposto.csv'
grafo_causalidade_3 = 'grafo_causalidade_stage3.gexf'

# Datasets de saída
rnp_normalizado = 'dataset_rnp_normalizado.csv'
torch_data = 'torch_data.pt'
modelo = 'gat_model_1.csv'



pre_path = radical + mes + '_' + ano

# Caminhos de entrada

path_dataset_transposto = pre_path + '/dataset/' + exp + '/' + dataset_transposto
path_grafo_causalidade_3 = pre_path + '/dataset/' + exp + '/' + grafo_causalidade_3

# Caminhos de saída

path_rnp_normalizado = pre_path + '/dataset/' + exp + '/' + rnp_normalizado
path_modelo = pre_path + '/dataset/' + exp + '/' + modelo
path_torch_data = pre_path + '/dataset/' + exp + '/' + torch_data

#### Bibliotecas

In [146]:
#libraries for data manipulation
import numpy as np
import pandas as pd
import json

#### Mount Drive

In [147]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Carregamento do Grafo

In [156]:
print(path_grafo_causalidade_3)

/content/drive/MyDrive/Seagate/RNP/dataset_nov_2024/dataset/exp_final/grafo_causalidade_stage3.gexf


In [157]:
X = nx.read_gexf(path_grafo_causalidade_3)

In [158]:
# Renomear o nó 1 para "A"
X = rename_node(X, "C", 43)

In [159]:
imprimirNósArestas(X)

Nodes: ['0', '36', 43, '1', '2', '3', '13', '4', '5', '6', '7', '8', '9', '10', '11', '12', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '37', '38', '39', '40', '41']
Length Nodes: 43
Edges: [('0', '36'), ('36', 43), ('36', '1'), ('36', '2'), ('36', '3'), ('36', '13'), ('36', '4'), ('36', '5'), ('36', '6'), ('36', '7'), ('36', '8'), ('36', '9'), ('36', '10'), ('36', '11'), ('36', '12'), ('36', '14'), ('36', '15'), ('36', '16'), ('36', '17'), ('36', '18'), ('36', '19'), ('36', '20'), ('36', '21'), ('36', '22'), ('36', '23'), ('36', '24'), ('36', '25'), ('36', '26'), ('36', '27'), ('36', '28'), ('36', '29'), ('36', '30'), ('36', '31'), ('36', '32'), ('36', '33'), ('36', '34'), ('36', '35'), ('36', '37'), ('36', '38'), ('36', '39'), ('36', '40'), ('36', '41')]
Length Edges: 42


In [184]:
for key, value in dict_ids.items():
  print(f"Chave: {key}, Valor: {value}")
  print(type(value))
  if key == 43:
    X = rename_node(X, key, value)
  else:
    X = rename_node(X, str(key), value)

Chave: 0, Valor: 0
<class 'int'>
Chave: 1, Valor: 1
<class 'int'>
Chave: 2, Valor: 2
<class 'int'>
Chave: 3, Valor: 3
<class 'int'>
Chave: 4, Valor: 4
<class 'int'>
Chave: 5, Valor: 5
<class 'int'>
Chave: 6, Valor: 6
<class 'int'>
Chave: 7, Valor: 7
<class 'int'>
Chave: 8, Valor: 8
<class 'int'>
Chave: 9, Valor: 9
<class 'int'>
Chave: 10, Valor: 10
<class 'int'>
Chave: 11, Valor: 11
<class 'int'>
Chave: 12, Valor: 12
<class 'int'>
Chave: 13, Valor: 13
<class 'int'>
Chave: 14, Valor: 14
<class 'int'>
Chave: 15, Valor: 15
<class 'int'>
Chave: 16, Valor: 16
<class 'int'>
Chave: 17, Valor: 17
<class 'int'>
Chave: 18, Valor: 18
<class 'int'>
Chave: 19, Valor: 19
<class 'int'>
Chave: 20, Valor: 20
<class 'int'>
Chave: 21, Valor: 21
<class 'int'>
Chave: 22, Valor: 22
<class 'int'>
Chave: 23, Valor: 23
<class 'int'>
Chave: 24, Valor: 24
<class 'int'>
Chave: 25, Valor: 25
<class 'int'>
Chave: 26, Valor: 26
<class 'int'>
Chave: 27, Valor: 27
<class 'int'>
Chave: 28, Valor: 28
<class 'int'>
Chave

In [185]:
df_edge = nx.to_pandas_edgelist(X)


In [186]:
df_edge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42 entries, 0 to 41
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   source  42 non-null     int64 
 1   target  42 non-null     int64 
 2   id      42 non-null     object
dtypes: int64(2), object(1)
memory usage: 1.1+ KB


In [187]:
colunas = list(df_edge.columns)

In [188]:
convert_dict = {}
for coluna in colunas:
  convert_dict[coluna] = float

In [189]:
df_edge = nx.to_pandas_edgelist(X)

In [190]:
df_edge = df_edge.astype(convert_dict)
print(df_edge.dtypes)

source    float64
target    float64
id        float64
dtype: object


In [191]:
df_edge = df_edge.drop(columns=['id'])

In [192]:
df_edge = df_edge.T
df_edge

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
source,0.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0,...,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0
target,36.0,42.0,1.0,2.0,3.0,13.0,4.0,5.0,6.0,7.0,...,31.0,32.0,33.0,34.0,35.0,37.0,38.0,39.0,40.0,41.0


In [193]:
edge_index = torch.tensor(df_edge.values, dtype=torch.long)

## Carregamento dos Dados

In [217]:
dados = pd.read_csv(path_dataset_transposto, delimiter=",")
dados

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,1970-01-01 00:00:00.000002552,1970-01-01 00:00:00.000002553,1970-01-01 00:00:00.000002554,1970-01-01 00:00:00.000002555,1970-01-01 00:00:00.000002556,1970-01-01 00:00:00.000002557,1970-01-01 00:00:00.000002558,1970-01-01 00:00:00.000002559,1970-01-01 00:00:00.000002560,1970-01-01 00:00:00.000002561
0,0,58.5,58.6,58.6,58.5,58.5,58.3,60.7,60.4,60.4,...,,,,,,,,,,
1,1,59.65,59.0,88.45,74.28,74.69,67.8,74.93,66.6,65.19,...,,,,,,,,,,
2,2,62.8,59.4,319.0,159.0,200.0,98.0,113.0,74.3,74.2,...,,,,,,,,,,
3,3,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.1,26.0,...,,,,,,,,,,
4,4,26.81,26.32,26.32,26.47,30.14,26.48,42.57,26.58,48.57,...,,,,,,,,,,
5,5,28.3,26.7,26.7,26.9,65.3,27.1,234.0,27.0,289.0,...,,,,,,,,,,
6,6,38.0,38.0,37.9,37.9,38.0,38.0,38.0,38.0,38.0,...,,,,,,,,,,
7,7,38.25,38.35,38.15,40.81,42.49,38.48,70.44,39.08,39.29,...,,,,,,,,,,
8,8,38.5,39.1,38.4,52.9,63.5,39.2,621.0,42.2,41.6,...,,,,,,,,,,
9,9,28.3,28.3,28.3,28.3,28.2,28.2,28.3,28.3,28.3,...,,,,,,,,,,


In [218]:
dados = dados.rename(columns={'Unnamed: 0': 'No'})

In [219]:
dados = dados.interpolate(method='linear', limit_direction='forward', axis=1)

In [220]:
# Substitui valores NaN pela média dos valores anteriores na mesma linha
for col in dados.columns:
    if dados[col].isnull().any():
        dados[col] = dados[col].fillna(method='ffill')

In [221]:
ids = list(dados['No'])
print (ids)
print(len(ids))

[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 43.0]
43


In [222]:
for i in range(len(ids)):
  ids[i] = int(ids[i])
print (ids)
print(len(ids))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 43]
43


In [223]:
dict_ids = {}
for i in range(len(ids)):
  dict_ids[ids[i]] = i

In [224]:
dict_ids

{0: 0,
 1: 1,
 2: 2,
 3: 3,
 4: 4,
 5: 5,
 6: 6,
 7: 7,
 8: 8,
 9: 9,
 10: 10,
 11: 11,
 12: 12,
 13: 13,
 14: 14,
 15: 15,
 16: 16,
 17: 17,
 18: 18,
 19: 19,
 20: 20,
 21: 21,
 22: 22,
 23: 23,
 24: 24,
 25: 25,
 26: 26,
 27: 27,
 28: 28,
 29: 29,
 30: 30,
 31: 31,
 32: 32,
 33: 33,
 34: 34,
 35: 35,
 36: 36,
 37: 37,
 38: 38,
 39: 39,
 40: 40,
 41: 41,
 43: 42}

### Normalização

In [225]:
dados_normalizados = dados

In [226]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler()
#data[['company_name_encoded', 'location', 'minimum_exp', 'maximum_exp']] = sc.fit_transform(data[['company_name_encoded', 'location', 'minimum_exp', 'maximum_exp']])
dados_normalizados = sc.fit_transform(dados_normalizados)
dados_normalizados

array([[0.00000000e+00, 8.03536859e-09, 7.38205898e-09, ...,
        2.01505312e-08, 2.01505312e-08, 2.01505312e-08],
       [2.32558140e-02, 8.19332882e-09, 7.43244846e-09, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [4.65116279e-02, 8.62600252e-09, 7.48283794e-09, ...,
        2.83019103e-08, 2.83019103e-08, 2.83019103e-08],
       ...,
       [9.30232558e-01, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [9.53488372e-01, 1.29301746e-01, 1.08866501e-01, ...,
        3.05433885e-01, 3.05433885e-01, 3.05433885e-01],
       [1.00000000e+00, 5.90624286e-09, 5.41669231e-09, ...,
        9.33471830e-10, 9.21928935e-10, 9.42637123e-10]])

In [227]:
dimensoes = dados_normalizados.shape
dimensoes

(43, 59687)

In [228]:
x = torch.tensor(dados_normalizados, dtype=torch.float)

In [229]:
target = dados_normalizados[-1]
target

array([1.00000000e+00, 5.90624286e-09, 5.41669231e-09, ...,
       9.33471830e-10, 9.21928935e-10, 9.42637123e-10])

In [230]:
y = target
y = torch.tensor(target, dtype=torch.long)

In [231]:
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

In [232]:
data = Data(x=x, edge_index=edge_index)

In [233]:
x.shape

torch.Size([43, 59687])

In [234]:
y.shape

torch.Size([59687])

In [235]:
edge_index.shape

torch.Size([2, 42])

## Treinamento do Modelo


In [213]:
data = create_toy_graph()

In [245]:
print(f" data.x.shape: {data.x.shape}")

 data.x.shape: torch.Size([43, 59687])


In [246]:
print(f" data.edge_index.shape: {data.edge_index.shape}")


 data.edge_index.shape: torch.Size([2, 42])


In [249]:
# 4️⃣ Rodando o Treinamento

model = train_model(data)

Época 0: Loss = 0.0321
Época 20: Loss = 0.0317
Época 40: Loss = 0.0325
Época 60: Loss = 0.0327
Época 80: Loss = 0.0319
Época 100: Loss = 0.0318
Época 120: Loss = 0.0318
Época 140: Loss = 0.0318
Época 160: Loss = 0.0318
Época 180: Loss = 0.0318
Época 200: Loss = 0.0318
Época 220: Loss = 0.0318
Época 240: Loss = 0.0318
Época 260: Loss = 0.0318
Época 280: Loss = 0.0318
Época 300: Loss = 0.0318
Época 320: Loss = 0.0318
Época 340: Loss = 0.0318
Época 360: Loss = 0.0318
Época 380: Loss = 0.0317


In [250]:
# 5️⃣ Predição das Features dos Nós
model.eval()
predicted_features = model(data.x, data.edge_index).detach()
print("\n🔹 Features preditas:\n", predicted_features.numpy())


🔹 Features preditas:
 [[0.0316437  0.00374678 0.0015908  ... 0.00340489 0.00380817 0.00345817]
 [0.03163431 0.00372805 0.00162091 ... 0.00344112 0.00384162 0.00342482]
 [0.03162339 0.00370515 0.0016558  ... 0.00348236 0.00387928 0.00338674]
 ...
 [0.0316437  0.00374678 0.0015908  ... 0.00340489 0.00380817 0.00345817]
 [0.0316437  0.00374678 0.0015908  ... 0.00340489 0.00380817 0.00345817]
 [0.03121519 0.00288285 0.00296638 ... 0.00505952 0.00532913 0.00193871]]


## Avaliação do Modelo

In [251]:
import torch
import torch.nn.functional as F

# 1️⃣ Modelo em modo de avaliação
model.eval()
with torch.no_grad():
    predicted_features = model(data.x, data.edge_index)  # Predição das features

# 2️⃣ Calculando o RMSE
rmse = torch.sqrt(F.mse_loss(predicted_features, data.x))  # Raiz do erro médio quadrático

print(f"🔹 RMSE do modelo GAT: {rmse.item():.4f}")


🔹 RMSE do modelo GAT: 0.1782


## Avaliação de um Nó especifico

In [252]:
import torch
import torch.nn.functional as F

def evaluate_node_rmse(model, data, node_idx):
    """
    Avalia o RMSE das features de um nó específico.

    Parâmetros:
    - model: modelo treinado (GAT)
    - data: grafo de entrada (PyG Data)
    - node_idx: índice do nó-alvo a ser avaliado

    Retorna:
    - RMSE do nó específico
    """
    model.eval()  # Coloca o modelo em modo de avaliação
    with torch.no_grad():
        predicted_features = model(data.x, data.edge_index)  # Predição das features

    # Features reais e preditas do nó-alvo
    y_real = data.x[node_idx]  # Features reais do nó-alvo
    y_pred = predicted_features[node_idx]  # Features preditas

    # Calculando RMSE apenas para esse nó
    rmse_node = torch.sqrt(F.mse_loss(y_pred, y_real))

    return rmse_node.item()

In [254]:
# Definir o nó-alvo (por exemplo, o nó 2)
target_node = 42
rmse_value = evaluate_node_rmse(model, data, target_node)

print(f"🔹 RMSE para o nó {target_node}: {rmse_value:.4f}")


🔹 RMSE para o nó 42: 0.0054
