# Lectura de datos

In [4]:
import os

# Switch the working directory to the project checkout; presumably this is what
# lets the project-local imports in the next cell resolve — TODO confirm.
ruta_al_directorio = "/home/TFM/code"
os.chdir(ruta_al_directorio)

In [5]:
import torch
import numpy as np
import pandas as pd
import random
from gnn.model import get_gnnNets
from gendata import get_dataset
from utils.parser_utils import (
    arg_parse,
    create_args_group,
    fix_random_seed,
    get_data_args,
    get_graph_size_args,
)
from pathlib import Path
from torch_geometric.utils import degree


## Forma manual - entender el código proporcionado

In [9]:
import os.path as osp
import torch
import mat73
from sklearn.model_selection import train_test_split
import os
from torch_geometric.data import InMemoryDataset, Data, download_url, extract_zip
import torch
from torch_geometric.data import Data
import numpy as np
from utils.gen_utils import from_edge_index_to_adj, padded_datalist

In [11]:
# Spelling variants per dataset key: lower-case, capitalised, upper-case, and a
# trailing None placeholder. Not read by the other cells visible in this notebook.
names = {
    key: [key, key.capitalize(), key.upper(), None]
    for key in ("uk", "ieee24", "ieee39", "ieee118")
}

# Dataset processed manually below, and the folder holding its raw .mat files.
name = "ieee24"
raw_dir = f"/home/dataset/{name}/raw"

In [12]:
# Preferred GPU index. The original expression picked 'cuda:2' whenever CUDA was
# available, which breaks on hosts with fewer than three GPUs (the first
# `.to(device)` fails with an invalid device ordinal). Fall back to the first
# GPU in that case, and to the CPU when CUDA is not available at all.
_PREFERRED_GPU = 2
if not torch.cuda.is_available():
    device = torch.device('cpu')
elif torch.cuda.device_count() > _PREFERRED_GPU:
    device = torch.device(f'cuda:{_PREFERRED_GPU}')
else:
    device = torch.device('cuda:0')

In [13]:
# Branch list (also called edge order / edge index). One is subtracted from
# every entry — presumably converting 1-based MATLAB ids to 0-based; confirm.
path = osp.join(raw_dir, "blist.mat")
edge_order = torch.tensor(mat73.loadmat(path)["bList"] - 1)

# Binary classification labels.
path = osp.join(raw_dir, "of_bi.mat")
of_bi = mat73.loadmat(path)

# Regression labels.
path = osp.join(raw_dir, "of_reg.mat")
of_reg = mat73.loadmat(path)

# Multi-class labels.
path = osp.join(raw_dir, "of_mc.mat")
of_mc = mat73.loadmat(path)

# Node feature matrix.
path = osp.join(raw_dir, "Bf.mat")
node_f = mat73.loadmat(path)

# Edge feature matrix.
path = osp.join(raw_dir, "Ef.mat")
edge_f = mat73.loadmat(path)

# Explanations (used further down to build the edge masks).
path = osp.join(raw_dir, "exp.mat")
exp = mat73.loadmat(path)

In [14]:
def _mat_field(loaded, key):
    """Return ``loaded[key]`` while ``loaded`` is still the dict read from a .mat file.

    This cell rebinds ``node_f``, ``edge_f``, ... from the loaded dict to the
    array stored inside it. On a second execution the name already holds the
    array, and indexing it with a string key would fail; in that case the
    value is returned unchanged so the cell can be re-run safely.
    """
    return loaded[key] if isinstance(loaded, dict) else loaded


node_f = _mat_field(node_f, 'B_f_tot')
edge_f = _mat_field(edge_f, 'E_f_post')
of_bi = _mat_field(of_bi, 'output_features')
of_mc = _mat_field(of_mc, 'category')
of_reg = _mat_field(of_reg, 'dns_MW')
# The key really is spelled "explainations" in the code as provided.
exp_mask = _mat_field(exp, "explainations")

In [15]:
# Accumulators filled by the processing loop below: one Data object and one
# adjacency matrix per graph, the largest node count seen, and a running id.
data_list, adj_list = [], []
max_num_nodes, index = 0, 0


Bucle principal de procesamiento:

Para cada grafo del dataset (resultado de un corte), hacemos lo siguiente:
- Cogemos las características de los nodos y aristas
- Procesamos las aristas:
    1. Se crea una máscara que tiene a 1 las aristas que fueron parte del inicio del fallo
    2. Se eliminan las contingencias (aristas que tengan todo a 0 al final) de la máscara, y se duplica la máscara 
    3. Se eliminan las contingencias de la matriz de atributos de aristas
    4. Se duplican las características de las aristas, para tener en cuenta los ejes en ambos sentidos
    5. Se eliminan las contingencias de la lista de aristas general (para obtener una versión de cada grafo)
    6. Se duplica la lista de aristas general
- Creamos un objeto Data, que representa un grafo, y lo procesamos correctamente:
    - Se añade una etiqueta contrafactual `y_cf`: vale 1 cuando la etiqueta original es 0, y -1 en caso contrario
    - Se guarda en una lista de objetos Data
- Se obtiene la matriz de adyacencia, y se guarda en una lista de matrices de adyacencia




In [17]:
def th_delete(tensor, indices):
    """Drop the entries of ``tensor`` selected by ``indices`` along the first axis.

    A boolean mask with the shape of ``tensor`` is built, the positions
    addressed by ``indices`` are cleared, and the tensor is indexed with it.
    Because the indexing uses a full-shape boolean mask, the result is always
    1-D; callers reshape it back to the layout they need.

    Args:
        tensor: tensor to filter.
        indices: index (or list of indices) into the first axis to remove.

    Returns:
        1-D tensor with the remaining elements, in their original order.
    """
    keep = torch.full(tensor.size(), True, dtype=torch.bool)
    keep[indices] = False
    return tensor[keep]

In [20]:
datatype = 'binary'
pre_filter = None
pre_transform = None

# Start from empty accumulators so that re-running this cell does not append
# the same graphs a second time to lists filled by a previous execution.
data_list = []
adj_list = []
max_num_nodes = 0
index = 0

# Validate the label type once, up front. With independent `if` statements an
# unknown value would leave `ydata` undefined (or stale from the previous graph).
label_kind = datatype.lower()
if label_kind not in ('binary', 'regression', 'multiclass'):
    raise ValueError(f"Unsupported datatype: {datatype!r}")

# Iterate over every graph of the dataset.
for i in range(len(node_f)):
    # Node features, reshaped to 3 columns per node.
    x = torch.tensor(node_f[i][0], dtype=torch.float32).reshape([-1, 3]).to(device)
    # Edge features exactly as stored (still including the contingency rows).
    f = torch.tensor(edge_f[i][0], dtype=torch.float32)

    # Explanation mask: set to 1 the edges listed in exp_mask for this graph
    # (the ids are shifted by -1, presumably because they are 1-based — confirm).
    e_mask = torch.zeros(len(edge_f[i][0]), 1)
    if exp_mask[i][0] is not None:
        e_mask[exp_mask[i][0].astype('int') - 1] = 1

    # Contingency list: rows of the edge feature matrix that mark a removed line
    # (if a line is part of the contingency list, all its edge features are 0).
    # NOTE(review): `np.all(row) == 0` is true as soon as ANY feature in the row
    # is zero; "all features are zero" would be `np.all(row == 0)`. Kept exactly
    # as in the provided code so the result matches it — confirm which is intended.
    cont = [j for j in range(len(f)) if np.all(np.array(f[j])) == 0]

    # Drop the contingencies from the mask, then duplicate it for the reversed edges.
    e_mask_post = th_delete(e_mask, cont)
    e_mask_post = torch.cat((e_mask_post, e_mask_post), 0).to(device)

    # Drop the contingencies from the edge features (4 columns per edge) and
    # duplicate them as well to account for the reversed edges.
    f_tot = th_delete(f, cont).reshape([-1, 4]).type(torch.float32)
    f_totw = torch.cat((f_tot, f_tot), 0).to(device)

    # Drop the failed lines from the branch list, append the flipped copy and
    # transpose to the (2, num_edges) layout.
    edge_iw = th_delete(edge_order, cont).reshape(-1, 2).type(torch.long)
    edge_iwr = torch.fliplr(edge_iw)
    edge_iw = torch.cat((edge_iw, edge_iwr), 0)
    edge_iw = edge_iw.t().contiguous().to(device)

    # Graph-level label, selected by the requested task.
    if label_kind == 'binary':
        ydata = torch.tensor(of_bi[i][0], dtype=torch.float, device=device).view(1, -1)
    elif label_kind == 'regression':
        ydata = torch.tensor(of_reg[i], dtype=torch.float, device=device).view(1, -1)
    else:  # 'multiclass': the class id is the argmax of the stored category vector
        ydata = torch.tensor(np.argmax(of_mc[i][0]), dtype=torch.float, device=device).view(1, -1)

    # One Data object per graph.
    data = Data(x=x, edge_index=edge_iw, edge_attr=f_totw, y=ydata, edge_mask=e_mask_post, idx=index)
    index += 1

    # Counterfactual label: 1 when the label is 0, -1 otherwise.
    cf_value = 1 if ydata == 0 else -1
    data.y_cf = torch.tensor(cf_value, dtype=torch.int, device=device).view(-1)

    # Apply the optional hooks BEFORE storing the graph. Previously the graph was
    # appended first, so a rejecting pre_filter had no effect and the result of
    # pre_transform was discarded.
    if pre_filter is not None and not pre_filter(data):
        continue
    if pre_transform is not None:
        data = pre_transform(data)

    # Adjacency matrix of the stored graph, plus the running maximum graph size.
    adj = from_edge_index_to_adj(data.edge_index, None, data.num_nodes)
    adj_list.append(adj)
    max_num_nodes = max(max_num_nodes, data.num_nodes)
    data_list.append(data)


data_list = padded_datalist(data_list, adj_list, max_num_nodes)


## Automático 

Ahora lo hacemos con la librería adecuada. `InMemoryDataset` es una clase proporcionada por PyTorch Geometric, una biblioteca diseñada para trabajar con datos de grafos en PyTorch. Es una subclase de `torch_geometric.data.Dataset` y está pensada para conjuntos de datos de grafos que caben completamente en la memoria RAM.

El código correspondiente a la clase InMemoryDataset viene proporcionado:

In [21]:
from dataset.powergrid import PowerGrid

In [39]:
def get_dataset(dataset_root, **params):
    """Build the dataset selected by ``params["dataset_name"]``.

    NOTE: this definition shadows the ``get_dataset`` imported from ``gendata``
    at the top of the notebook.

    Args:
        dataset_root: directory handed to ``PowerGrid`` as ``root``.
        **params: must contain ``"dataset_name"`` and ``"datatype"``; any
            other key is accepted and ignored.

    Returns:
        The ``PowerGrid`` instance for the requested dataset.

    Raises:
        KeyError: if ``"dataset_name"`` or ``"datatype"`` is missing.
        ValueError: if the lower-cased name is not a key of ``PowerGrid.names``.
    """
    dataset_name, datatype = params["dataset_name"], params["datatype"]
    print(f"Loading {dataset_name} dataset...")

    # Synthetic datasets (SynGraphDataset) are deliberately not supported here.
    if dataset_name.lower() not in PowerGrid.names:
        raise ValueError(f"{dataset_name} is not defined.")

    return PowerGrid(root=dataset_root, name=dataset_name, datatype=datatype)

In [43]:
# Base folder for the model / logs / results / mask sub-folders, and the dataset root.
CKPT_ROOT = "/home/TFM/code/extras/"
DATA_DIR = "/home/dataset/"

# Sub-folders derived from CKPT_ROOT (each keeps its trailing slash).
MODEL_DIR = f"{CKPT_ROOT}model/"
LOG_DIR = f"{CKPT_ROOT}logs/"
RESULT_DIR = f"{CKPT_ROOT}results/"
MASK_DIR = f"{CKPT_ROOT}mask/"


In [44]:
# Dataset selection and split settings, forwarded as keyword arguments.
dataset_params = dict(
    dataset_name="ieee24",
    random_seed=0,
    datatype="binary",
    # NOTE(review): the three ratios below add up to 1.05 — confirm the intended split.
    train_ratio=0.8,
    val_ratio=0.15,
    test_ratio=0.1,
)

# Directory settings gathered in one mapping; only 'data_save_dir' is read by
# the cells visible in this notebook.
args = dict(
    data_save_dir=DATA_DIR,
    dest='/home/TFM/',
    logs_save_dir=LOG_DIR,
    mask_save_dir=MASK_DIR,
    model_save_dir=MODEL_DIR,
    result_save_dir=RESULT_DIR,
)

In [45]:


# Build the dataset from the data root. The get_dataset defined in this notebook
# reads only "dataset_name" and "datatype" from dataset_params; the remaining
# keys are accepted through **params and ignored.
dataset = get_dataset(args['data_save_dir'], **dataset_params)





Loading ieee24 dataset...


Processing...
Done!


In [46]:
# Display the dataset's repr as the cell output.
dataset

PowerGrid(12900)