In [9]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import random     as rnd
import sklearn    as skl
import numpy      as np
import pandas     as pd
import seaborn as sns
import networkx as nx
import scipy as sp
import torch
import torch_geometric as pyg
import os

In [45]:
def mat_to_graph1(matrices):
    """Convert a 2-D tensor (or an iterable of them) into a bipartite graph description.

    Every row and every column of the matrix becomes a node: the ``rows`` row
    nodes come first and carry the feature ``[0, 1]``, the ``columns`` column
    nodes follow and carry ``[1, 0]``.  Each strictly positive entry ``(i, j)``
    produces one edge from row node ``i`` to column node ``rows + j`` whose
    feature is the entry's value.  Edges are listed in row-major order.

    Parameters
    ----------
    matrices : torch.Tensor or iterable
        A single 2-D tensor, or any iterable whose items are themselves valid
        arguments (it is processed recursively).

    Returns
    -------
    dict or list of dict
        For a tensor, a dictionary with the keys
        ``'nodes'`` (list of ``[0, 1]`` / ``[1, 0]`` lists),
        ``'edges_list'`` (long tensor of shape ``(2, E)``),
        ``'edges_features'`` (float tensor of shape ``(E,)``),
        ``'adj_mat'`` (sparse COO 0/1 adjacency, symmetrised) and
        ``'mod_adj_mat'`` (sparse COO adjacency weighted by the entry values,
        row -> column direction only).
        For any other input, the list of dictionaries of its items.

    Raises
    ------
    ValueError
        If a tensor that is not 2-dimensional is given.
    """
    if not isinstance(matrices, torch.Tensor):
        # Anything that is not a tensor is treated as a collection of matrices.
        return [mat_to_graph1(mat) for mat in matrices]

    if matrices.dim() != 2:
        raise ValueError(f"expected a 2-D tensor, got {matrices.dim()} dimension(s)")

    rows, columns = matrices.shape[0], matrices.shape[1]
    num_nodes = rows + columns
    # Independent inner lists, so editing one node's features cannot leak into another.
    nodes_features = [[0, 1] for _ in range(rows)] + [[1, 0] for _ in range(columns)]

    # torch.nonzero and boolean-mask indexing both walk the matrix in row-major
    # order, i.e. the same order as a nested ``for i / for j`` loop.
    positive = matrices.detach() > 0
    coords = torch.nonzero(positive)  # shape (E, 2); (0, 2) when there is no edge
    # Column nodes are numbered after the row nodes, hence the ``+ rows`` shift.
    # Long indices are what sparse tensors and torch_geometric expect.
    edges_list = torch.stack((coords[:, 0], coords[:, 1] + rows)).to(torch.long)
    edges_features = matrices.detach()[positive].to(torch.float)

    ones = torch.ones(edges_list.shape[1], dtype=torch.long)
    sparse_adj = torch.sparse_coo_tensor(edges_list, ones, size=(num_nodes, num_nodes))
    # Make the unweighted adjacency symmetric (undirected view of the graph).
    sparse_adj = sparse_adj + torch.transpose(sparse_adj, 0, 1)
    mod_sparse_adj = torch.sparse_coo_tensor(edges_list, edges_features, size=(num_nodes, num_nodes))

    return {
        'nodes': nodes_features,
        'edges_list': edges_list,
        'edges_features': edges_features,
        'adj_mat': sparse_adj,
        'mod_adj_mat': mod_sparse_adj,
    }

def mat_to_graph2(matrices):
    """Convert a 2-D array (or an iterable of them) into an entry-based graph.

    Every strictly positive entry of the matrix becomes a node whose feature is
    ``[value]``; nodes are numbered in row-major order.  Two nodes are linked,
    in both directions, when their entries share a column (edge feature
    ``[1, 0]``) or share a row (edge feature ``[0, 1]``).

    Parameters
    ----------
    matrices : numpy.ndarray or iterable
        A single 2-D array, or any iterable whose items are themselves valid
        arguments (it is processed recursively).

    Returns
    -------
    dict or list of dict
        For an array, a dictionary with the keys
        ``'nodes'`` (list of one-element lists),
        ``'edges_list'`` (integer array of shape ``(E, 2)``),
        ``'edges_features'`` (integer array of shape ``(E, 2)``) and
        ``'adj_mat'`` (``scipy.sparse.coo_matrix`` with a 1 per listed edge).
        For any other input, the list of dictionaries of its items.
    """
    if not isinstance(matrices, np.ndarray):
        # Anything that is not an array is treated as a collection of matrices.
        return [mat_to_graph2(mat) for mat in matrices]

    rows, columns = matrices.shape[0], matrices.shape[1]
    # Node id of each positive entry.  Stored as integers so the ids can be
    # used directly as indices; cells without a node keep 0 but are never read.
    node_index = np.zeros(matrices.shape, dtype=int)
    nodes_features, edges_list, edges_features = [], [], []
    node_num = 0
    for i in range(rows):
        for j in range(columns):
            if not matrices[i][j] > 0:
                continue
            node_index[i, j] = node_num
            nodes_features.append([matrices[i][j]])
            # Earlier entries of the same column.
            for k in range(i):
                if matrices[k][j] > 0:
                    other = int(node_index[k, j])
                    edges_list += [[other, node_num], [node_num, other]]
                    edges_features += [[1, 0], [1, 0]]
            # Earlier entries of the same row.
            for k in range(j):
                if matrices[i][k] > 0:
                    other = int(node_index[i, k])
                    edges_list += [[other, node_num], [node_num, other]]
                    edges_features += [[0, 1], [0, 1]]
            node_num += 1

    # reshape keeps a well-formed (0, 2) array when the graph has no edge,
    # so the column slicing below never fails.
    edges_list = np.array(edges_list, dtype=int).reshape(-1, 2)
    edges_features = np.array(edges_features, dtype=int).reshape(-1, 2)
    num_nodes = len(nodes_features)
    sparse_adj = sp.sparse.coo_matrix(
        (np.ones(len(edges_list), dtype=int), (edges_list[:, 0], edges_list[:, 1])),
        shape=(num_nodes, num_nodes),
    )
    return {
        'nodes': nodes_features,
        'edges_list': edges_list,
        'edges_features': edges_features,
        'adj_mat': sparse_adj,
    }

def graph_to_mat1(graph):
    """Rebuild the dense matrix encoded by a ``mat_to_graph1``-style dictionary.

    Parameters
    ----------
    graph : dict
        Must provide ``'nodes'`` (sequence of node features where ``[0, 1]``
        marks a row node and anything else a column node), ``'edges_list'``
        (indices of shape ``(2, E)``: first line the row node, second line the
        column node numbered after all row nodes) and ``'edges_features'``
        (the ``E`` entry values).

    Returns
    -------
    torch.Tensor
        Float tensor of shape ``(rows, columns)`` holding each edge feature at
        the position given by its edge and zeros elsewhere.
    """
    nodes = graph['nodes']
    rows = sum(1 for node in nodes if [int(v) for v in node] == [0, 1])
    columns = len(nodes) - rows
    matrice = torch.zeros((rows, columns))

    # ``edges_list`` is laid out as (2, E): one column per edge.  Iterating over
    # ``len(edges_list)`` would only visit its two lines, not the edges.
    edge_index = torch.as_tensor(graph['edges_list']).long()
    if edge_index.numel() > 0:
        src, dst = edge_index[0], edge_index[1]
        values = torch.as_tensor(graph['edges_features'], dtype=matrice.dtype)
        # Column nodes are numbered after the row nodes, so shift them back.
        matrice[src, dst - rows] = values
    return matrice

def check_graph_mat1(graph,mat):
    """Check that the matrix rebuilt from ``graph`` equals ``mat`` everywhere.

    The graph is converted back with ``graph_to_mat1`` and compared to ``mat``
    element by element; the value returned is ``.all()`` of that comparison.
    """
    rebuilt = graph_to_mat1(graph)
    same_entries = rebuilt == mat
    return same_entries.all()

def df_to_list(df):
    """Turn the rows of ``df`` into graph dictionaries annotated with their targets.

    The values of the ``'matrix'`` column are handed, as a list, to
    ``mat_to_graph1``.  Each resulting graph then receives a ``'target'`` entry:
    a dictionary with the ``'h11'`` and ``'h21'`` values found at the same
    position in ``df``.

    Returns the list of graph dictionaries.
    """
    matrices = list(df['matrix'].values)
    graphs_list = mat_to_graph1(matrices)
    for position, graph in enumerate(graphs_list):
        graph['target'] = {
            'h11': df['h11'].values[position],
            'h21': df['h21'].values[position],
        }
    return graphs_list

In [47]:
from torch_geometric.data import Data
def graph_to_data(graph):
    """Pack a graph dictionary into a ``Data`` object.

    The dictionary entries ``'nodes'``, ``'edges_list'``, ``'edges_features'``
    and ``'target'`` are passed as ``x``, ``edge_index``, ``edge_attr`` and
    ``y`` respectively.
    """
    # NOTE(review): 'nodes' is forwarded as-is; if it is a plain Python list
    # (not a tensor), ``x`` will be a list too -- confirm this is intended.
    fields = {
        'x': graph['nodes'],
        'edge_index': graph['edges_list'],
        'edge_attr': graph['edges_features'],
        'y': graph['target'],
    }
    return Data(**fields)

In [51]:
def max_filter(x,label=None):
    """Build a predicate keeping the graphs whose target does not exceed ``x``.

    Parameters
    ----------
    x : upper bound (inclusive).
    label : optional key; when given, the bound is applied to ``graph.y[label]``
        instead of ``graph.y`` itself.

    Returns
    -------
    callable
        ``graph -> graph.y <= x`` or ``graph -> graph.y[label] <= x``.
    """
    # Identity test: ``== None`` would defer to a custom ``__eq__`` of the label.
    if label is None:
        def keep(graph):
            return graph.y <= x
    else:
        def keep(graph):
            return graph.y[label] <= x
    return keep

def min_filter(x,label=None):
    """Build a predicate keeping the graphs whose target is at least ``x``.

    Parameters
    ----------
    x : lower bound (inclusive).
    label : optional key; when given, the bound is applied to ``graph.y[label]``
        instead of ``graph.y`` itself.

    Returns
    -------
    callable
        ``graph -> graph.y >= x`` or ``graph -> graph.y[label] >= x``.
    """
    # Identity test: ``== None`` would defer to a custom ``__eq__`` of the label.
    if label is None:
        def keep(graph):
            return graph.y >= x
    else:
        def keep(graph):
            return graph.y[label] >= x
    return keep

def label_selection(label):
    """Build a transform replacing the dictionary target of a graph by the chosen label.

    Parameters
    ----------
    label : ``'h11'`` or ``'h21'`` to keep that single entry of ``graph.y``;
        any other value keeps both, packed as an integer tensor
        ``[h11, h21]``.

    Returns
    -------
    callable
        ``graph -> graph`` that overwrites ``graph.y`` in place and returns the
        same graph object.
    """
    def select(graph):
        if label in ('h11', 'h21'):
            # Index with the requested label itself, so 'h21' really yields h21.
            graph.y = graph.y[label]
        else:
            graph.y = torch.tensor([graph.y['h11'],graph.y['h21']],dtype=torch.int)
        return graph
    return select

In [55]:
class OriginalCICY3(pyg.data.InMemoryDataset):
    """In-memory graph dataset built from the ``cicy3o.h5`` file.

    The raw file is fetched from ``self.url``; ``process`` converts every row
    into a graph (``df_to_list`` + ``graph_to_data``), applies the optional
    pre-filter(s) and pre-transform(s) and stores the collated result as
    ``cicy3o.pt``.

    Parameters
    ----------
    root : directory under which the raw and processed files are kept.
    transform, pre_transform, pre_filter : forwarded to the parent class.
        ``pre_transform`` and ``pre_filter`` may each be a single callable or a
        list/tuple of callables (transforms are chained in order, filters must
        all accept the graph).
    shuffle_seed : optional seed for the row shuffle done in ``process``;
        ``None`` (default) leaves the shuffle unseeded.
    """

    def __init__(self, root, transform=None, pre_transform=None, pre_filter=None, shuffle_seed=None):
        # These attributes are read by download()/process(), so they are set
        # before the parent constructor runs.
        self.url = 'http://www.lpthe.jussieu.fr/~erbin/files/data/cicy3o.h5'
        self.shuffle_seed = shuffle_seed
        super().__init__(root, transform, pre_transform, pre_filter)
        # NOTE(review): recent PyTorch releases may default ``torch.load`` to
        # ``weights_only=True``, which can reject pickled graph objects --
        # confirm against the installed version.
        self.data, self.slices = torch.load(self.processed_paths[0])

    def pre_filter_func(self,graph):
        """Return whether ``graph`` passes the configured pre-filter(s)."""
        checks = self.pre_filter
        if checks is None:
            return True
        if isinstance(checks, (list, tuple)):
            # Every filter must accept the graph; stops at the first refusal.
            return all(check(graph) for check in checks)
        return checks(graph)

    def pre_transform_func(self,graph):
        """Apply the configured pre-transform(s) to ``graph`` and return the result."""
        transforms = self.pre_transform
        if transforms is None:
            return graph
        if isinstance(transforms, (list, tuple)):
            for transform in transforms:
                graph = transform(graph)
            return graph
        return transforms(graph)

    @property
    def raw_file_names(self):
        """Names of the raw files expected in ``self.raw_dir``."""
        return ['cicy3o.h5']

    @property
    def processed_file_names(self):
        """Names of the processed files written by ``process``."""
        return ['cicy3o.pt']

    def download(self):
        """Download the raw file to ``self.raw_dir``."""
        pyg.data.download_url(self.url, self.raw_dir)

    def process(self):
        """Build the graphs from the raw file and save the collated dataset."""
        path = os.path.join(self.raw_dir, self.raw_file_names[0])
        df = pd.read_hdf(path)
        df['matrix'] = df['matrix'].apply(lambda x: torch.tensor(x,dtype=torch.float))
        # Shuffle the rows; reproducible when a seed was supplied.
        df = df.sample(frac=1, random_state=self.shuffle_seed)
        graphs_list = df_to_list(df)
        data_list = [graph_to_data(graph) for graph in graphs_list]
        # Filters run before transforms, so they see the untransformed targets.
        if self.pre_filter is not None:
            data_list = [data for data in data_list if self.pre_filter_func(data)]
        if self.pre_transform is not None:
            data_list = [self.pre_transform_func(data) for data in data_list]
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

In [56]:
# Build the dataset under ./data. A graph is kept only when 1 <= y['h21'] <= 91
# (both bounds inclusive); the filters run before the pre-transform, so they
# still see the dictionary target. label_selection('h21') is then applied.
# NOTE(review): process() writes its result to disk; presumably a later run
# reuses that file instead of re-filtering -- confirm before changing the
# filters or the transform.
dataset = OriginalCICY3(root='data',pre_filter=[min_filter(1,'h21'),max_filter(91,'h21')],pre_transform=label_selection('h21'))

Processing...
Done!
