In [1]:
%pip install torch-scatter torch-sparse torch-geometric

Installing collected packages: torch-sparse, torch-scatter, torch-geometric
Successfully installed torch-geometric-2.0.4 torch-scatter-2.0.9 torch-sparse-0.6.13


In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import random     as rnd
import sklearn    as skl
import numpy      as np
import pandas     as pd
import seaborn as sns
import networkx as nx
import scipy as sp
import torch
import torch_geometric as pyg

In [3]:
# Fixed seed so the shuffle below gives the same row order on every Restart & Run All.
SEED = 42

# Load the table from cicy3o.h5 and store each 'matrix' entry as a float32 numpy array.
df = pd.read_hdf('cicy3o.h5')
df['matrix'] = df['matrix'].apply(lambda x: np.array(x, dtype='float32'))

# Shuffle all rows (frac=1 keeps every row); the original index labels are kept.
df = df.sample(frac=1, random_state=SEED)

In [4]:
def mat_to_graph1(matrices):
    """Turn a matrix (or an iterable of matrices) into a bipartite graph description.

    Every matrix row becomes a node with features ``[0, 1]`` and every column a
    node with features ``[1, 0]``; row nodes are numbered first, column nodes
    follow. Each strictly positive entry ``matrices[i, j]`` produces one edge
    ``[i, rows + j]`` whose single feature is the entry itself. Edges are
    listed in row-major order of the matrix.

    Parameters
    ----------
    matrices : np.ndarray or iterable of np.ndarray
        A single 2-D matrix, or any non-ndarray iterable of such matrices.

    Returns
    -------
    dict or list of dict
        For one matrix, a dict with the keys

        * ``'nodes'``          -- list of per-node feature lists,
        * ``'edges_list'``     -- integer array of shape ``(E, 2)``,
        * ``'edges_features'`` -- array of shape ``(E, 1)`` with the entry values,
        * ``'adj_mat'``        -- symmetric sparse 0/1 adjacency matrix,
        * ``'mod_adj_mat'``    -- symmetric sparse adjacency weighted by the entries.

        For an iterable, a list with one such dict per matrix.
    """
    if not isinstance(matrices, np.ndarray):
        return [mat_to_graph1(mat) for mat in matrices]

    rows, columns = matrices.shape[0], matrices.shape[1]
    n_nodes = rows + columns

    # Build a fresh list per node: `[[0, 1]] * rows` would alias one list object,
    # so editing a single node's features would silently edit all of them.
    nodes_features = [[0, 1] for _ in range(rows)] + [[1, 0] for _ in range(columns)]

    # np.nonzero walks the matrix in row-major order, i.e. the same order as a
    # nested `for i ... for j ...` loop over the positive entries.
    row_idx, col_idx = np.nonzero(matrices > 0)
    # column_stack keeps the (E, 2) layout even when there is no edge at all.
    edges_list = np.column_stack((row_idx, rows + col_idx))
    edges_features = matrices[row_idx, col_idx][:, np.newaxis]

    sources, targets = edges_list[:, 0], edges_list[:, 1]
    shape = (n_nodes, n_nodes)

    # Only the row -> column direction is stored above; adding the transpose
    # makes both adjacency matrices symmetric.
    sparse_adj = sp.sparse.coo_matrix(
        (np.ones(len(sources), dtype=int), (sources, targets)), shape=shape)
    sparse_adj = sparse_adj + sparse_adj.T

    mod_sparse_adj = sp.sparse.coo_matrix(
        (edges_features[:, 0], (sources, targets)), shape=shape)
    mod_sparse_adj = mod_sparse_adj + mod_sparse_adj.T

    return {
        'nodes': nodes_features,
        'edges_list': edges_list,
        'edges_features': edges_features,
        'adj_mat': sparse_adj,
        'mod_adj_mat': mod_sparse_adj,
    }

def mat_to_graph2(matrices):
    """Turn a matrix (or an iterable of matrices) into an entry-level graph.

    Every strictly positive entry becomes one node (numbered in row-major
    order) whose single feature is the entry value. Two nodes are connected,
    in both directions, when their entries share a column (edge feature
    ``[1, 0]``) or share a row (edge feature ``[0, 1]``).

    Parameters
    ----------
    matrices : np.ndarray or iterable of np.ndarray
        A single 2-D matrix, or any non-ndarray iterable of such matrices.

    Returns
    -------
    dict or list of dict
        For one matrix, a dict with the keys

        * ``'nodes'``          -- list of ``[value]`` feature lists,
        * ``'edges_list'``     -- integer array of shape ``(E, 2)``,
        * ``'edges_features'`` -- integer array of shape ``(E, 2)``,
        * ``'adj_mat'``        -- sparse 0/1 adjacency matrix (COO).

        For an iterable, a list with one such dict per matrix.
    """
    if not isinstance(matrices, np.ndarray):
        return [mat_to_graph2(mat) for mat in matrices]

    rows, columns = matrices.shape[0], matrices.shape[1]

    # Node id of each positive entry. Kept as integers: storing ids in a float
    # array turns the whole edge list into floats. -1 marks "no node here" and
    # is never read, because only positions that already got a node are looked up.
    node_ids = np.full((rows, columns), -1, dtype=int)

    nodes_features = []
    edges_list = []
    edges_features = []
    node_num = 0
    for i in range(rows):
        for j in range(columns):
            if not matrices[i][j] > 0:
                continue
            node_ids[i, j] = node_num
            nodes_features.append([matrices[i][j]])

            # Earlier positive entries in the same column.
            for k in range(i):
                if matrices[k][j] > 0:
                    other = int(node_ids[k, j])
                    edges_list.append([other, node_num])
                    edges_features.append([1, 0])
                    edges_list.append([node_num, other])
                    edges_features.append([1, 0])

            # Earlier positive entries in the same row.
            for k in range(j):
                if matrices[i][k] > 0:
                    other = int(node_ids[i, k])
                    edges_list.append([other, node_num])
                    edges_features.append([0, 1])
                    edges_list.append([node_num, other])
                    edges_features.append([0, 1])

            node_num += 1

    # reshape(-1, 2) keeps the arrays two-dimensional when no edge was produced,
    # so the column slicing below works for edge-free graphs as well.
    edges_list = np.array(edges_list, dtype=int).reshape(-1, 2)
    edges_features = np.array(edges_features, dtype=int).reshape(-1, 2)

    n_nodes = len(nodes_features)
    sparse_adj = sp.sparse.coo_matrix(
        (np.ones(len(edges_list), dtype=int), (edges_list[:, 0], edges_list[:, 1])),
        shape=(n_nodes, n_nodes))

    return {
        'nodes': nodes_features,
        'edges_list': edges_list,
        'edges_features': edges_features,
        'adj_mat': sparse_adj,
    }

def graph_to_mat1(graph):
    """Rebuild the dense matrix encoded by a ``mat_to_graph1``-style graph dict.

    Parameters
    ----------
    graph : dict
        Must provide ``'nodes'`` (list of feature lists, ``[0, 1]`` marking a
        row node and anything else counted as a column node), ``'edges_list'``
        (pairs ``[row_node, column_node]``) and ``'edges_features'`` (one
        single-valued feature per edge).

    Returns
    -------
    np.ndarray
        Array of shape ``(rows, columns)`` (default float dtype) holding each
        edge's feature at the position the edge encodes and zeros elsewhere.
    """
    rows = sum(1 for node in graph['nodes'] if node == [0, 1])
    columns = len(graph['nodes']) - rows

    matrice = np.zeros((rows, columns))
    for idx, pos in enumerate(graph['edges_list']):
        # Each feature is a length-1 container. Pull the scalar out explicitly:
        # assigning a size-1 array to a single element relies on an implicit
        # conversion that NumPy deprecated in 1.25.
        value = np.asarray(graph['edges_features'][idx]).item()
        # Column nodes are numbered after the row nodes, hence the offset.
        matrice[pos[0], pos[1] - rows] = value
    return matrice

def check_graph_mat1(graph, mat):
    """Return True when ``graph`` decodes back to exactly ``mat``.

    The graph is decoded with ``graph_to_mat1`` and compared to ``mat`` with
    ``np.array_equal``, so a shape mismatch gives ``False`` instead of raising,
    and a broadcastable ``mat`` no longer compares equal by accident.
    """
    return np.array_equal(graph_to_mat1(graph), mat)

def df_to_list(df):
    """Build one graph dict per row of ``df`` and attach that row's targets.

    The ``'matrix'`` column is converted with ``mat_to_graph1``; the values of
    the ``'h11'`` and ``'h21'`` columns at the same position are then stored in
    each graph dict under the keys ``'h11'`` and ``'h21'``.

    Returns the list of graph dicts, in the row order of ``df``.
    """
    matrices = list(df['matrix'].values)
    graphs_list = mat_to_graph1(matrices)
    for position, graph in enumerate(graphs_list):
        graph.update(
            h11=df['h11'].values[position],
            h21=df['h21'].values[position],
        )
    return graphs_list

In [5]:
# Convert every matrix in `df` into its graph dict and attach the h11/h21 values of the same row.
graphs_list = df_to_list(df)

In [None]:
class OriginalCICY3(pyg.data.InMemoryDataset):
    """PyTorch Geometric in-memory dataset built from the ``cicy3o.h5`` file.

    Each row of the raw table is converted with ``mat_to_graph1`` into one
    ``pyg.data.Data`` graph; see ``process`` for the exact tensor layout.

    Parameters
    ----------
    root : str
        Dataset root directory, forwarded to ``InMemoryDataset``.
    transform, pre_transform, pre_filter : callable, optional
        Forwarded unchanged to ``InMemoryDataset``. ``pre_filter`` and
        ``pre_transform`` are applied, in that order, inside ``process``.
    """

    # Source of the raw HDF5 file used by `download`.
    url = 'http://www.lpthe.jussieu.fr/~erbin/files/data/cicy3o.h5'

    def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
        super().__init__(root, transform, pre_transform, pre_filter)
        # Load the collated data written by `process`.
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        """Names of the raw files expected for this dataset."""
        return ['cicy3o.h5']

    @property
    def processed_file_names(self):
        """Names of the processed files written by ``process``."""
        return ['cicy3o.pt']

    def download(self):
        """Download the raw HDF5 file into ``self.raw_dir``."""
        # `url` is a class attribute, so it has to be reached through `self`.
        pyg.data.download_url(self.url, self.raw_dir)

    def process(self):
        """Convert the raw table into ``Data`` graphs and save the collated result.

        Per row, the tensors are:

        * ``x``          -- node features from ``mat_to_graph1`` (float),
        * ``edge_index`` -- ``edges_list`` transposed to shape ``(2, E)`` (long),
        * ``edge_attr``  -- ``edges_features`` (float),
        * ``y``          -- ``[[h11, h21]]`` (float).
        """
        frame = pd.read_hdf(self.raw_paths[0])

        data_list = []
        for matrix, h11, h21 in zip(frame['matrix'], frame['h11'], frame['h21']):
            graph = mat_to_graph1(np.array(matrix, dtype='float32'))
            # NOTE(review): edges are stored only in the row -> column direction
            # that mat_to_graph1 emits, and both h11 and h21 are used as targets;
            # confirm this matches the intended model setup.
            data_list.append(pyg.data.Data(
                x=torch.tensor(graph['nodes'], dtype=torch.float),
                edge_index=torch.as_tensor(graph['edges_list'], dtype=torch.long).t().contiguous(),
                edge_attr=torch.as_tensor(graph['edges_features'], dtype=torch.float),
                y=torch.tensor([[float(h11), float(h21)]], dtype=torch.float),
            ))

        if self.pre_filter is not None:
            data_list = [data for data in data_list if self.pre_filter(data)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(data) for data in data_list]

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])