In [2]:
import os
import pickle as pkl
import subprocess

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd

## Data preparation

## a)
 Construct the networks based on the given description.

### Deepwalk

### DMGI

In [66]:
def csv_to_graph(path, threshold=0.3):
    """Read a square matrix from a header-less CSV and binarise it into a graph.

    An entry becomes an edge (value 1.0) when it is strictly greater than
    ``threshold``; everything else, including the whole diagonal, is 0.0.

    Parameters
    ----------
    path : str
        Location of the CSV file (read with ``header=None``).
    threshold : float, default 0.3
        Cut-off above which an entry counts as an edge.

    Returns
    -------
    G : networkx.Graph
        Graph built from the binarised matrix with ``nx.from_pandas_adjacency``.
    A : pandas.DataFrame
        The binarised adjacency matrix (float dtype, zero diagonal).
    """
    # load in csv as dataframe
    df = pd.read_csv(path, header=None)

    # Threshold, then force the diagonal to zero. Subtracting an identity
    # matrix would only be correct if every diagonal entry passed the
    # threshold; otherwise it yields -1 on the diagonal and a self-loop.
    adjacency = (df.to_numpy() > threshold).astype(float)
    np.fill_diagonal(adjacency, 0.0)
    A = pd.DataFrame(adjacency, index=df.index, columns=df.columns)

    # convert to graph
    G = nx.from_pandas_adjacency(A)

    return G, A

def load_graphs(folder, n_subjects=60):
    """Load the thresholded graphs of both sessions for every subject.

    For each subject index ``i`` in ``1..n_subjects`` the files
    ``{folder}/p{i:03}_1.csv`` and ``{folder}/p{i:03}_2.csv`` are converted
    with ``csv_to_graph`` (using its default threshold).

    Parameters
    ----------
    folder : str
        Directory containing the per-subject CSV files.
    n_subjects : int, default 60
        Number of subjects to load; files are numbered from 1.

    Returns
    -------
    G1s, G2s : list
        Graphs of the ``_1`` and ``_2`` files, in subject order.
    graphs_info : dict
        ``{'G1': {'As': [...], 'features': None}, 'G2': {...}}`` where ``As``
        holds the adjacency matrices matching ``G1s`` / ``G2s``.
    """
    # file suffix -> (graphs, adjacency matrices) collected for that suffix
    per_session = {1: ([], []), 2: ([], [])}

    for i in range(1, n_subjects + 1):
        for session, (graphs, adjacencies) in per_session.items():
            G, A = csv_to_graph(f'{folder}/p{i:03}_{session}.csv')
            graphs.append(G)
            adjacencies.append(A)

    (G1s, A1s), (G2s, A2s) = per_session[1], per_session[2]

    graphs_info = {
        'G1': {
            'As': A1s,
            'features': None
        },
        'G2': {
            'As': A2s,
            'features': None
        }
    }

    return G1s, G2s, graphs_info


def load_node_info(folder, graph_info, n_subjects=60, n_features=240):
    """Fill in the node features of both sessions with subject-averaged matrices.

    For each session suffix (``1`` and ``2``) the files
    ``{folder}/{folder}_p{i:03}_{suffix}.csv`` for ``i`` in ``1..n_subjects``
    are read without a header, cut to their first ``n_features`` columns and
    averaged element-wise over subjects.

    Parameters
    ----------
    folder : str
        Directory name; it is also used as the file-name prefix.
    graph_info : dict
        Dictionary with ``'G1'`` and ``'G2'`` entries (as built by
        ``load_graphs``). It is updated in place.
    n_subjects : int, default 60
        Number of subject files per session.
    n_features : int, default 240
        Number of leading columns kept from every file.

    Returns
    -------
    dict
        The same ``graph_info`` object, with ``['G1']['features']`` and
        ``['G2']['features']`` set to the averaged arrays.

    Raises
    ------
    ValueError
        If ``n_subjects`` is smaller than 1, or if the truncated matrices do
        not all share one shape.
    """
    if n_subjects < 1:
        raise ValueError("n_subjects must be at least 1")

    def _mean_features(session):
        # Average the truncated matrices of one session over all subjects.
        matrices = []
        for i in range(1, n_subjects + 1):
            filename = f'{folder}/{folder}_p{i:03}_{session}.csv'
            values = pd.read_csv(filename, header=None).to_numpy(dtype=float)
            # Truncate first so every matrix has the same shape. Sizing the
            # accumulator from an untruncated matrix would fail as soon as a
            # file holds more than n_features columns.
            matrices.append(values[:, :n_features])
        return np.stack(matrices).mean(axis=0)

    graph_info['G1']['features'] = _mean_features(1)
    graph_info['G2']['features'] = _mean_features(2)

    return graph_info

In [63]:
# Build the per-subject graphs (and their adjacency matrices) of both sessions
# from the CSV files in the "FC" folder.
G1s, G2s, graphs_info = load_graphs(folder="FC")

# Sizes reused by later cells: number of graphs per session and the node count
# of the first graph.
nb_graphs, nb_nodes = len(G1s), G1s[0].number_of_nodes()

In [67]:
# Attach the subject-averaged node features read from "AAL_timeseries" to the
# 'features' entries of graphs_info (the dict is updated in place and returned).
graphs_info = load_node_info(folder="AAL_timeseries", graph_info=graphs_info)

In [68]:
# Write one pickle per session to ../DMGI/data/<name>.pkl. Each file holds the
# node features, index arrays, labels and one "A_<i>" adjacency matrix per graph.
graphs = ['G1', 'G2']
for graph in graphs:
    output = {
        'feature': graphs_info[graph]['features'],
        # Train, validation and test indices all cover every node.
        'train_idx': np.arange(nb_nodes),
        'val_idx': np.arange(nb_nodes),
        'test_idx': np.arange(nb_nodes),
        # Constant column of ones, shape (nb_nodes, 1); presumably a placeholder
        # label because no real node labels exist -- TODO confirm.
        'label': np.ones((nb_nodes, 1)),
    }
    for i, A in enumerate(graphs_info[graph]['As']):
        output[f'A_{i}'] = A

    # The context manager guarantees the file is flushed and closed even if
    # pickling fails part-way through.
    with open('../DMGI/data/{}.pkl'.format(graph), 'wb') as pkl_file:
        pkl.dump(output, pkl_file, protocol=pkl.HIGHEST_PROTOCOL)

### Embeddings

## b) 
Apply implementations of two algorithms, one from each category: single graph node embedding and
multi-relational graph node embedding. Explain the reason behind choosing these two algorithms. 

### Deepwalk

### DMGI

In [None]:
# Create embeddings for G1 by running DMGI's main.py inside the DMGI folder.
dmgi_dir = os.path.join('..', 'DMGI')

# One metapath per adjacency matrix stored in the pickle: "A_0,A_1,...".
metapaths = ','.join(f'A_{i}' for i in range(nb_graphs))

# `cwd=` changes the working directory only for the child process, so the
# notebook's own working directory never needs to be saved and restored.
# `check=True` raises CalledProcessError if the training script fails, instead
# of silently continuing.
subprocess.run(
    ['python', 'main.py',
     '--embedder', 'DMGI',
     '--dataset', 'G1',
     '--metapaths', metapaths],
    cwd=dmgi_dir,
    check=True,
)

In [None]:
# Create embeddings for G2 by running DMGI's main.py inside the DMGI folder.
dmgi_dir = os.path.join('..', 'DMGI')

# One metapath per adjacency matrix stored in the pickle: "A_0,A_1,...".
metapaths = ','.join(f'A_{i}' for i in range(nb_graphs))

# `cwd=` changes the working directory only for the child process, so the
# notebook's own working directory never needs to be saved and restored.
# `check=True` raises CalledProcessError if the training script fails, instead
# of silently continuing.
subprocess.run(
    ['python', 'main.py',
     '--embedder', 'DMGI',
     '--dataset', 'G2',
     '--metapaths', metapaths],
    cwd=dmgi_dir,
    check=True,
)

The model embeddings should be saved in the `DMGI/saved_model` folder.