In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import torch

from matplotlib import pyplot as plt
from sklearn.neighbors import kneighbors_graph
from sklearn.metrics import f1_score

import generators as gen
import optimize as opt
import helpers as hel
import pickle as pkl
from NNet import NNet

%load_ext autoreload
%autoreload 2

import os

# Generate graphs
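Create stochastic block models with 45 nodes in 3 blocks. The `30`/`50`/`70` suffixes denote the within-block edge probability `p_in` (0.3, 0.5, 0.7); the between-block probability `p_out` is 0.1 throughout.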

In [2]:
def create_multiple_graphs(n_graphs=20, **kwargs):
    """Build a list of ``n_graphs`` results of ``gen.generate_L_sbm``.

    The i-th call is made with ``seed=i`` (i = 0 .. n_graphs - 1); every
    other keyword argument is forwarded unchanged.
    """
    graphs = []
    for seed in range(n_graphs):
        graphs.append(gen.generate_L_sbm(seed=seed, **kwargs))
    return graphs

def to_pickle(obj, filename):
    """Serialize ``obj`` to ``filename`` with pickle.

    Missing parent directories are created first, so saving into a results
    folder such as ``res/imp/`` also works when that folder does not exist
    yet. An existing file at ``filename`` is overwritten.
    """
    parent = os.path.dirname(filename)
    if parent:  # a bare file name has no directory component to create
        os.makedirs(parent, exist_ok=True)
    with open(filename, 'wb') as f:
        pkl.dump(obj, f)
        
def from_pickle(filename):
    """Read ``filename`` in binary mode and return the unpickled object."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as handle:
        loaded = pkl.load(handle)
    return loaded

In [3]:
# Settings shared by all three graph families; only p_in differs between them.
_sbm_shared = dict(nnodes=45, p_out=.1, n_blocks=3)

# p_in = 0.3
Ls_30 = create_multiple_graphs(20, p_in=.3, **_sbm_shared)
to_pickle(Ls_30, 'res/imp/Ls_30.pkl')

# p_in = 0.5
Ls_50 = create_multiple_graphs(20, p_in=.5, **_sbm_shared)
to_pickle(Ls_50, 'res/imp/Ls_50.pkl')

# p_in = 0.7
Ls_70 = create_multiple_graphs(20, p_in=.7, **_sbm_shared)
to_pickle(Ls_70, 'res/imp/Ls_70.pkl')

# Helper Methods

In [4]:
def sample_and_impute(L, ker, imputer, n_samples=500):
    """Draw samples from ``L`` and hand them to ``imputer``.

    ``gen.gen_and_filter`` is called with ``n_samples`` (500 by default),
    the kernel ``ker`` and a fixed ``seed=42``, so every call requests the
    same seed. ``imputer`` is a callable that takes the samples and returns
    an adjacency matrix; its result is returned unchanged.
    """
    draws = gen.gen_and_filter(L, n_samples, seed=42, ker=ker)
    imputed = imputer(draws)
    return imputed


def simulate(Ls, ker, imputer, verbose=True, n_samples=500):
    """Run ``sample_and_impute`` on every element of ``Ls``.

    Parameters
    ----------
    Ls : iterable
        Graph matrices to sample from, processed in order.
    ker : callable
        Kernel forwarded to ``sample_and_impute``.
    imputer : callable
        Function that takes samples and returns an adjacency matrix.
    verbose : bool, default True
        If true, print a one-line progress counter that is overwritten
        in place (carriage return, no newline).
    n_samples : int, default 500
        Number of samples requested per graph; forwarded to
        ``sample_and_impute``. The default matches the previous fixed value.

    Returns
    -------
    list
        One imputer result per element of ``Ls``, in the same order.
    """
    out = []
    for i, L in enumerate(Ls):
        if verbose:
            print('\rSimulating example {}'.format(i+1), end=' ')
        out.append(sample_and_impute(L, ker, imputer, n_samples=n_samples))
    return out

def f1_scores(Ls, imps):
    """Score each imputed graph against its reference matrix.

    For every pair ``(L, imp)`` the reference labels are the flattened
    entries of ``L < 0`` and the predictions are the flattened ``imp``.
    Pairs are formed with ``zip``, so surplus items of the longer list are
    ignored. Returns a list with one ``f1_score`` value per pair.
    """
    scores = []
    for L, imp in zip(Ls, imps):
        truth = np.asarray(L < 0).flatten()
        predicted = imp.flatten()
        scores.append(f1_score(truth, predicted))
    return scores

def process_all(imputer, savefile_prefix):
    """Run ``imputer`` over every kernel / graph-family combination and save.

    Kernels are the heat, normal and high-pass (``par=.5``) kernels from
    ``gen``; graph families are the notebook globals ``Ls_30``, ``Ls_50``
    and ``Ls_70``. For each combination the imputed graphs are pickled to
    ``<prefix>_<Ls>_<kernel>.pkl`` and their F1 scores to
    ``<prefix>_f1_<Ls>_<kernel>.pkl``. Nothing is returned.
    """
    # Pair each short name with the object it labels.
    kernels = [
        ('heat', gen.kernel_heat),
        ('norm', gen.kernel_normal),
        ('high', lambda x: gen.kernel_highpass(x, par=.5)),
    ]
    graph_sets = [('30', Ls_30), ('50', Ls_50), ('70', Ls_70)]

    for kername, ker in kernels:
        print('Kernel:', kername)

        for Lsname, Ls in graph_sets:
            print('Ls:', Lsname)

            imps = simulate(Ls, ker, imputer)
            f1 = f1_scores(Ls, imps)

            savefile_suffix = '_' + Lsname + '_' + kername + '.pkl'
            imps_path = savefile_prefix + savefile_suffix
            f1_path = savefile_prefix + '_f1' + savefile_suffix
            to_pickle(imps, imps_path)
            to_pickle(f1, f1_path)
        

In [1]:
# Inspect the p_in = 0.3 graph family (requires the generation cell above to have run).
Ls_30

NameError: name 'Ls_30' is not defined

# Our Method

In [5]:
def our_imputer(x):
    """Impute a graph from samples ``x`` and binarize the result.

    Calls ``opt.impute_graph`` with the fixed settings below, takes the
    first element of what it returns and thresholds it at 0.5 (strictly
    greater).
    """
    result = opt.impute_graph(x, lr=.01, verbose=False, n_epochs=3000,
                              lr_nnet=1e-3, nit_nnet=3)
    return result[0] > .5

In [None]:
# Switch for the long-running simulation; set to False to skip it and reuse
# the pickles already stored under 'res/imp/'.
run_our_method = True

if run_our_method:
    process_all(our_imputer, 'res/imp/ours')

Kernel: heat
Ls: 30
Simulating example 1 

# Stats

In [None]:
def load_f1(file_prefix):
    """Load the saved F1-score pickles into a nested dict.

    The result is indexed first by kernel name ('heat', 'norm', 'high') and
    then by graph-family name ('30', '50', '70'); each value is whatever
    ``from_pickle`` returns for ``<file_prefix>_f1_<family>_<kernel>.pkl``.
    """
    kernames = ['heat', 'norm', 'high']
    Lsnames = ['30', '50', '70']
    return {
        kn: {
            ln: from_pickle(file_prefix + '_f1' + '_' + ln + '_' + kn + '.pkl')
            for ln in Lsnames
        }
        for kn in kernames
    }

def get_iqr(x):
    """Return the interquartile range of ``x`` (75th minus 25th percentile)."""
    upper, lower = np.percentile(x, [75, 25])
    return upper - lower

def get_stats(f1_dict):
    """Summarize a nested ``{kernel: {p_in: scores}}`` dict as a DataFrame.

    Each (kernel, p_in) pair becomes one row holding the mean, median,
    standard deviation (NumPy default, i.e. population) and interquartile
    range of its scores. Columns are 'Filter', 'P_in', 'Mean', 'Median',
    'Stdev' and 'IQR'.
    """
    columns = ['Filter', 'P_in', 'Mean', 'Median', 'Stdev', 'IQR']
    rows = []
    for ker, kerdict in f1_dict.items():
        for pin, scores in kerdict.items():
            arr = np.array(scores)
            row = [ker, pin, arr.mean(), np.median(arr), arr.std(), get_iqr(arr)]
            rows.append(row)
    return pd.DataFrame(rows, columns=columns)


In [None]:
# Load the F1 scores saved by process_all under the 'res/imp/ours' prefix
# and summarize them per kernel and graph family.
our_f1 = load_f1('res/imp/ours')
our_stats = get_stats(our_f1)

In [None]:
# Raw per-graph F1 scores for the 'norm' kernel on the p_in = 0.7 graphs.
our_f1['norm']['70']

In [None]:
# Summary table: one row per (kernel, graph family) combination.
our_stats