In [3]:
%load_ext autoreload
%autoreload 2

import sys
import os
sys.path.append('../')

# Graph imports
from src.graph import GraphModel 
import src.utils as utils

# usual imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import gc
import random
import networkx as nx

from IPython.display import display
from pyvis.network import Network

import pickle
import os

# Seed NumPy's legacy global RNG once, up front. Only NumPy is seeded here;
# the stdlib `random` module imported above is left unseeded.
SEED = 42
np.random.seed(SEED)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Directory that is listed below, relative to the notebook's working directory.
# (Plain string literal: there is nothing to interpolate, so no f-prefix.)
datasets = '../data/connectomes/'
# os.listdir returns entries in arbitrary order; sorting makes the order stable.
connectomes = sorted(os.listdir(datasets))
# Last expression of the cell: displays how many entries were found.
len(connectomes)


18

In [21]:
import sys
import os
sys.path.append('../')

import src.graph as graph
import src.logit_estimator as estimator
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# Parameters
# Sweep: for every (sigma, d) combination, build n_graphs graphs, fit the
# logit estimator to each one, and append one record per graph to `results`.
n = 500  # passed to GraphModel as n -- presumably the number of nodes; confirm in src.graph
sigma_values = [-1, -2, -3,-4,-5,-6,-7]  # each value is passed to GraphModel as sigma
# Wider sweep over d, currently disabled:
#d_values = [0, 1, 2]
d_values = [0]  # each value is passed to both GraphModel and LogitRegEstimator as d
n_graphs = 10  # number of graphs to generate for each parameter combination
n_iterations = 2000  # forwarded as max_iterations to populate_edges_baseline
warm_up = 2000  # forwarded as warm_up to populate_edges_baseline
results = []  # one dict per simulated graph; later cells pickle it and build a DataFrame from it

for sigma in sigma_values:
    for d in d_values:
        # i is only a repetition counter; the loop body never reads it.
        for i in range(n_graphs):
            # Generate graph
            graph_model = graph.GraphModel(n=n, d=d, sigma=sigma)
            # The second return value is discarded.
            graphs, _ = graph_model.populate_edges_baseline(warm_up=warm_up, max_iterations=n_iterations, patience=10)
            # Keep only the last element -- presumably the final simulated graph; TODO confirm.
            g = graphs[-1]

            # Perform estimation
            est = estimator.LogitRegEstimator(g, d=d)
            features, labels = est.get_features_labels()
            # NOTE(review): l1_wt/alpha look like elastic-net settings (alpha=0 would
            # mean no penalty) -- confirm in src.logit_estimator.
            # pvalue is unpacked but not used or stored below.
            result, params, pvalue = est.estimate_parameters(l1_wt=1, alpha=0, features=features, labels=labels)

            # Store results
            results.append({
                'true_sigma': sigma,
                'true_d': d,
                # First entry of params is recorded as the sigma estimate --
                # assumes sigma is the first fitted coefficient; TODO confirm.
                'estimated_sigma': params[0],
                'estimated_d': d,  # We're using the true d for estimation
                'aic': result.aic,
                'bic': result.bic,
                'llf': result.llf
            })


iteration: 0
iteration: 1
iteration: 2
iteration: 3
iteration: 4
iteration: 5
iteration: 6
iteration: 7
iteration: 8
iteration: 9
iteration: 10
iteration: 11
iteration: 12
iteration: 13
iteration: 14
iteration: 15
iteration: 16
iteration: 17
iteration: 18
iteration: 19
iteration: 20
iteration: 21
iteration: 22
iteration: 23
iteration: 24
iteration: 25
iteration: 26
iteration: 27
iteration: 28
iteration: 29
iteration: 30
iteration: 31
iteration: 32
iteration: 33
iteration: 34
iteration: 35
iteration: 36
iteration: 37
iteration: 38
iteration: 39
iteration: 40
iteration: 41
iteration: 42
iteration: 43
iteration: 44
iteration: 45
iteration: 46
iteration: 47
iteration: 48
iteration: 49
iteration: 50
iteration: 51
iteration: 52
iteration: 53
iteration: 54
iteration: 55
iteration: 56
iteration: 57
iteration: 58
iteration: 59
iteration: 60
iteration: 61
iteration: 62
iteration: 63
iteration: 64
iteration: 65
iteration: 66
iteration: 67
iteration: 68
iteration: 69
iteration: 70
iteration: 71
it

In [12]:
# Write the `results` list to results_anova.pkl in the current working
# directory. (Plain string literal: nothing to interpolate, so no f-prefix.)
with open('results_anova.pkl', 'wb') as f:
    pickle.dump(results, f)


In [18]:
# Tabulate the collected records: each dict in `results` becomes one row,
# and its keys become the column names.
df = pd.DataFrame(data=results)
df.head()

Unnamed: 0,true_sigma,true_d,estimated_sigma,estimated_d,aic,bic,llf
0,-2,0,-6.897507,0,10961.097993,10990.300194,-5477.548996
1,-2,0,-6.873209,0,10639.085887,10668.288089,-5316.542944
2,-2,0,-6.945375,0,9698.406947,9727.609148,-4846.203474
3,-2,0,-6.731992,0,10943.155077,10972.357278,-5468.577538
4,-2,0,-6.949774,0,10107.193327,10136.395528,-5050.596664


In [20]:
# Perform pairwise ANOVA tests for sigma and d
from itertools import combinations
from scipy import stats
import pandas as pd

def pairwise_anova(df, grouping_var, target_var):
    """Run a one-way ANOVA (``scipy.stats.f_oneway``) on every pair of groups.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing both the ``grouping_var`` and ``target_var`` columns.
    grouping_var : str
        Column whose distinct values define the groups.
    target_var : str
        Column holding the values that are compared between groups.

    Returns
    -------
    pandas.DataFrame
        One row per unordered pair of groups, with the columns
        ``'Comparison'``, ``'F-statistic'`` and ``'p-value'``. Both statistics
        are strings formatted to four decimal places, so any p-value below
        0.00005 is shown as ``'0.0000'``. When there are fewer than two groups
        the frame still has these three columns, with zero rows.
    """
    columns = ['Comparison', 'F-statistic', 'p-value']

    # Iterating a grouped column yields (group key, values) pairs in sorted
    # key order; materialise them once instead of re-extracting every group
    # for each pair it takes part in.
    samples = list(df.groupby(grouping_var)[target_var])

    rows = []
    for (key_a, values_a), (key_b, values_b) in combinations(samples, 2):
        f_stat, p_value = stats.f_oneway(values_a, values_b)
        rows.append({
            'Comparison': f"{key_a} vs {key_b}",
            'F-statistic': f"{f_stat:.4f}",
            'p-value': f"{p_value:.4f}"
        })

    # Passing the columns explicitly keeps the schema stable even when no
    # pair exists (e.g. a single group), where `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

# Run the pairwise tests once per grouping factor. In both cases the compared
# quantity is the estimated sigma; only the column used to form groups differs.
sigma_results = pairwise_anova(df, 'true_sigma', 'estimated_sigma')
d_results = pairwise_anova(df, 'true_d', 'estimated_sigma')

# Render each result frame as a LaTeX table with its own caption and label.
sigma_table = sigma_results.to_latex(
    index=False,
    caption="Pairwise ANOVA tests for sigma",
    label="tab:sigma_anova",
)
d_table = d_results.to_latex(
    index=False,
    caption="Pairwise ANOVA tests for d",
    label="tab:d_anova",
)

# Heading and table go on separate lines, exactly as two print calls would.
print("LaTeX table for sigma ANOVA results:", sigma_table, sep="\n")
print("\nLaTeX table for d ANOVA results:", d_table, sep="\n")

Pairwise ANOVA tests for sigma:
Pair (-6, -4): F-statistic = 82.5694, p-value = 0.0000
Pair (-6, -2): F-statistic = 96.9190, p-value = 0.0000
Pair (-4, -2): F-statistic = 61.3124, p-value = 0.0000


Pairwise ANOVA tests for d:
Pair (0, 1): F-statistic = 0.7547, p-value = 0.3886
Pair (0, 2): F-statistic = 0.0501, p-value = 0.8236
Pair (1, 2): F-statistic = 0.4292, p-value = 0.5150
