In [1]:
import numpy as np

In [16]:
# How many of the strongest edges to pick on each side (most positive / most negative).
num_edges = 12

# Toy 8x8 difference network: the diagonal and the lower triangle are all zeros.
diff_net = np.array(
    [
        [0, 1, -2, 2, -1, 3, 2, 1],
        [0, 0, 2, 2, -1, 3, -2, 1],
        [0, 0, 0, 2, -1, 0, -2, 1],
        [0, 0, 0, 0, -1, 3, -2, 1],
        [0, 0, 0, 0, 0, 3, -2, 1],
        [0, 0, 0, 0, 0, 0, -2, 1],
        [0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0],
    ]
)

In [31]:
# Strict upper triangle of diff_net (k=1 excludes the diagonal), flattened row by row.
diff_net[np.triu_indices_from(diff_net, k=1)]

array([ 1, -2,  2, -1,  3,  2,  1,  2,  2, -1,  3, -2,  1,  2, -1,  0, -2,
        1, -1,  3, -2,  1,  3, -2,  1, -2,  1,  1])

In [17]:
# Partition the flattened matrix so that the num_edges smallest entries land in
# the first num_edges slots and the num_edges largest in the last num_edges slots.
partitions = np.argpartition(diff_net, kth=(num_edges, -num_edges), axis=None)

# Translate both flat-index slices back into (row, col) index tuples.
bottom_n, top_n = (
    np.unravel_index(flat_idx, diff_net.shape)
    for flat_idx in (partitions[:num_edges], partitions[-num_edges:])
)

In [18]:
# Inspect the flat partition order and the (row, col) index tuples of both selections.
partitions, top_n, bottom_n

(array([12, 20,  2, 38,  4, 30, 46, 28, 22, 14, 45, 48, 40, 42, 44, 51, 43,
        41, 35, 49, 50, 63, 52, 53, 54, 56, 57, 58, 59, 60, 61, 62, 36,  0,
        17, 34, 32,  7,  8,  9, 55, 27, 26, 25, 24, 15, 21, 16, 33, 18, 31,
        39, 23,  1, 47, 29, 10, 11, 13, 37,  6,  5,  3, 19]),
 (array([2, 0, 5, 3, 1, 1, 1, 4, 0, 0, 0, 2]),
  array([7, 1, 7, 5, 2, 3, 5, 5, 6, 5, 3, 3])),
 (array([1, 2, 0, 4, 0, 3, 5, 3, 2, 1, 5, 6]),
  array([4, 4, 2, 6, 4, 6, 6, 4, 6, 6, 5, 0])))

In [19]:
# Values found at the selected positions: the largest entries, then the smallest entries.
diff_net[top_n], diff_net[bottom_n]

(array([1, 1, 1, 3, 2, 2, 3, 3, 2, 3, 2, 2]),
 array([-1, -1, -2, -2, -1, -2, -2, -1, -2, -2,  0,  0]))

In [25]:

# Among the lowest-valued selected entries, keep only the strictly negative ones.
bottom_edges = diff_net[bottom_n]
negative = bottom_edges < 0
negative_indices = tuple(axis_idx[negative] for axis_idx in bottom_n)
important_b_edges = bottom_edges[negative]

# Among the highest-valued selected entries, keep only the strictly positive ones.
top_edges = diff_net[top_n]
positive = top_edges > 0
positive_indices = tuple(axis_idx[positive] for axis_idx in top_n)
important_a_edges = top_edges[positive]

important_b_edges, important_a_edges, negative_indices, positive_indices

(array([-1, -1, -2, -2, -1, -2, -2, -1, -2, -2]),
 array([1, 1, 1, 3, 2, 2, 3, 3, 2, 3, 2, 2]),
 (array([1, 2, 0, 4, 0, 3, 5, 3, 2, 1]),
  array([4, 4, 2, 6, 4, 6, 6, 4, 6, 6])),
 (array([2, 0, 5, 3, 1, 1, 1, 4, 0, 0, 0, 2]),
  array([7, 1, 7, 5, 2, 3, 5, 5, 6, 5, 3, 3])))

In [26]:
# Total weight of the kept positive edges and of the kept negative edges.
important_a_edges.sum(), important_b_edges.sum()

(25, -16)

In [28]:
# 0/1 weights, one per entry of important_b_edges (1 = counted, 0 = skipped).
test = np.array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0])

# Share of the total negative edge weight covered by the counted entries.
# The denominator is taken from the data rather than the literal -16 that was
# copied from an earlier output, so the ratio stays correct if the inputs change.
np.dot(test, important_b_edges) / np.sum(important_b_edges)

0.75

In [29]:
# Sum of absolute values over every entry of diff_net.
np.abs(diff_net).sum()

46

## Tabular embedding with SVM

In [34]:
from classification import classify
import utils

In [37]:
def graphs_to_tabular(train_graphs, train_labels, test_graphs, a_label, b_label):
    """Embed every graph as the flattened strict upper triangle of its matrix.

    Args:
        train_graphs: Iterable of 2-D arrays, one per training graph.
        train_labels: Not used by this embedding (presumably kept so the
            signature matches what classify expects from its callback).
        test_graphs: Iterable of 2-D arrays, one per test graph.
        a_label: Not used by this embedding.
        b_label: Not used by this embedding.

    Returns:
        A tuple (train_points, test_points, axes_labels): two arrays holding
        one row per graph, and an always-empty list of axis labels.
    """

    def upper_triangle(adjacency):
        # k=1 leaves the diagonal out, so only off-diagonal entries are kept.
        rows, cols = np.triu_indices_from(adjacency, k=1)
        return adjacency[rows, cols]

    train_points = np.array([upper_triangle(g) for g in train_graphs])
    test_points = np.array([upper_triangle(g) for g in test_graphs])

    return train_points, test_points, []

In [73]:
# Load the brain graphs of both groups from disk.
graphs_A, graphs_B = map(
    utils.get_graphs_from_files,
    ("./datasets/children/asd/", "./datasets/children/td/"),
)

# Combine both groups into a single labelled collection.
graphs, labels = utils.label_and_concatenate_graphs(
    graphs_A,
    graphs_B,
    a_label="ASD",
    b_label="TD",
)

# Evaluate the upper-triangle tabular embedding.
classify(
    graphs,
    labels,
    graphs_to_tabular,
    a_label="ASD",
    b_label="TD",
    random_state=None,
)


Performing 5-fold cross validation...

Metrics using cumulative confusion matrix:
[[36. 13.]
 [27. 25.]]
Accuracy: 0.6039603960396039
Precision: 0.7346938775510204
Recall: 0.5714285714285714
F1: 0.6428571428571428


0.6039603960396039

## Lanciano contrast subgraphs (CSs) for classification (derived from all graphs)

In [30]:
from classification import classify
import utils
from cs_classification import cs_p1_graphs_to_points

In [32]:
def cs_graphs_to_points(train_graphs, train_labels, test_graphs, a_label, b_label, cs_a_b, cs_b_a):
    """Embed train and test graphs using two fixed contrast subgraphs.

    Both graph sets are passed to cs_p1_graphs_to_points together with the
    same pair of contrast subgraphs.

    Args:
        train_graphs: Training graphs, forwarded to cs_p1_graphs_to_points.
        train_labels: Not used by this embedding.
        test_graphs: Test graphs, forwarded to cs_p1_graphs_to_points.
        a_label: Not used by this embedding.
        b_label: Not used by this embedding.
        cs_a_b: First contrast subgraph, forwarded unchanged.
        cs_b_a: Second contrast subgraph, forwarded unchanged.

    Returns:
        A tuple (train_points, test_points, axes_labels), where axes_labels
        is always an empty list.
    """
    # Train graphs are converted first, then test graphs.
    train_points, test_points = (
        cs_p1_graphs_to_points(graph_set, cs_a_b, cs_b_a)
        for graph_set in (train_graphs, test_graphs)
    )

    return train_points, test_points, []

In [40]:
# Read the brain graph files of both groups (ASD and TD children).
graphs_A = utils.get_graphs_from_files("./datasets/children/asd/")
graphs_B = utils.get_graphs_from_files("./datasets/children/td/")

# Contrast-subgraph node lists retrieved with Lanciano's code, using the
# optimal alpha values reported for the children dataset on problem 1.
# Presumably "t" stands for TD and "a" for ASD — TODO confirm.
# NOTE(review): node 71 appears in both lists — confirm the overlap is intended.
cs_t_a = [4, 5, 6, 7, 8, 9, 13, 15, 35, 57, 58, 60, 61, 71, 88, 89, 90, 92, 93, 99, 100, 101, 102]
cs_a_t = [10, 17, 20, 30, 36, 37, 38, 39, 40, 41, 54, 55, 70, 71, 72, 73, 74, 75, 76, 77, 79, 80, 81, 85, 95]

# Combine both groups into a single labelled collection.
graphs, labels = utils.label_and_concatenate_graphs(graphs_A, graphs_B, a_label="ASD", b_label="TD")

# cs_a_t is passed as cs_a_b and cs_t_a as cs_b_a; both are forwarded to cs_graphs_to_points.
classify(graphs, labels, cs_graphs_to_points, a_label="ASD", b_label="TD", random_state=None, cs_a_b=cs_a_t, cs_b_a=cs_t_a)


Performing 5-fold cross validation...

Metrics using cumulative confusion matrix:
[[35. 14.]
 [14. 38.]]
Accuracy: 0.7227722772277227
Precision: 0.7142857142857143
Recall: 0.7142857142857143
F1: 0.7142857142857143


0.7227722772277227

In [None]:
from sklearn.model_selection import GridSearchCV

# Find Percentile Alphas

In [3]:
import utils
import numpy as np

In [6]:
# Load the children ASD and TD graphs, then subtract the TD summary graph
# from the ASD summary graph.
children_asd, children_td = map(
    utils.get_graphs_from_files,
    ("./datasets/children/asd/", "./datasets/children/td/"),
)
children_asd_td_diff = utils.summary_graph(children_asd) - utils.summary_graph(children_td)

In [None]:
# Load the male ASD and TD graphs, then subtract the TD summary graph
# from the ASD summary graph.
male_asd, male_td = map(
    utils.get_graphs_from_files,
    ("./datasets/male/asd/", "./datasets/male/td/"),
)
male_asd_td_diff = utils.summary_graph(male_asd) - utils.summary_graph(male_td)

In [None]:
# Load the eyes-closed ASD and TD graphs, then subtract the TD summary graph
# from the ASD summary graph.
eyesclosed_asd, eyesclosed_td = map(
    utils.get_graphs_from_files,
    ("./datasets/eyesclosed/asd/", "./datasets/eyesclosed/td/"),
)
eyesclosed_asd_td_diff = utils.summary_graph(eyesclosed_asd) - utils.summary_graph(eyesclosed_td)

In [None]:
# Load the adolescent ASD and TD graphs, then subtract the TD summary graph
# from the ASD summary graph.
adolescents_asd, adolescents_td = map(
    utils.get_graphs_from_files,
    ("./datasets/adolescents/asd/", "./datasets/adolescents/td/"),
)
adolescents_asd_td_diff = utils.summary_graph(adolescents_asd) - utils.summary_graph(adolescents_td)

In [22]:
# Off-diagonal upper-triangle entries of the children difference graph.
flat = children_asd_td_diff[np.triu_indices_from(children_asd_td_diff, k=1)]

# Percentile cut-offs of the signed, sign-flipped and absolute differences.
children_alpha_asd_td, children_alpha_td_asd, children_alpha_p2 = (
    np.percentile(flat, 80),
    np.percentile(-flat, 70),
    np.percentile(np.abs(flat), 70),
)
children_alpha_asd_td, children_alpha_td_asd, children_alpha_p2

(0.050235478806907374, 0.03375196232339091, 0.06828885400313972)

In [16]:
# Off-diagonal upper-triangle entries of the male difference graph.
flat = male_asd_td_diff[np.triu_indices_from(male_asd_td_diff, k=1)]

# Percentile cut-offs of the signed, sign-flipped and absolute differences.
male_alpha_asd_td, male_alpha_td_asd, male_alpha_p2 = (
    np.percentile(flat, 70),
    np.percentile(-flat, 75),
    np.percentile(np.abs(flat), 70),
)
male_alpha_asd_td, male_alpha_td_asd, male_alpha_p2

(0.01411483253588517, 0.016994190020505763, 0.0283697881066302)

In [17]:
# Off-diagonal upper-triangle entries of the eyes-closed difference graph.
flat = eyesclosed_asd_td_diff[np.triu_indices_from(eyesclosed_asd_td_diff, k=1)]

# Percentile cut-offs of the signed, sign-flipped and absolute differences.
eyesclosed_alpha_asd_td, eyesclosed_alpha_td_asd, eyesclosed_alpha_p2 = (
    np.percentile(flat, 75),
    np.percentile(-flat, 75),
    np.percentile(np.abs(flat), 70),
)
eyesclosed_alpha_asd_td, eyesclosed_alpha_td_asd, eyesclosed_alpha_p2

(0.026805658972449742, 0.024664929262844387, 0.042377140729709536)

In [18]:
# Off-diagonal upper-triangle entries of the adolescents difference graph.
flat = adolescents_asd_td_diff[np.triu_indices_from(adolescents_asd_td_diff, k=1)]

# Percentile cut-offs of the signed, sign-flipped and absolute differences.
adolescents_alpha_asd_td, adolescents_alpha_td_asd, adolescents_alpha_p2 = (
    np.percentile(flat, 70),
    np.percentile(-flat, 95),
    np.percentile(np.abs(flat), 75),
)
adolescents_alpha_asd_td, adolescents_alpha_td_asd, adolescents_alpha_p2

(0.020447420917640347, 0.0847249928754629, 0.051937874038187545)

### Compare the Contrast Subgraphs found by replication

In [1]:
from dense_subgraph import qp, sdp

In [7]:
# Run both solvers on the children ASD-TD difference graph with the same second argument.
# NOTE(review): 0.0502 looks like children_alpha_asd_td rounded to four decimals
# (presumably the alpha value) — confirm, and consider passing the variable instead.
qp(children_asd_td_diff, 0.0502)
sdp(children_asd_td_diff, 0.0502)

Time for QP: 0:00:00.051924
CS before local search: []
CS after local search: []
Time for local search: 0:00:00.006762
Time to find CS: 0:00:00.058686
None
[[ 1.         -0.84723424 -0.97470852 ... -0.99365659 -0.85379128
  -0.82916163]
 [-0.84723424  1.          0.70709968 ...  0.78213333  0.99992185
   0.99945176]
 [-0.97470852  0.70709968  1.         ...  0.99365545  0.71586063
   0.68327337]
 ...
 [-0.99365659  0.78213333  0.99365545 ...  1.          0.78984116
   0.76105036]
 [-0.85379128  0.99992185  0.71586063 ...  0.78984116  1.
   0.99896082]
 [-0.82916163  0.99945176  0.68327337 ...  0.76105036  0.99896082
   1.        ]]
Time for SDP: 0:00:02.417420
CS before local search: [2, 7, 11, 16, 18, 21, 23, 24, 25, 33, 37, 38, 41, 45, 52, 54, 57, 59, 63, 66, 69, 71, 75, 76, 77, 78, 79, 80, 84, 90, 92, 93, 96, 103, 108, 113]
CS after local search: [10 17 20 30 36 37 38 39 40 41 54 55 70 71 72 73 74 75 76 77 79 80 81 85
 95]
Time for local search: 0:00:00.302759
Time to find CS: 0:00:

array([10, 17, 20, 30, 36, 37, 38, 39, 40, 41, 54, 55, 70, 71, 72, 73, 74,
       75, 76, 77, 79, 80, 81, 85, 95])

In [12]:
# Load two saved arrays and compute their element-wise difference.
X_from_original = np.load("test.npy")
X_new_sdp = np.load("test2.npy")
X_diff = X_from_original - X_new_sdp
# NOTE(review): this total absolute difference is computed but not displayed,
# because only the cell's last expression is rendered — confirm which output is wanted.
abs(X_diff).sum()
# First entry of each array, shown side by side.
X_from_original[0], X_new_sdp[0]

0.8619145023489602