In [None]:
from dense_subgraph import sdp, qp
import numpy as np
import json
import utils
import classification
from pipeline import Pipeline
from de_transformer import DiscriminativeEdgesTransformer
from cs_transformer import ContrastSubgraphTransformer
from iidaka_transformer import IidakaTransformer
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler

In [None]:
# Seed handed to every cross-validation splitter and grid search in this notebook.
# Use an integer for reproducible results, or None for a non-deterministic run.
random_state = 42


## SELECT CATEGORY ##
# Categories listed in this notebook:
#   "children", "adolescents", "eyesclosed", "male", "other", "all"
DATASET_NAME = "all"


## SELECT DATASET PATH ##
# Alternative data source (swap both lines together):
#   GRAPH_DIR_PREFIX = "./data/generated_filt_global/pearson_corr_raw/"
#   DATA_DESCRIPTOR = "Raw-Correlation"
GRAPH_DIR_PREFIX = "./data/lanciano_datasets_corr_thresh_80/"
DATA_DESCRIPTOR = "Lanciano-Processed"


# The weighted flag is switched on only for the "Raw-Correlation" data source.
weighted = DATA_DESCRIPTOR == "Raw-Correlation"

In [None]:
# Class labels for the two groups (A and B).
a_label, b_label = "ASD", "TD"

# Per-group graph directories for the selected category.
A_GRAPH_DIR = f"{GRAPH_DIR_PREFIX}{DATASET_NAME}/asd/"
B_GRAPH_DIR = f"{GRAPH_DIR_PREFIX}{DATASET_NAME}/td/"

In [None]:
# Load the brain graph files of both groups into numpy arrays.

if DATASET_NAME != "all":
    # Category-specific run: each group is read from its own directory.
    graphs_A = utils.get_graphs_from_files(A_GRAPH_DIR)
    graphs_B = utils.get_graphs_from_files(B_GRAPH_DIR)

else:
    # "all" run: unique.json maps each group label to the list of graph
    # files to load for that group.
    with open(GRAPH_DIR_PREFIX + "unique.json", "r") as fp:
        file_lists = json.load(fp)

    graphs_A = np.array(list(map(np.loadtxt, file_lists[a_label])))
    graphs_B = np.array(list(map(np.loadtxt, file_lists[b_label])))

# Hand both groups and their labels to the project helper, which returns the
# combined graphs and labels used by every grid search below.
graphs, labels = utils.label_and_concatenate_graphs(
    graphs_A=graphs_A,
    graphs_B=graphs_B,
    a_label=a_label,
    b_label=b_label,
)

In [None]:
# Group sizes for the results files, chosen by label rather than by A/B position.
asd_count = len(graphs_B) if a_label != "ASD" else len(graphs_A)
td_count = len(graphs_A) if b_label != "TD" else len(graphs_B)

In [None]:
# Show the (ASD, TD) group sizes as this cell's output.
(asd_count, td_count)

## Discriminative Edges

In [None]:
# Candidate parameter values to search over, keyed by the class name of the
# pipeline step they apply to.
p_grid = {
    "SVC": dict(C=[0.1, 1, 100], gamma=[0.0001, 0.001, 0.01, 0.1]),
    "DiscriminativeEdgesTransformer": dict(
        a_label=[a_label],
        b_label=[b_label],
        num_edges=[2, 3, 4, 5, 6],
        weighted=[weighted],
    ),
}

# Pipeline step classes, in execution order.
pipe = [DiscriminativeEdgesTransformer, StandardScaler, SVC]

# Shuffled, stratified 5-fold split driven by the notebook-wide seed.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)

# Only the first returned value is used; the second is discarded.
results, _ = classification.grid_search_cv(
    X=graphs,
    y=labels,
    pipeline_steps=pipe,
    step_param_grids=p_grid,
    cv=cv,
    random_state=random_state,
)

# Write the summary and best results, with the searched grid and group sizes.
classification.write_results_to_file(
    filename=f'./outputs/{DATA_DESCRIPTOR}-GridSearchCV-DE-{DATASET_NAME}.txt',
    summary=results["summary"],
    results=results["best_results"],
    parameter_grid=p_grid,
    asd_count=asd_count,
    td_count=td_count,
)

## Iidaka Feature Selection

In [None]:
# Candidate parameter values to search over, keyed by the class name of the
# pipeline step they apply to.
p_grid = {
    "SVC": dict(C=[0.1, 1, 100], gamma=[1e-06, 1e-05, 0.0001]),
    "IidakaTransformer": dict(
        a_label=[a_label],
        b_label=[b_label],
        effect_size_threshold=[0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35],
    ),
}

# Pipeline step classes, in execution order.
pipe = [IidakaTransformer, StandardScaler, SVC]

# Shuffled, stratified 5-fold split driven by the notebook-wide seed.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)

# Only the first returned value is used; the second is discarded.
results, _ = classification.grid_search_cv(
    X=graphs,
    y=labels,
    pipeline_steps=pipe,
    step_param_grids=p_grid,
    cv=cv,
    random_state=random_state,
)

# Write the summary and best results, with the searched grid and group sizes.
classification.write_results_to_file(
    filename=f'./outputs/{DATA_DESCRIPTOR}-GridSearchCV-Iidaka-{DATASET_NAME}.txt',
    summary=results["summary"],
    results=results["best_results"],
    parameter_grid=p_grid,
    asd_count=asd_count,
    td_count=td_count,
)

# Contrast Subgraph Methods

In [None]:
# Shared setup for all Contrast Subgraph searches below.

# Shuffled, stratified 5-fold split driven by the notebook-wide seed.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)

# Pipeline step classes, in execution order.
pipe = [ContrastSubgraphTransformer, StandardScaler, SVC]

## CSP1 QP N3

In [None]:
# Candidate parameter values for problem 1 with the qp solver and num_cs = 3.
p_grid = {
    "SVC": dict(C=[100], gamma=[0.1]),
    "ContrastSubgraphTransformer": dict(
        a_label=[a_label],
        b_label=[b_label],
        alpha=[None],
        alpha2=[None],
        # ASD - TD
        percentile=[65, 70, 75, 80],
        # TD - ASD
        percentile2=[65, 70, 75, 95],
        problem=[1],
        solver=[qp],
        num_cs=[3],
    ),
}

# `pipe` and `cv` come from the shared Contrast Subgraph setup cell.
results, _ = classification.grid_search_cv(
    X=graphs,
    y=labels,
    pipeline_steps=pipe,
    step_param_grids=p_grid,
    cv=cv,
    random_state=random_state,
)

# Write the summary and best results, with the searched grid and group sizes.
classification.write_results_to_file(
    filename=f'./outputs/{DATA_DESCRIPTOR}-GridSearchCV-CSP1-QP-N3-{DATASET_NAME}.txt',
    summary=results["summary"],
    results=results["best_results"],
    parameter_grid=p_grid,
    asd_count=asd_count,
    td_count=td_count,
)

## CSP2 QP N3

In [None]:
# Candidate parameter values for problem 2 with the qp solver and num_cs = 3.
p_grid = {
    "SVC": dict(C=[0.1, 1, 100], gamma=[0.0001, 0.001, 0.1, 1]),
    "ContrastSubgraphTransformer": dict(
        a_label=[a_label],
        b_label=[b_label],
        alpha=[None],
        alpha2=[None],
        percentile=[65, 70, 75, 80, 85, 90, 95],
        # TD - ASD (left as None for this problem)
        percentile2=[None],
        problem=[2],
        solver=[qp],
        num_cs=[3],
    ),
}

# `pipe` and `cv` come from the shared Contrast Subgraph setup cell.
results, _ = classification.grid_search_cv(
    X=graphs,
    y=labels,
    pipeline_steps=pipe,
    step_param_grids=p_grid,
    cv=cv,
    random_state=random_state,
)

# Write the summary and best results, with the searched grid and group sizes.
classification.write_results_to_file(
    filename=f'./outputs/{DATA_DESCRIPTOR}-GridSearchCV-CSP2-QP-N3-{DATASET_NAME}.txt',
    summary=results["summary"],
    results=results["best_results"],
    parameter_grid=p_grid,
    asd_count=asd_count,
    td_count=td_count,
)

## CSP1 SDP N1

In [None]:
# Candidate parameter values for problem 1 with the sdp solver and num_cs = 1.
p_grid = {
    "SVC": dict(C=[100], gamma=[0.1]),
    "ContrastSubgraphTransformer": dict(
        a_label=[a_label],
        b_label=[b_label],
        alpha=[None],
        alpha2=[None],
        # ASD - TD
        percentile=[65, 70, 75, 80],
        # TD - ASD
        percentile2=[65, 70, 75, 95],
        problem=[1],
        solver=[sdp],
        num_cs=[1],
    ),
}

# `pipe` and `cv` come from the shared Contrast Subgraph setup cell.
results, _ = classification.grid_search_cv(
    X=graphs,
    y=labels,
    pipeline_steps=pipe,
    step_param_grids=p_grid,
    cv=cv,
    random_state=random_state,
)

# Write the summary and best results, with the searched grid and group sizes.
classification.write_results_to_file(
    filename=f'./outputs/{DATA_DESCRIPTOR}-GridSearchCV-CSP1-SDP-N1-{DATASET_NAME}.txt',
    summary=results["summary"],
    results=results["best_results"],
    parameter_grid=p_grid,
    asd_count=asd_count,
    td_count=td_count,
)

## CSP2 SDP N1

In [None]:
# Candidate parameter values for problem 2 with the sdp solver and num_cs = 1.
p_grid = {
    "SVC": dict(C=[1, 100], gamma=[0.001, 0.1]),
    "ContrastSubgraphTransformer": dict(
        a_label=[a_label],
        b_label=[b_label],
        alpha=[None],
        alpha2=[None],
        percentile=[65, 70, 75, 80, 85, 90, 95],
        percentile2=[None],
        problem=[2],
        solver=[sdp],
        num_cs=[1],
    ),
}

# `pipe` and `cv` come from the shared Contrast Subgraph setup cell.
results, _ = classification.grid_search_cv(
    X=graphs,
    y=labels,
    pipeline_steps=pipe,
    step_param_grids=p_grid,
    cv=cv,
    random_state=random_state,
)

# Write the summary and best results, with the searched grid and group sizes.
classification.write_results_to_file(
    filename=f'./outputs/{DATA_DESCRIPTOR}-GridSearchCV-CSP2-SDP-N1-{DATASET_NAME}.txt',
    summary=results["summary"],
    results=results["best_results"],
    parameter_grid=p_grid,
    asd_count=asd_count,
    td_count=td_count,
)