In [1]:
import datetime, pickle, subprocess
import os

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import seaborn as sns

from networks import bio_networks, utils
from create_datasets import create_nx_datasets
from create_datasets.create_table_datasets import adni_data


**1. Create graph datasets**

Create the corresponding graph datasets for the AD PPT-Ohmnet network without APOE (28 nodes, 46 edges, density: 0.1217, diameter: 6)

In [2]:
# Create graph datasets with snap_brain network without APOE gene.
# For each target, the object returned by create_nx_datasets.main is pickled to
# data/graph_datasets/<target>/AD_PPI_snap_brain_noAPOE_missense.pkl
targets = ['PET', 'PETandDX']

for target in targets:

    outdir = f'data/graph_datasets/{target}'
    # open(..., 'wb') does not create missing parent directories, so make sure
    # the output folder exists before trying to write into it.
    os.makedirs(outdir, exist_ok=True)

    result_nodes = create_nx_datasets.main('data', 'ADNI', target, 'AD', 'snap_brain_noAPOE', 'missense', None)
    outfile = f'{outdir}/AD_PPI_snap_brain_noAPOE_missense.pkl'

    with open(outfile, 'wb') as f:
        pickle.dump(result_nodes, f)

    # Report success only after the pickle has actually been written.
    print('Resulting dataset saved at:', outfile)
    print()

Network used: AD snap_brain_noAPOE
# nodes = 28
# edges = 46

Dataset used: ADNI
missense
(28, 808)
Creating samples graphs...
Class: PET. Found 410 positive subjects out of 726
Sample graph used: # nodes = 28 # edges = 46
Density = 0.12169312169312169 Diameter = 6
Resulting dataset saved at: data/graph_datasets/PET/AD_PPI_snap_brain_noAPOE_missense.pkl

Network used: AD snap_brain_noAPOE
# nodes = 28
# edges = 46

Dataset used: ADNI
missense
(28, 808)
Creating samples graphs...
Class: PETandDX. Found 182 positive subjects out of 317
Sample graph used: # nodes = 28 # edges = 46
Density = 0.12169312169312169 Diameter = 6
Resulting dataset saved at: data/graph_datasets/PETandDX/AD_PPI_snap_brain_noAPOE_missense.pkl



**2. Graph classification with GNNs**

We then trained and tested the best GNNs found for AD PPT-Ohmnet in the framework called [GraphGym](https://github.com/snap-stanford/GraphGym) (You *et al.*, 2020).

Configuration and grid files employed are in the subdirectory [graphgym_files](graphgym_files).

Summarized results obtained by GraphGym and other models are in **COMPLETE**

**3. Create table datasets**

Create table datasets (without APOE) for the targets and genes of interest to use as input for more-standard machine learning models.

In [3]:
# Create table datasets
data = pd.read_csv('data/table_datasets/AD_PPI_missense_ADNI_labeled.csv', index_col = 0)

# NOTE(review): `metadata_file` is not assigned in any cell above, so this cell
# raises NameError on a fresh kernel. Define it before this call (presumably the
# path to the ADNI metadata file -- confirm against adni_data's signature).
data_wclass = (
    adni_data('missense', 'data/other_networks/AD_SNAP_PPI_brain_noAPOE.edgelist', 'data/table_datasets/AD_PPI_missense_ADNI_labeled.csv', metadata_file)
    # Keep only the columns that contain at least one non-zero value.
    .loc[:, lambda df: (df != 0).any(axis=0)]
    # errors='ignore': the goal is simply "no APOE column in the result"; do not
    # fail with KeyError when the column is already absent (e.g. on re-run or
    # because the no-APOE network never produced it).
    .drop(columns=['APOE'], errors='ignore')
)
data_wclass

TypeError: adni_data() missing 2 required positional arguments: 'data_file' and 'metadata_file'

**4. Run non-GNN models**

Run more-standard machine learning models 3 times (to properly compare with the 3 runs for each GNN model).

In [1]:
# Classification with other non-GNN models