In [1]:
import csv
import os
import sys

import networkx as nx
import numpy as np
import pandas as pd
from scipy.stats import norm

from src.netmix2 import *
from src.common import *

In [4]:
# Put the local "locfdrpython" directory (relative to the working directory)
# at the front of the import path, then import its locfdr function.
# NOTE(review): presumably the path entry is needed because the package's own
# modules import each other by bare name -- confirm.
sys.path.insert(0,"locfdrpython")
from locfdrpython.locfdr import locfdr


This Jupyter notebook provides a step-by-step guide for running NetMix2 using the propagation family.

# Define inputs

In [5]:
# Input files, given relative to the notebook's working directory.
# edge_list is passed to load_network; pvalues_file is passed to load_pvalues.
edge_list = 'data/edge_list.tsv'
pvalues_file = 'data/gene_scores.tsv'

In [6]:
# Verbosity level: values above 0 turn on the progress printouts in later cells.
verbosity = 1

In [7]:
# Number of highest-similarity node pairs to keep as edges of G_delta; used
# below to pick the similarity cutoff delta.
num_edges_in_Gdelta = 100000

In [8]:
# Target edge density of the altered subnetwork; used below to derive rho.
density_threshold = 0.05

In [9]:
# Time limit in hours; it is multiplied by 3600 when passed to netmix_edgedense.
time_limit = 1

# Load network

In [10]:
# Read the edge list into the node list and the network matrix A_network
# (later cells report np.sum(A_network)/2 as the number of edges).
# NOTE(review): the trailing 1 looks like a verbosity level; if it is, pass
# `verbosity` instead of the literal -- confirm against load_network's signature.
node_list, A_network = load_network(edge_list, 1)

loading network
Number of nodes: 10725, Number of edges: 238793


# Load gene scores (P-values)

In [6]:
# Load the gene scores from pvalues_file for use with node_list.
# NOTE(review): the trailing 1 looks like a verbosity level; if it is, pass
# `verbosity` instead of the literal -- confirm against load_pvalues' signature.
pvals_list = load_pvalues(pvalues_file, node_list, 1)

loading genescores


# Find the largest connected component in the network for genes with p-values

In [7]:
# Restrict the scores, node list and network to the genes kept by the helper.
# All three names are rebound to the restricted versions, so the unrestricted
# inputs are no longer available after this cell runs.
# NOTE(review): the trailing 1 looks like a verbosity level; if it is, pass
# `verbosity` instead of the literal -- confirm against the helper's signature.
(pvals_list, node_list, A_network) = restrict_to_genes_in_network(pvals_list, node_list, A_network, 1)

number of nodes in G: 10422
number of edges in G: 228751


# Compute z-scores from the p-values

In [10]:
# Convert the P-values to z-scores, then apply the project's post-processing.
# NOTE(review): post_process_zscores is opaque from here, and the number shown
# under this cell is printed by one of these helpers -- confirm what it reports.
zscores = compute_zscores(pvals_list)
zscores = post_process_zscores(zscores)

-5.199337582290661


# Compute the altered subnetwork size using local FDR

In [11]:
# Null-distribution choice for the local FDR fit. nulltype_ind is what locfdr
# consumes; nulltype_name is what correct_nans_from_locfdr receives in the next
# cell, so the two must be changed together.
# NOTE(review): in locfdr's documented convention nulltype=1 is the MLE null,
# which would match "mlest" -- confirm for this Python port.
nulltype_ind = 1
nulltype_name = "mlest"

# Pass the configured index instead of repeating the literal 1, so that editing
# nulltype_ind above actually changes the fit.
r_locfdr = locfdr(zscores, nulltype=nulltype_ind, plot=0)


  wa.warn("f(z) misfit = " + str(round(D,1)) + ". Rerun with larger df.")


In [12]:
# Derive the non-null count from the locfdr result; it is used below as the
# size of the altered subnetwork.
# NOTE(review): going by its name, the helper also repairs NaNs in the locfdr
# output -- confirm in the project source.
nonnull_count = correct_nans_from_locfdr(r_locfdr, zscores, nulltype_name)
if verbosity > 0:
    print(nonnull_count)

283


# Construct the similarity matrix and similarity threshold graph

In [13]:
# Build the node-by-node similarity matrix from the network.
# NOTE(review): judging by the names this is a personalized-PageRank kernel and
# its row sums -- confirm in src.netmix2. PPR_mat_rowsums is not used by any of
# the cells shown in this notebook.
PPR_sim_mat, PPR_mat_rowsums = compute_ppr_kernel(A_network)

In [15]:
###########################################################
# compute G_delta
# threshold similarity matrix
sim_mat = PPR_sim_mat

# Zero the diagonal on a copy so self-similarities can never become edges.
# fill_diagonal avoids building a second dense n x n matrix just to subtract
# it, and leaves PPR_sim_mat itself untouched.
sim_mat_nodiag = sim_mat.copy()
np.fill_diagonal(sim_mat_nodiag, 0)

# One similarity per unordered node pair: the strict upper triangle.
pair_sims = sim_mat_nodiag[np.triu_indices(sim_mat_nodiag.shape[0], 1)]

# Fail with a clear message instead of an IndexError (or, for a negative
# value, a silently wrapped index) when the requested edge count is out of range.
if not 0 <= num_edges_in_Gdelta < pair_sims.size:
    raise ValueError(
        "num_edges_in_Gdelta must be in [0, {}), got {}".format(
            pair_sims.size, num_edges_in_Gdelta))

# delta is the (num_edges_in_Gdelta + 1)-th largest pairwise similarity, so a
# strict `> delta` test keeps num_edges_in_Gdelta pairs (barring ties at delta).
# np.partition places exactly that order statistic without sorting every pair.
kth = pair_sims.size - 1 - num_edges_in_Gdelta
delta = np.partition(pair_sims, kth)[kth]


In [16]:
# 0/1 adjacency matrix of G_delta: an entry is 1 where the off-diagonal
# similarity is strictly greater than delta.
sim_mat_delta = (sim_mat_nodiag > delta).astype(int)

# Column sums of the adjacency matrix, and the edge count (the full matrix
# total is halved to get the count).
degs_PPR_gwas = sim_mat_delta.sum(axis=0)
num_edges_delta = int(sim_mat_delta.sum() / 2)

print(f"delta: {delta}")
print(f'number of edges in G: {np.sum(A_network) / 2}')
print(f'number of edges in G_delta: {sim_mat_delta.sum() / 2}')

delta: 0.0029081680733686606
number of edges in G: 228751.0
number of edges in G_delta: 100000.0


# Define the parameters for running NetMix2

In [19]:
###########################################################
# NetMix2 parameters, derived from the local FDR estimate and the inputs above.
target_edge_density = density_threshold

# s: altered-subnetwork size; alpha: that size as a fraction of all nodes.
s = nonnull_count
alpha = s / len(node_list)

# rho: the target density scaled by (s - 1).
rho = target_edge_density * (s - 1)

print(f'size of altered subnetwork: {s}')
print(f'target edge density: {target_edge_density}')


size of altered subnetwork: 283
target edge density: 0.05


# Run NetMix2

In [24]:
# Progress messages for the notebook reader.
if verbosity > 0:
    print('running netmix')
    print("time_limit", time_limit)

# Flag handed to netmix_edgedense as `output`. It is now assigned on every
# path: the previous if/elif only set it when verbosity > 0 (the
# `elif verbosity > 1` arm could never run), so verbosity = 0 ended in a
# NameError at the call below.
output = verbosity > 0

# time_limit is configured in hours and converted to seconds for the call.
est_subnetwork = netmix_edgedense(sim_mat_delta, rho, zscores, alpha, edge_dense_linear=True, output=output, time_limit=3600*time_limit)

running netmix
time_limit 1
n: 10422, clique_size: 283
Academic license - for non-commercial use only - expires 2022-08-05
Using license file /u/typark/gurobi.lic
Changed value of parameter TimeLimit to 3600.0
   Prev: inf  Min: 0.0  Max: inf  Default: inf
here1
here2
here3
here4
Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (linux64)
Thread count: 28 physical cores, 56 logical processors, using up to 28 threads
Optimize a model with 1 rows, 10422 columns and 10422 nonzeros
Model fingerprint: 0x81c14ed2
Model has 1 quadratic constraint
Variable types: 0 continuous, 10422 integer (10422 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  QMatrix range    [1e+00, 1e+00]
  QLMatrix range   [7e+00, 7e+00]
  Objective range  [7e-05, 8e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [3e+02, 3e+02]
Found heuristic solution: objective -0.0000000
Presolve time: 0.50s
Presolved: 200002 rows, 110422 columns, 520844 nonzeros
Variable types: 0 continuous, 110422 integer (

# Analyze the altered subnetwork

In [22]:
# Summarise the solution: its size, its member genes, the subgraph of G_delta
# it induces, and that subgraph's edge density.
solution_size = len(est_subnetwork)
est_subnetwork_genes = node_list[est_subnetwork] if solution_size > 0 else []

if solution_size > 0:
    solution_network = sim_mat_delta[np.ix_(est_subnetwork, est_subnetwork)]
    # The matrix total is halved to count each edge once (assumes the
    # adjacency matrix is symmetric).
    num_edges_in_solution = solution_network.sum() / 2
else:
    solution_network = None
    num_edges_in_solution = 0

# A solution with fewer than two nodes has no possible pairs; report density 0
# rather than dividing by zero.
num_possible_edges = solution_size * (solution_size - 1) / 2
if num_possible_edges > 0:
    solution_network_density = num_edges_in_solution / num_possible_edges
else:
    solution_network_density = 0

print("Number of vertices in altered subnetwork: {}".format(solution_size))
print("Density of altered subnetwork: {}".format(solution_network_density))


NameError: name 'est_subnetwork' is not defined

# Write the altered subnetwork to a file

In [27]:
###########################################################
# write solution
# Directory that receives the result files; set it to '' to skip writing.
# The previous code joined paths onto `output`, which is the boolean flag from
# the run cell, so os.path.join raised a TypeError.
# NOTE(review): 'output' is a placeholder default -- point it at the directory
# where results should be stored.
output_dir = 'output'
if output_dir:
    # Create the directory if it does not exist yet.
    os.makedirs(output_dir, exist_ok=True)
    write_list_to_file(os.path.join(output_dir, 'netmix_subnetwork.tsv'), est_subnetwork_genes)
    write_list_to_file(os.path.join(output_dir, 'node_list.tsv'), node_list)

NameError: name 'args' is not defined