Skip to content

Commit

Permalink
Add: NetworkGroup.get_consensus_network method
Browse files Browse the repository at this point in the history
1. This method returns the consensus network of the `NetworkGroup`. It
supports the `simple_voting` and `scaled_sum` methods
2. Fix the `NetworkGroup.get_adjacency_vectors` method. It now returns a
pd.DataFrame instead of a list of pd.Series
  • Loading branch information
dileep-kishore committed Oct 5, 2021
1 parent 939dafa commit 56b6b65
Showing 1 changed file with 116 additions and 33 deletions.
149 changes: 116 additions & 33 deletions micone/main/network_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
Module that defines the `NetworkGroup` object and methods to read, write and manipulate it
"""

from collections import defaultdict
from collections.abc import Collection
from itertools import product
from typing import Any, Dict, Iterator, List, Union
from itertools import groupby, product
from typing import Any, Dict, Iterator, List, Tuple, Union

import networkx as nx
import numpy as np
Expand Down Expand Up @@ -41,8 +42,11 @@ class NetworkGroup(Collection):
"""

def __init__(self, networks: List[Network]) -> None:
# dict(cid => dict(id_old => id_new))
self.nodeid_map: Dict[int, Dict[str, str]] = dict()
self._networks = networks
# dict(s_new-t_new => List[Tuple[cid, s_old-t_old], ...])
self.linkid_revmap: Dict[str, List[Tuple[int, str]]] = defaultdict(list)
self._networks = tuple(networks)
if not networks or [n for n in networks if not isinstance(n, Network)]:
raise ValueError(
"The networks parameter must be a list of one or more networks"
Expand Down Expand Up @@ -74,42 +78,52 @@ def __add__(self, other: "NetworkGroup") -> "NetworkGroup":
return NetworkGroup(networks)

def _combine_nodes(self, all_nodes: Dict[int, DType]) -> DType:
"""
Combine the nodes of the individual networks into a single list

Nodes are matched across networks by their `taxid`. The first node seen for
a `taxid` is copied into the combined list under a new id (`id<position>`),
with `children` reset to an empty list and `abundance` reset to `None`.
Later nodes with the same `taxid` reuse that entry.
As a side effect, `self.nodeid_map[cid]` is filled with `old id => new id`
for every node of every network.

Parameters
----------
all_nodes : Dict[int, DType]
    Mapping of context id => list of node dictionaries of that network

Returns
-------
DType
    The combined list of node dictionaries
"""
# NOTE(review): this hunk is a rendered diff, so lines using `node` (presumably
# the pre-change version) and lines using `network_node` (presumably the
# post-change version) both appear below; confirm against the repository.
nodes: DType = []
node_hash: Dict[int, int] = dict() # taxid => nodes.index
# A single network is returned untouched: ids are not rewritten and
# `self.nodeid_map` is not populated. The lookup requires the key to be 0.
if len(all_nodes) == 1:
return all_nodes[0]
for cid, network_nodes in all_nodes.items():
self.nodeid_map[cid] = dict()
for node in network_nodes:
if node["taxid"] not in node_hash:
for network_node in network_nodes:
if network_node["taxid"] not in node_hash:
# First occurrence of this taxid: the new id is derived from the
# position the node will occupy in the combined list
id_ = len(nodes)
id_old = node["id"]
id_old = network_node["id"]
id_new = f"id{id_}"
nodes.append(
{**node, **{"id": id_new, "children": [], "abundance": None}}
{
**network_node,
**{"id": id_new, "children": [], "abundance": None},
}
)
node_hash[node["taxid"]] = id_
node_hash[network_node["taxid"]] = id_
self.nodeid_map[cid][id_old] = id_new
else:
# taxid already present: map this network's id onto the existing entry
id_old = node["id"]
ind = node_hash[node["taxid"]]
id_old = network_node["id"]
ind = node_hash[network_node["taxid"]]
id_new = nodes[ind]["id"]
self.nodeid_map[cid][id_old] = id_new
return nodes

def _combine_links(self, all_links: Dict[int, DType]) -> DType:
""" Combine links of individual networks into a single list """
"""Combine links of individual networks into a single list"""
links = []
if len(all_links) == 1:
for link in all_links[0]:
source, target = link["source"], link["target"]
self.linkid_revmap[f"{source}-{target}"].append(
(0, f"{source}-{target}")
)
links.append({**link, "context_index": 0})
return links
for cid, network_links in all_links.items():
for link in network_links:
source, target = link["source"], link["target"]
new_source = self.nodeid_map[cid][source]
new_target = self.nodeid_map[cid][target]
self.linkid_revmap[f"{new_source}-{new_target}"].append(
(cid, f"{source}-{target}")
)
links.append(
{
**link,
Expand Down Expand Up @@ -159,22 +173,22 @@ def _combine_networks(

@property
def nodes(self) -> DType:
"""
The list of nodes in the `NetworkGroup` and their corresponding properties

Returns
-------
DType
    One attribute dictionary per node of `self.graph`, as yielded by
    `self.graph.nodes(data=True)`; the node keys themselves are dropped
"""
return [data for _, data in self.graph.nodes(data=True)]

@property
def links(self) -> DType:
"""
The list of links in the `NetworkGroup` and their corresponding properties

Returns
-------
DType
    One attribute dictionary per edge of `self.graph`, as yielded by
    `self.graph.edges(data=True)`; the endpoint keys themselves are dropped
"""
return [data for _, _, data in self.graph.edges(data=True)]

@property
def contexts(self) -> DType:
"""
The contexts for the group of networks

Returns
-------
DType
    The value stored under the `"contexts"` key of the graph-level
    attributes of `self.graph`
"""
return self.graph.graph["contexts"]

def get_adjacency_vectors(self, key: str) -> List[pd.Series]:
def get_adjacency_vectors(self, key: str) -> pd.DataFrame:
"""
Returns the adjacency matrix for each context as a `pd.Series`
Returns the adjacency matrix for each context as a `pd.DataFrame`
Parameters
----------
Expand All @@ -183,22 +197,28 @@ def get_adjacency_vectors(self, key: str) -> List[pd.Series]:
Returns
-------
List[pd.Series]:
The list of adjacency vectors
pd.DataFrame:
The DataFrame containing adjacency vectors as columns
"""
ids = list(self.nodes)
size = len(ids) * len(ids)
# NOTE: This will consider id1-id2 and id2-id1 as different (even for undirected)
index = [f"{id1}-{id2}" for id1, id2 in product(ids, repeat=2)]
adj_vector_list: List[pd.Series] = [
pd.Series(np.zeros((size), dtype=float), index=index)
]
adj_vector_df: pd.DataFrame = pd.concat(
[pd.Series(np.zeros((size), dtype=float), index=index)],
join="outer",
axis=1,
)
adj_vector_df.fillna(0.0, inplace=True)
graph = self.graph
for source, target, data in graph.edges(data=True):
# NOTE: networkx automatically handles directionality (source -> target) here
for source, target, data in graph.edges(data=True, keys=False):
cid = data["cid"]
id_ = f"{source}-{target}"
adj_vector_list[cid][id_] = data[key]
return adj_vector_list
adj_vector_df.loc[id_, cid] = data[key]
return adj_vector_df

# FIXME: Doesn't affect the NetworkGroup object
def update_thresholds(
self, interaction_threshold: float = 0.3, pvalue_threshold: float = 0.05
) -> None:
Expand All @@ -218,6 +238,7 @@ def update_thresholds(
network.interaction_threshold = interaction_threshold
network.pvalue_threshold = pvalue_threshold

# FIXME: Doesn't affect the NetworkGroup object
def filter_links(self, pvalue_filter: bool, interaction_filter: bool) -> DType:
"""
The links of the networks after applying filtering
Expand Down Expand Up @@ -341,6 +362,74 @@ def load_json(cls, fpath: str) -> "NetworkGroup":
networks.append(Network.load_json(raw_data=network_raw_data))
return cls(networks)

def get_consensus_network(
    self, cids: List[int], method: str = "simple_voting", parameter: float = 0.0
) -> "NetworkGroup":
    """
    Get the consensus network for the networks selected by `cids`

    Every link of the selected networks is scored across those networks. Links
    that fail the chosen rule are removed from a copy of each selected
    network's graph, and the pruned graphs are wrapped in a new `NetworkGroup`.
    The networks held by this object are not modified.

    Parameters
    ----------
    cids : List[int]
        The context ids (positions in the group's list of networks) that are to
        be used in the merger. Duplicates are ignored.
    method : str, {"simple_voting", "scaled_sum"}
        Default value is simple_voting
        simple_voting: a link is kept when it has a non-zero weight in at
        least `floor(parameter * n)` of the `n` selected networks
        scaled_sum: the weights of each network are divided by that network's
        largest absolute weight; a link is kept when the sum of its scaled
        weights is strictly greater than `(n - 1) * parameter`
    parameter : float
        Default value is 0.0 (for simple_voting this is the union of all the
        links)

    Returns
    -------
    consensus_network
        The `NetworkGroup` that represents the consensus network

    Raises
    ------
    ValueError
        If `method` is not supported or `cids` is empty
    KeyError
        If a context id does not refer to a network of this group
    """

    # Method 1: Simple voting method
    def simple_voting(weights: pd.DataFrame, parameter: float) -> List[str]:
        """Return the links present in fewer networks than required"""
        size = weights.shape[1]  # no. of networks
        num_req_edges = np.floor(parameter * size)
        num_actual_edges = (weights != 0).sum(axis=1)
        # Element-wise comparison; `not <Series>` raises ValueError in pandas
        too_few = num_actual_edges < num_req_edges
        return list(weights.index[too_few.to_numpy()])

    # Method 2: Scaled sum method
    def scaled_sum(weights: pd.DataFrame, parameter: float) -> List[str]:
        """Return the links whose scaled weight sum does not exceed the cutoff"""
        size = weights.shape[1]  # no. of networks
        # A network without any non-zero weight would give 0 / 0; scale it by 1
        scale = weights.abs().max(axis=0).replace(0.0, 1.0)
        weights_scaled = weights.div(scale, axis="columns")
        parameter_scaled = (size - 1) * parameter
        keep = weights_scaled.sum(axis=1) > parameter_scaled
        return list(weights.index[(~keep).to_numpy()])

    def split_link_id(graph: Any, link_id: str) -> Tuple[str, str]:
        """Recover (source, target) from "source-target" when ids may contain "-" """
        pos = link_id.find("-")
        while pos != -1:
            source, target = link_id[:pos], link_id[pos + 1 :]
            if graph.has_edge(source, target):
                return source, target
            pos = link_id.find("-", pos + 1)
        # No split matches an edge: fall back to the first hyphen so that
        # `remove_edge` reports the missing edge itself
        source, _, target = link_id.partition("-")
        return source, target

    voting_methods = {"simple_voting": simple_voting, "scaled_sum": scaled_sum}
    if method not in voting_methods:
        raise ValueError("Only methods supported are simple_voting and scaled_sum")
    selected = list(dict.fromkeys(cids))  # drop duplicates, keep order
    if not selected:
        raise ValueError("The cids parameter must contain at least one context id")
    valid_cids = range(len(self._networks))
    unknown = [cid for cid in selected if cid not in valid_cids]
    if unknown:
        raise KeyError(f"Unknown context ids: {unknown}")

    # Step 1: Filter by "cids" and make copies of graphs
    # The copies are keyed by the original context id because that is the id
    # stored in `self.linkid_revmap`
    graph_dict = {
        cid: network.graph.copy()
        for cid, network in enumerate(self._networks)
        if cid in selected
    }
    # A context without links may have no column, and columns added while the
    # adjacency vectors are filled can contain NaN; both mean "no link"
    weights: pd.DataFrame = (
        self.get_adjacency_vectors("weight")
        .reindex(columns=selected, fill_value=0.0)
        .fillna(0.0)
    )

    # Step 2: Apply voting method to each multiedge
    # indices_removal has {new_id_source}-{new_id_target}
    indices_removal = voting_methods[method](weights, parameter)

    # Step 3: Use indices_removal on the networks
    for ind in indices_removal:
        # `.get` avoids inserting an empty entry into the defaultdict for
        # every node pair that is not a link
        for cid, ind_old in self.linkid_revmap.get(ind, ()):
            graph = graph_dict.get(cid)
            if graph is None:
                continue  # the link belongs to a context outside the selection
            source_old, target_old = split_link_id(graph, ind_old)
            graph.remove_edge(source_old, target_old)
    new_networks = [Network.load_graph(graph) for graph in graph_dict.values()]

    # Step 4: Return NetworkGroup object
    return NetworkGroup(new_networks)

def combine_pvalues(self, cids: List[str]) -> pd.Series:
"""
Combine pvalues of links in the `cids` using Brown's p-value merging method
Expand All @@ -355,14 +444,8 @@ def combine_pvalues(self, cids: List[str]) -> pd.Series:
pvalues_combined
The `pd.Series` containing the combined pvalues
"""
pvalue_vectors = self.get_adjacency_vectors("pvalue")
weight_vectors = self.get_adjacency_vectors("weight")
pvalue_df: pd.DataFrame = pd.concat(
[pvalue_vectors[i] for i in cids], join="outer"
)
weight_df: pd.DataFrame = pd.concat(
[weight_vectors[i] for i in cids], join="outer"
)
pvalue_df = self.get_adjacency_vectors("pvalue")[cids]
weight_df = self.get_adjacency_vectors("weight")[cids]
# E[psi] = 2 * k
k = pvalue_df.shape[1]
expected_value = 2 * k
Expand Down

0 comments on commit 56b6b65

Please sign in to comment.