Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ install_requires =
scipy
pybel==0.13.2
pandas
openpyxl

# Random options
zip_safe = false
Expand Down
99 changes: 54 additions & 45 deletions src/diffupy/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

"""Command line interface for DiffuPy."""
"""Command line interface for diffuPy."""

import json
import logging
Expand All @@ -10,12 +10,13 @@
import time

import click
from diffupy.process_network import get_kernel_from_network_path

from .constants import OUTPUT, METHODS, EMOJI
from .constants import OUTPUT, METHODS, EMOJI, RAW, CSV, JSON
from .diffuse import diffuse as run_diffusion
from .kernels import regularised_laplacian_kernel
from .process_input import process_input
from .utils import process_network_from_cli
from .process_input import process_map_and_format_input_data_for_diff
from .process_network import process_graph_from_file

logger = logging.getLogger(__name__)

Expand All @@ -42,9 +43,9 @@ def main():
)
@click.option('-l', '--log', is_flag=True, help='Activate debug mode')
def kernel(
network: str,
output: str = OUTPUT,
log: bool = None
graph: str,
output: str = OUTPUT,
log: bool = None
):
"""Generate a kernel for a given network."""
# Configure logging level
Expand All @@ -55,20 +56,20 @@ def kernel(
logging.basicConfig(level=logging.INFO)
logger.setLevel(logging.INFO)

click.secho(f'{EMOJI} Loading graph from {network} {EMOJI}')
click.secho(f'{EMOJI} Loading graph from {graph} {EMOJI}')

graph = process_network_from_cli(network)
graph = process_graph_from_file(graph)

click.secho(f'{EMOJI} Calculating regularized Laplacian kernel. This might take a while... {EMOJI}')
click.secho(f'{EMOJI} Generating regularized Laplacian kernel from graph. This might take a while... {EMOJI}')
exe_t_0 = time.time()
background_mat = regularised_laplacian_kernel(graph)
kernel = regularised_laplacian_kernel(graph)
exe_t_f = time.time()

output_file = os.path.join(output, f'{network.split("/")[-1]}.pickle')
output_file = os.path.join(output, f'{graph.split("/")[-1]}.pickle')

# Export numpy array
with open(output_file, 'wb') as file:
pickle.dump(background_mat, file, protocol=4)
pickle.dump(kernel, file, protocol=4)

running_time = exe_t_f - exe_t_0

Expand All @@ -77,14 +78,14 @@ def kernel(

@main.command()
@click.option(
'-n', '--network',
help='Path to the network graph or kernel',
'-i', '--input',
help='Input data',
required=True,
type=click.Path(exists=True, dir_okay=False)
)
@click.option(
'-i', '--data',
help='Input data',
'-n', '--network',
help='Path to the network graph or kernel',
required=True,
type=click.Path(exists=True, dir_okay=False)
)
Expand All @@ -98,7 +99,7 @@ def kernel(
'-m', '--method',
help='Diffusion method',
type=click.Choice(METHODS),
required=True,
default=RAW,
)
@click.option(
'-b', '--binarize',
Expand All @@ -112,6 +113,7 @@ def kernel(
@click.option(
'-t', '--threshold',
help='Codify node labels by applying a threshold to logFC in input.',
default=None,
type=float,
)
@click.option(
Expand All @@ -129,48 +131,55 @@ def kernel(
default=0.05,
show_default=True,
)
@click.option(
'-f', '--output_format',
help='Statistical significance (p-value).',
type=float,
default=CSV,
show_default=True,
)
def diffuse(
network: str,
data: str,
output: str,
method: str,
binarize: bool,
absolute_value: bool,
threshold: float,
p_value: float,
input: str,
network: str,
output: str = sys.stdout,
method: str = RAW,
binarize: bool = True,
threshold: float = None,
absolute_value: bool = True,
p_value: float = 0.05,
output_format: str = CSV
):
"""Run a diffusion method over a network or pre-generated kernel."""
click.secho(f'{EMOJI} Loading graph from {network} {EMOJI}')
graph = process_network_from_cli(network)

click.secho(
f'{EMOJI} Graph loaded with: \n'
f'{graph.number_of_nodes()} nodes\n'
f'{graph.number_of_edges()} edges\n'
f'{EMOJI}'
)
kernel = get_kernel_from_network_path(network)

click.secho(f'Codifying data from {data}.')
click.secho(f'Processing data input from {input}.')

label_dict = process_input(data, method, binarize, absolute_value, p_value, threshold)
input_scores_dict = process_map_and_format_input_data_for_diff(input,
kernel,
method,
binarize,
absolute_value,
p_value,
threshold,
)

click.secho(f'Running the diffusion algorithm.')
click.secho(f'Computing the diffusion algorithm.')

results = run_diffusion(
label_dict,
input_scores_dict,
method,
graph,
k=kernel
)

# results = run_diffusion(
# label_dict,
# method,
# graph,
# )
if output_format is CSV:
results.to_csv(output)

# json.dump(results, output, indent=2)
elif output_format is JSON:
json.dump(results, output, indent=2)

click.secho(f'Finished!')
click.secho(f'{EMOJI} Diffusion performed with success. Output located at {output} {EMOJI}')


if __name__ == '__main__':
Expand Down
48 changes: 39 additions & 9 deletions src/diffupy/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,30 +59,47 @@ def ensure_output_dirs():

#: csv
CSV = 'csv'
#: xls
XLS = 'xls'
#: xlsx
XLSX = 'xlsx'
#: tsv
TSV = 'tsv'
#: graphML
GRAPHML = 'graphml'
#: bel
BEL = 'bel'
#: node link json
NODE_LINK_JSON = 'json'
JSON = 'json'
#: pickle
BEL_PICKLE = 'pickle'
PICKLE = 'pickle'
#: gml
GML = 'gml'
#: edge list
EDGE_LIST = '.lst'

#: DiffuPath available network formats
FORMATS = [
XLS_FORMATS = (
XLS,
XLSX
)

#: Available graph formats
GRAPH_FORMATS = (
CSV,
TSV,
GRAPHML,
BEL,
NODE_LINK_JSON,
BEL_PICKLE,
]
JSON,
PICKLE,
)

#: Available kernel formats
KERNEL_FORMATS = (
CSV,
TSV,
JSON,
PICKLE,
)

#: Separators
FORMAT_SEPARATOR_MAPPING = {
Expand All @@ -109,9 +126,22 @@ def ensure_output_dirs():

#: Node name
NODE = 'Node'
LABEL = 'Label'
ENTITY = 'Entity'
GENE = 'Gene'

NODE_LABELING = [
NODE,
LABEL,
ENTITY,
GENE
]

#: Node type
NODE_TYPE = 'NodeType'
#: Unspecified score type
SCORE = 'Score'
#: Log2 fold change (logFC)
LOG_FC = 'LogFC'
#: Statistical significance (p-value)
P_VALUE = 'p-value'
#: Label
LABEL = 'Label'
4 changes: 2 additions & 2 deletions src/diffupy/diffuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def diffuse(
) -> Matrix:
"""Run diffusion on a network given an input and a diffusion method.

:param input_scores: score collection, supplied as n-dimensional array. Could be 1-dimensional (List) or n-dimensional (Matrix).
:param method: Selected method ["raw", "ml", "gm", "ber_s", "ber_p", "mc", "z"]
:param input_scores: score collection, supplied as n-dimensional array. Could be 1-dimensional (Vector) or n-dimensional (Matrix).
:param method: Selected method ["raw", "ml", "gm", "ber_s", "ber_p", "mc", "z"]
:param graph: A network as a graph. It could be optional if a Kernel is provided
:param kwargs: Optional arguments:
- k: a kernel [matrix] stemming from a graph, thus sparing the graph transformation process
Expand Down
Loading