diff --git a/.gitignore b/.gitignore index b33e2e9..68d97ce 100644 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,6 @@ dmypy.json # behave pretty.output allure_report/ + +# cython temp files +grim/**/*.c diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..bc6d0e1 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,9 @@ +include requirements.txt +include README.md +include LICENSE +include *.txt +recursive-include grim *.py +recursive-include grim *.txt +recursive-include grim *.json +recursive-include grim *.pyx +recursive-include grim *.pyd diff --git a/grim/__init__.py b/grim/__init__.py index f45a470..6e79b1e 100644 --- a/grim/__init__.py +++ b/grim/__init__.py @@ -26,4 +26,4 @@ """Top-level package for py-grim.""" __organization__ = "NMDP/CIBMTR Bioinformatics" -__version__ = "0.0.6" +__version__ = "0.0.7" diff --git a/grim/conf/__init__.py b/grim/conf/__init__.py old mode 100644 new mode 100755 index cfe8889..d64977b --- a/grim/conf/__init__.py +++ b/grim/conf/__init__.py @@ -23,6 +23,6 @@ # -"""Top-level package for py-grim.""" - -__organization__ = "NMDP/CIBMTR Bioinformatics" +__author__ = """Martin Maiers""" +__email__ = "mmaiers@nmdp.org" +__version__ = "0.0.7" diff --git a/grim/imputation/__init__.py b/grim/imputation/__init__.py old mode 100644 new mode 100755 index cfe8889..2779c9f --- a/grim/imputation/__init__.py +++ b/grim/imputation/__init__.py @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- -# -# grim Graph Imputation -# Copyright (c) 2021 Be The Match operated by National Marrow Donor Program. All Rights Reserved. # # This library is free software; you can redistribute it and/or modify it # under the terms of the GNU Lesser General Public License as published @@ -10,7 +7,7 @@ # your option) any later version. # # This library is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# ANY WARRANTY; with out even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public # License for more details. # @@ -23,6 +20,6 @@ # -"""Top-level package for py-grim.""" - -__organization__ = "NMDP/CIBMTR Bioinformatics" +__author__ = """Martin Maiers""" +__email__ = "mmaiers@nmdp.org" +__version__ = "0.0.7" diff --git a/grim/imputation/graph_generation/__init__.py b/grim/imputation/graph_generation/__init__.py old mode 100644 new mode 100755 index cfe8889..2779c9f --- a/grim/imputation/graph_generation/__init__.py +++ b/grim/imputation/graph_generation/__init__.py @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- -# -# grim Graph Imputation -# Copyright (c) 2021 Be The Match operated by National Marrow Donor Program. All Rights Reserved. # # This library is free software; you can redistribute it and/or modify it # under the terms of the GNU Lesser General Public License as published @@ -10,7 +7,7 @@ # your option) any later version. # # This library is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# ANY WARRANTY; with out even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public # License for more details. # @@ -23,6 +20,6 @@ # -"""Top-level package for py-grim.""" - -__organization__ = "NMDP/CIBMTR Bioinformatics" +__author__ = """Martin Maiers""" +__email__ = "mmaiers@nmdp.org" +__version__ = "0.0.7" diff --git a/grim/imputation/imputegl/__init__.py b/grim/imputation/imputegl/__init__.py index 2d0c16e..bec990d 100755 --- a/grim/imputation/imputegl/__init__.py +++ b/grim/imputation/imputegl/__init__.py @@ -24,4 +24,4 @@ __author__ = """Martin Maiers""" __email__ = "mmaiers@nmdp.org" -__version__ = "0.0.4" +__version__ = "0.0.7" diff --git a/grim/imputation/imputegl/cutils.pyx b/grim/imputation/imputegl/cutils.pyx new file mode 100644 index 0000000..4edb9e5 --- /dev/null +++ b/grim/imputation/imputegl/cutils.pyx @@ -0,0 +1,65 @@ +import cython + + +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef open_ambiguities(list hap, unsigned char loc, tuple split_loc): + cdef unsigned int k, i, p, j #hap_len, haps_len, splits_len + cdef Py_ssize_t hap_len, haps_len, splits_len + cdef list hap_new, hap1 + # cdef np.ndarray[STR, ndim=1] hap_new, hap1 + p = 0 + if len(split_loc) > 1: + # This opens all allele ambiguities + hap_len = len(hap[0]) + haps_len = len(hap) + splits_len = len(split_loc) + hap_new = [None] * (haps_len * splits_len) + # hap_new = np.empty(haps_len * splits_len, dtype=np.object) # produces an empty list of haplotypes + hap1 = [None] * hap_len + # hap1 = np.empty(haps_len, dtype=np.object) + for k in range(haps_len): # split a given locus in all haps. + + for j in range(hap_len): + hap1[j] = hap[k][j] + + for i in range(splits_len): + hap1[loc] = split_loc[i] + hap_new[p] = hap1[:] + p += 1 + return hap_new + return hap + +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef create_hap_list(list all_haps, dict optionDict, unsigned int N_Loc): + cdef unsigned int i, j, count + cdef list hap_list = [] + cdef list all_hap_split + + for i in range(len(all_haps)): + all_hap_split = all_haps[i].split('~') + count = 0 + for j in range(len(all_hap_split)): + if all_hap_split[j] not in optionDict: + break + else: + count += 1 + + if count == N_Loc: + hap_list.append(all_hap_split) + return hap_list + +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef deepcopy_list(list l): + cdef list copy_l + cdef unsigned int i, length + length = len(l) + copy_l = [None] * length + for i in range(length): + if isinstance(l[i], list): + copy_l[i] = deepcopy_list(l[i]) + else: + copy_l[i] = l[i] + return copy_l diff --git a/grim/imputation/imputegl/impute.py b/grim/imputation/imputegl/impute.py index 4501917..1cdc63a 100644 --- a/grim/imputation/imputegl/impute.py +++ b/grim/imputation/imputegl/impute.py @@ -7,8 +7,9 @@ import os.path import json -import numpy as np +import numpy as np +from .cutils import open_ambiguities, create_hap_list, deepcopy_list from .cypher_plan_b import CypherQueryPlanB from .cypher_query import CypherQuery @@ -118,6 +119,31 @@ def clean_up_gl(gl): class Imputation(object): + __slots__ = ( + "logger", + "verbose", + "populations", + "netGraph", + "priorMatrix", + "full_hapl", + "index_dict", + "full_loci", + "factor", + "_factor_missing_data", + "cypher", + "cypher_plan_b", + "matrix_planb", + "count_by_prob", + "number_of_options_threshold", + "plan", + "option_1", + "option_2", + "haplotypes_number_in_phase", + "save_space_mode", + "nodes_for_plan_A", + "unk_priors", + ) + def __init__(self, net=None, config=None, count_by_prob=None, verbose=False): """Constructor Intialize an instance of `Imputation` with a py2neo graph @@ -914,8 +940,8 @@ def open_phases(self, haps, N_Loc, gl_string): fq = [] for k in range(2): - hap_list = [] - hap_list.append(haps[j][k]) + hap_list = [haps[j][k]] + hap_list_splits = [tuple(allele.split("/")) for allele in hap_list[0]] # compute the number of options: options = 1 @@ -1590,12 +1616,12 @@ def comp_cand( # probabilties and accumulate cartesian productEpsilon=0.0001 chr = self.gl2haps(gl_string) if chr == []: - return + return None, None # if we in 9-loci, check if the type input in valid format if self.nodes_for_plan_A: geno_type = self.input_type(chr["Genotype"][0]) if not geno_type in self.nodes_for_plan_A: - return + return None, None n_loci = chr["N_Loc"] @@ -1604,7 +1630,7 @@ def comp_cand( # return if the result is empty (why would that be?) if pmags == []: - return + return None, None # res_muugs = {'Haps': 'NaN', 'Probs': 0} res_muugs = {"MaxProb": 0, "Haps": {}, "Pops": {}} @@ -1713,7 +1739,7 @@ def call_comp_phase_prob(self, epsilon, n, phases, chr, MUUG_output, planb): epsilon /= 10 if epsilon < min_epsilon: epsilon = 0.0 - phases_planb = copy.deepcopy(phases) + phases_planb = deepcopy_list(phases) # Find the option according to plan b if MUUG_output: res = self.comp_phase_prob_plan_b( diff --git a/grim/imputation/imputegl/networkx_graph.py b/grim/imputation/imputegl/networkx_graph.py index d58999a..1d997ab 100755 --- a/grim/imputation/imputegl/networkx_graph.py +++ b/grim/imputation/imputegl/networkx_graph.py @@ -9,10 +9,19 @@ def missing(labelA, labelB): class Graph(object): + __slots__ = ( + "graph", + "labelDict", + "whole_graph", + "full_loci", + "nodes_plan_a", + "nodes_plan_b", + ) + def __init__(self, config): - self.graph = nx.Graph() + self.graph = nx.DiGraph() self.labelDict = {} - self.whole_graph = nx.Graph() + self.whole_graph = nx.DiGraph() self.full_loci = config["full_loci"] self.nodes_plan_a, self.nodes_plan_b = [], [] if config["nodes_for_plan_A"]: @@ -63,8 +72,11 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile): if len(row) > 0: node1 = nodesDict[row[0]] node2 = nodesDict[row[1]] - if node1 in self.graph.nodes() and node2 in self.graph.nodes(): - self.graph.add_edge(node1, node2) + if node1 in self.graph and node2 in self.graph: + if self.graph.nodes[node1]["label"] == self.full_loci: + self.graph.add_edge(node2, node1) + else: + self.graph.add_edge(node1, node2) edgesfile.close() @@ -88,7 +100,6 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile): self.whole_graph.add_edge(node1, node2, color=kind) allEdgesfile.close() - nodesDict.clear() # return all haplotype by specific label diff --git a/grim/validation/__init__.py b/grim/validation/__init__.py old mode 100644 new mode 100755 index cfe8889..2779c9f --- a/grim/validation/__init__.py +++ b/grim/validation/__init__.py @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- -# -# grim Graph Imputation -# Copyright (c) 2021 Be The Match operated by National Marrow Donor Program. All Rights Reserved. # # This library is free software; you can redistribute it and/or modify it # under the terms of the GNU Lesser General Public License as published @@ -10,7 +7,7 @@ # your option) any later version. # # This library is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# ANY WARRANTY; with out even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public # License for more details. # @@ -23,6 +20,6 @@ # -"""Top-level package for py-grim.""" - -__organization__ = "NMDP/CIBMTR Bioinformatics" +__author__ = """Martin Maiers""" +__email__ = "mmaiers@nmdp.org" +__version__ = "0.0.7" diff --git a/grim/validation/runfile.py b/grim/validation/runfile.py index 0ed4de7..bf44bf5 100644 --- a/grim/validation/runfile.py +++ b/grim/validation/runfile.py @@ -105,7 +105,7 @@ def run_impute( print("Performing imputation based on:") print("\tPopulation: {}".format(config["pops"])) print("\tPriority: {}".format(config["priority"])) - print("\tPriority: {}".format(config["UNK_priors"])) + print("\tUNK priority: {}".format(config["UNK_priors"])) print("\tEpsilon: {}".format(config["epsilon"])) print("\tPlan B: {}".format(config["planb"])) print("\tNumber of Results: {}".format(config["number_of_results"])) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7307771 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +# Minimum requirements for the build system to execute. +requires = ["setuptools", "wheel", "cython==0.29.32"] # PEP 508 specifications. diff --git a/requirements.txt b/requirements.txt index ee19398..f319221 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ +cython==0.29.32 numpy>=1.20.2 -networkx==2.5.1 +networkx==2.5.1 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index d573e50..dcf485c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.0.6 +current_version = 0.0.7 commit = True tag = True diff --git a/setup.py b/setup.py index bb8ff99..c012b44 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,17 @@ """The setup script.""" -from setuptools import setup, find_packages +from setuptools import setup +from Cython.Build import cythonize + +# import numpy + + +# include_dirs=[numpy.get_include()], +# requires=['numpy', 'Cython']) + + +from setuptools import setup, find_packages, Extension with open("README.md") as readme_file: readme = readme_file.read() @@ -42,7 +52,7 @@ setup( name="py-graph-imputation", - version="0.0.6", + version="0.0.7", author="Pradeep Bashyal", author_email="pbashyal@nmdp.org", python_requires=">=3.8", @@ -66,13 +76,23 @@ include=[ "grim", "grim.imputation", - "grim.imputegl", + "grim.imputation.imputegl", "grim.imputation.graph_generation", "grim.validation", + "grim.conf", ] ), test_suite="tests", tests_require=test_requirements, url="https://github.com/nmdp-bioinformatics/py-grim", zip_safe=False, + ext_modules=cythonize( + [ + Extension( + "grim.imputation.imputegl.cutils", + ["grim/imputation/imputegl/cutils.pyx"], + ) + ], + language_level="3", + ), )