From 5c2912fc3551f08a6ebda9e677df0a971d8579e9 Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 6 Nov 2022 13:18:47 +0200 Subject: [PATCH 01/19] Changes for efficiency --- grim/imputation/imputegl/cutils.pyx | 66 +++++++++ grim/imputation/imputegl/impute.py | 156 +++++++++------------ grim/imputation/imputegl/networkx_graph.py | 113 +++++++-------- grim/validation/runfile.py | 2 +- setup.py | 12 +- 5 files changed, 193 insertions(+), 156 deletions(-) create mode 100644 grim/imputation/imputegl/cutils.pyx diff --git a/grim/imputation/imputegl/cutils.pyx b/grim/imputation/imputegl/cutils.pyx new file mode 100644 index 0000000..d920a6a --- /dev/null +++ b/grim/imputation/imputegl/cutils.pyx @@ -0,0 +1,66 @@ +#cython: language_level=3 +import cython + + +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef open_ambiguities(list hap, unsigned char loc, tuple split_loc): + cdef unsigned int k, i, p, j #hap_len, haps_len, splits_len + cdef Py_ssize_t hap_len, haps_len, splits_len + cdef list hap_new, hap1 + # cdef np.ndarray[STR, ndim=1] hap_new, hap1 + p = 0 + if len(split_loc) > 1: + # This opens all allele ambiguities + hap_len = len(hap[0]) + haps_len = len(hap) + splits_len = len(split_loc) + hap_new = [None] * (haps_len * splits_len) + # hap_new = np.empty(haps_len * splits_len, dtype=np.object) # produces an empty list of haplotypes + hap1 = [None] * hap_len + # hap1 = np.empty(haps_len, dtype=np.object) + for k in range(haps_len): # split a given locus in all haps. + + for j in range(hap_len): + hap1[j] = hap[k][j] + + for i in range(splits_len): + hap1[loc] = split_loc[i] + hap_new[p] = hap1[:] + p += 1 + return hap_new + return hap + +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef create_hap_list(list all_haps, dict optionDict, unsigned int N_Loc): + cdef unsigned int i, j, count + cdef list hap_list = [] + cdef list all_hap_split + + for i in range(len(all_haps)): + all_hap_split = all_haps[i].split('~') + count = 0 + for j in range(len(all_hap_split)): + if all_hap_split[j] not in optionDict: + break + else: + count += 1 + + if count == N_Loc: + hap_list.append(all_hap_split) + return hap_list + +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef deepcopy_list(list l): + cdef list copy_l + cdef unsigned int i, length + length = len(l) + copy_l = [None] * length + for i in range(length): + if isinstance(l[i], list): + copy_l[i] = deepcopy_list(l[i]) + else: + copy_l[i] = l[i] + return copy_l diff --git a/grim/imputation/imputegl/impute.py b/grim/imputation/imputegl/impute.py index f7d0169..28bc299 100644 --- a/grim/imputation/imputegl/impute.py +++ b/grim/imputation/imputegl/impute.py @@ -7,8 +7,9 @@ import os.path import json -import numpy as np +import numpy as np +from .cutils import open_ambiguities, create_hap_list, deepcopy_list from .cypher_plan_b import CypherQueryPlanB from .cypher_query import CypherQuery @@ -92,6 +93,10 @@ def clean_up_gl(gl): class Imputation(object): + __slots__ = 'logger', 'verbose', 'populations', 'netGraph', 'priorMatrix', 'full_hapl', 'index_dict', 'full_loci', \ + 'factor', '_factor_missing_data', 'cypher', 'cypher_plan_b', 'matrix_planb', 'count_by_prob', \ + 'number_of_options_threshold', 'plan', 'option_1', 'option_2', \ + 'haplotypes_number_in_phase', 'save_space_mode', 'nodes_for_plan_A', 'unk_priors' def __init__(self, net=None,config=None, count_by_prob=None, verbose=False): """Constructor @@ -291,23 +296,6 @@ def open_phases_1(self, haps, N_Loc, gl_string): phases.append([H1, H2]) return phases - def 
open_ambiguities(self, hap, loc): - # This opens all allele ambiguities - hap_new = [] # produces an empty list of haplotypes - for k in range(len(hap)): #slit a given locus in all haps. - splitHap = hap[k][loc].split('/') - if splitHap==hap[k][loc].split(): - split_loc = hap[k][loc].split('|') - else: - split_loc = splitHap - hap1 = hap[k] - if len(split_loc) > 1: - for i in range(len(split_loc)): - hap1[loc] = split_loc[i] - hap_new.append(hap1[:]) - else: - hap_new.append(hap1[:]) - return hap_new def comp_hap_prob(self, Hap, N_Loc, epsilon, n): haplo_probs = self.get_haplo_freqs(Hap, epsilon, n) @@ -350,11 +338,12 @@ def comp_hap_prob(self, Hap, N_Loc, epsilon, n): # return haplo_probs def get_haplo_freqs(self, haplos, epsilon, n=25000): - haplos_joined = ["~".join(item) for sublist in haplos for item in sublist] ### + haplos_joined = ["~".join(item) for item in haplos[0]] ### #haplos_joined = [item for sublist in haplos for item in sublist] ### #haplos_joined = ["~".join(sorted(item)) for sublist in haplos for item in sublist] return self.netGraph.adjs_query(haplos_joined) + # def get_haplo_freqs_miss(self, haplos, epsilon): # haplo_probs = {} # haplos_joined = ["~".join(sorted(item)) for sublist in haplos for item in sublist] @@ -681,7 +670,7 @@ def comp_phase_prob(self, phases, N_Loc, epsilon, n): # pop_res are the names of the populations return {'Haps': hap_total, 'Probs': p_total,'Pops': pop_res} - def reduce_phase_to_valid_allels(self, haps, N_Loc, planc = False): + def reduce_phase_to_valid_allels(self, haps, N_Loc, planc=False): for j in range(len(haps)): for k in range(2): hap_list = [] @@ -689,14 +678,13 @@ def reduce_phase_to_valid_allels(self, haps, N_Loc, planc = False): options = 1 for i in range(N_Loc): options = options * (len(hap_list[0][i].split("/"))) - if options>=self.number_of_options_threshold or planc: + if options >= self.number_of_options_threshold or planc: for hap_k in hap_list: - for i,g in enumerate(hap_k): + for i, g in enumerate(hap_k): gen = g.split('/') probs = self.check_if_alleles_exist(gen) if not probs == {}: - list(probs.keys()) - haps[j][k][i] = ('/').join(list(probs.keys())) + haps[j][k][i] = '/'.join(list(probs.keys())) def reduce_phase_to_commons_alleles(self, haps, N_Loc, commons_number=1, planc = False ): @@ -733,51 +721,53 @@ def open_phases(self, haps, N_Loc, gl_string): for j in range(len(haps)): H1 = [] H2 = [] - ## fq = pa.DataFrame() + ## fq = pa.DataFrame() fq = [] for k in range(2): - hap_list = [] - hap_list.append(haps[j][k]) + hap_list = [haps[j][k]] + hap_list_splits = [tuple(allele.split("/")) for allele in hap_list[0]] - #compute the number of options: + # compute the number of options: options = 1 - for i in range(N_Loc): options=options*(len(hap_list[0][i].split("/"))) + for i in range(N_Loc): + options *= len(hap_list_splits[i]) - #if the number of options is smaller than the total number of nodes: + # if the number of options is smaller than the total number of nodes: if options < self.number_of_options_threshold: - #open ambiguities regularly: + # open ambiguities regularly: for i in range(N_Loc): - hap_list = self.open_ambiguities(hap_list, i) - if (k == 0): + hap_list = self.open_ambiguities(hap_list, i, hap_list_splits[i]) + + if k == 0: H1.append(hap_list) else: H2.append(hap_list) - self.option_1 +=1 + self.option_1 += 1 - #if there are more options than actual haplotypes possible: + # if there are more options than actual haplotypes possible: else: - self.option_2 +=1 - optionDict = {} + self.option_2 += 1 + 
optionDict = {} #set() if len(fq) == 0: - list=[] - for (gen,name) in self.cypher.loc_map.items(): - count=0 + _list = [] + for (gen, name) in self.cypher.loc_map.items(): + count = 0 for i in range(len(hap_list[0])): - if hap_list[0][i].split("*")[0]==gen: - count=count+1 - if count>0: - list.append(name) - #we'll get all the options possible - #(query,lc)=self.cypher.buildQuery(["~".join(list)]) - - # fq = pa.DataFrame(self.graph.data(query)) - label = "".join(sorted(list)) + if hap_list[0][i].split("*", 1)[0] == gen: + count = count + 1 + if count > 0: + _list.append(name) + # we'll get all the options possible + # (query,lc)=self.cypher.buildQuery(["~".join(_list)]) + + # fq = pa.DataFrame(self.graph.data(query)) + label = "".join(sorted(_list)) fq = self.netGraph.haps_by_label(label) - # fq = pa.DataFrame(self.netGraph.abcdq_allele(), ) - # fq = self.netGraph.abcdq_allele() - #we'll find which of all the options are compatable with the donor + # fq = pa.DataFrame(self.netGraph.abcdq_allele(), ) + # fq = self.netGraph.abcdq_allele() + # we'll find which of all the options are compatable with the donor """gl_list=gl_string.split("^") for gen in gl_list: gens = gen.split('+') @@ -785,29 +775,15 @@ def open_phases(self, haps, N_Loc, gl_string): gen=g.split('/') for option in gen: optionDict[option]=True""" - for hap_k in hap_list: - #listType = [] - for g in hap_k: - gen = g.split('/') - for option in gen: - optionDict[option] = True - #print(listType) + + for gen in hap_list_splits: + for option in gen: + optionDict[option] = True + # optionDict.add(option) ##all_haps = fq.values.tolist() - all_haps = fq - hap_list=[] - for i in range(len(all_haps)): - count=0 - for gen in all_haps[i].split("~"): - if not gen in optionDict: - break - else: - count=count+1 - if count == N_Loc: - - # all_haps[i][0]=all_haps[i][0].split("~") - #hap_list.append(all_haps[i][0]) - hap_list.append(all_haps[i].split("~")) - if (k == 0): + hap_list = create_hap_list(fq, optionDict, N_Loc) + + if k == 0: H1.append(hap_list) else: H2.append(hap_list) @@ -943,7 +919,7 @@ def find_option_freq(self, option, haplos, missing): def comp_hap_prob_plan_b(self, Hap,division,missing): if division[0] == list(set(self.index_dict.values())):#[1, 2, 3, 4, 5]: - return self.comp_hap_prob(Hap[0], 0, 0, 0) + return self.comp_hap_prob([Hap[0]], 0, 0, 0) haplo_probs = self.find_option_freq(division, Hap,missing) # Return {'Haps': haplos, 'Probs': probs} of the dict @@ -1318,17 +1294,20 @@ def input_type(self, haplotype): type_list.append(self.index_dict[locus]) return type_list + def open_ambiguities(self, hap, loc, split_loc): + return open_ambiguities(hap, loc, split_loc) + def comp_cand(self, gl_string, binary, epsilon, n, MUUG_output, haps_output, planb, em): # receives a list of phases and computes haps and # probabilties and accumulate cartesian productEpsilon=0.0001 chr = self.gl2haps(gl_string) if chr == []: - return + return None, None # if we in 9-loci, check if the type input in valid format if self.nodes_for_plan_A: geno_type = self.input_type(chr['Genotype'][0]) if not geno_type in self.nodes_for_plan_A: - return + return None, None n_loci = chr['N_Loc'] @@ -1337,7 +1316,7 @@ def comp_cand(self, gl_string, binary, epsilon, n, MUUG_output, haps_output, pla # return if the result is empty (why would that be?) 
if pmags == []: - return + return None, None #res_muugs = {'Haps': 'NaN', 'Probs': 0} res_muugs = {'MaxProb': 0, 'Haps': {}, 'Pops': {}} @@ -1363,7 +1342,7 @@ def comp_cand(self, gl_string, binary, epsilon, n, MUUG_output, haps_output, pla if phases: if MUUG_output: - prior_matrix_orig = copy.deepcopy(self.priorMatrix) + prior_matrix_orig = np.array(self.priorMatrix, order='K', copy=True) #copy.deepcopy(self.priorMatrix) res_muugs = self.call_comp_phase_prob(epsilon, n, phases, chr, True, planb) if planb and len(res_muugs['Haps']) == 0: self.plan = 'c' @@ -1420,23 +1399,19 @@ def call_comp_phase_prob(self, epsilon, n, phases, chr, MUUG_output, planb): # no plan b for level in range(2): if level == 1: - if self.unk_priors == "MR": - self.priorMatrix = np.ones((len(self.populations), len(self.populations))) - else: - self.priorMatrix = np.identity(len(self.populations)) - #self.priorMatrix = np.ones((len(self.populations), len(self.populations))) #### + self.priorMatrix = np.ones((len(self.populations), len(self.populations))) #### if planb and len(res['Haps']) == 0: self.plan = 'b' epsilon = 1e-14 n_res = 0 min_res = 10 min_epsilon = 1.e-3 - #self.priorMatrix = np.ones((len(self.populations), len(self.populations))) + # self.priorMatrix = np.ones((len(self.populations), len(self.populations))) while (epsilon > 0) & (n_res < min_res): epsilon /= 10 if (epsilon < min_epsilon): epsilon = 0.0 - phases_planb = copy.deepcopy(phases) + phases_planb = deepcopy_list(phases) # Find the option according to plan b if MUUG_output: res = self.comp_phase_prob_plan_b(phases_planb, chr['N_Loc'], epsilon, True) @@ -1683,7 +1658,7 @@ def impute_file(self, config, planb=None, em_mr = False, em = False):##em with f as lines: for (i, name_gl) in enumerate(lines): - #try: + try: name_gl = name_gl.rstrip() # remove trailing whitespace if ',' in name_gl: list_gl = name_gl.split(',') @@ -1746,9 +1721,10 @@ def impute_file(self, config, planb=None, em_mr = False, em = False):##em print(time_taken) if self.verbose: self.logger.info("Time taken: " + str(time_taken)) - """except: - problem.write(str(name_gl) + "\n") - continue""" + except: + print(f"{i} Subject: {subject_id} - Exception") + problem.write(str(name_gl) + "\n") + continue f.close() if MUUG_output: diff --git a/grim/imputation/imputegl/networkx_graph.py b/grim/imputation/imputegl/networkx_graph.py index 2eeefaf..c7c834a 100755 --- a/grim/imputation/imputegl/networkx_graph.py +++ b/grim/imputation/imputegl/networkx_graph.py @@ -5,18 +5,20 @@ def missing(labelA, labelB): a = list(labelA) b = list(labelB) - return [x for x in b if x not in a] + return [x for x in b if x not in a] + class Graph(object): + __slots__ = 'graph', 'labelDict', 'whole_graph', 'full_loci', 'nodes_plan_a', 'nodes_plan_b' def __init__(self, config): - self.graph = nx.Graph() + self.graph = nx.DiGraph() self.labelDict = {} - self.whole_graph = nx.Graph() + self.whole_graph = nx.DiGraph() self.full_loci = config["full_loci"] self.nodes_plan_a, self.nodes_plan_b = [], [] if config["nodes_for_plan_A"]: - path = ('/').join(config["node_file"].split('/')[:-1]) + path = '/'.join(config["node_file"].split('/')[:-1]) # bug: dies if file doesn't exist # bug: list_f doesn't exist @@ -27,69 +29,75 @@ def __init__(self, config): with open(path + '/nodes_for_plan_b.txt') as list_f: for item in list_f: self.nodes_plan_b.append(item.strip()) - #self.nodes_plan_a = pickle.load(open( path + '/nodes_for_plan_a.pkl', "rb")) - #self.nodes_plan_b = pickle.load(open( path + '/nodes_for_plan_b.pkl', "rb")) - + 
# self.nodes_plan_a = pickle.load(open( path + '/nodes_for_plan_a.pkl', "rb")) + # self.nodes_plan_b = pickle.load(open( path + '/nodes_for_plan_b.pkl', "rb")) - #build graph from files of nodes and edges between nodes with top relation + # build graph from files of nodes and edges between nodes with top relation def build_graph(self, nodesFile, edgesFile, allEdgesFile): nodesDict = dict() - #add nodes from file + # add nodes from file with open(nodesFile) as nodesfile: readNodes = csv.reader(nodesfile, delimiter=',') - firstLine = next(readNodes) + next(readNodes) for row in readNodes: if len(row) > 0: if not self.nodes_plan_a or row[2] in self.nodes_plan_a: - self.graph.add_node(row[1],label=row[2], freq=list(map(float, row[3].split(";")))) + self.graph.add_node(row[1], label=row[2], freq=list(map(float, row[3].split(";")))) if not self.nodes_plan_b or row[2] in self.nodes_plan_b: - self.whole_graph.add_node(row[1],label=row[2], freq=list(map(float, row[3].split(";")))) + self.whole_graph.add_node(row[1], label=row[2], freq=list(map(float, row[3].split(";")))) nodesDict[row[0]] = row[1] nodesfile.close() - - #add edges from file + # add edges from file with open(edgesFile) as edgesfile: readEdges = csv.reader(edgesfile, delimiter=',') - firstLine = next(readEdges) + next(readEdges) for row in readEdges: if len(row) > 0: node1 = nodesDict[row[0]] node2 = nodesDict[row[1]] - if node1 in self.graph.nodes() and node2 in self.graph.nodes(): - self.graph.add_edge(node1, node2) + if node1 in self.graph and node2 in self.graph: + if self.graph.nodes[node1]["label"] == self.full_loci: + self.graph.add_edge(node2, node1) + else: + self.graph.add_edge(node1, node2) edgesfile.close() - #add edges from file + # add edges from file with open(allEdgesFile) as allEdgesfile: readEdges = csv.reader(allEdgesfile, delimiter=',') - firstLine = next(readEdges) + next(readEdges) for row in readEdges: if len(row) > 0: node1 = nodesDict[row[0]] node2 = nodesDict[row[1]] - kind = ("-".join(sorted([self.whole_graph.nodes[node1]['label'], self.whole_graph.nodes[node2]['label']], key=len))) - self.whole_graph.add_edge(node1, node2, color = kind) + if len(self.whole_graph.nodes[node1]['label']) < len(self.whole_graph.nodes[node2]['label']): + connector = self.whole_graph.nodes[node2]['label'] + node1 + self.whole_graph.add_edge(node1, connector) + self.whole_graph.add_edge(connector, node2) + else: + connector = self.whole_graph.nodes[node1]['label'] + node2 + self.whole_graph.add_edge(node2, connector) + self.whole_graph.add_edge(connector, node1) allEdgesfile.close() - nodesDict.clear() - #return all haplotype by specific label + # return all haplotype by specific label def haps_by_label(self, label): - #cheak if already found + # cheak if already found if label in self.labelDict: return self.labelDict[label] - #not found yet. serach and save in labelDict + # not found yet. 
serach and save in labelDict hapsList = [] if not self.nodes_plan_a or label in self.nodes_plan_a: - for key,key_data in self.graph.nodes(data=True): + for key, key_data in self.graph.nodes(data=True): if key_data["label"] == label: hapsList.append(key) elif label in self.nodes_plan_b: - for key,key_data in self.whole_graph.nodes(data=True): + for key, key_data in self.whole_graph.nodes(data=True): if key_data["label"] == label: hapsList.append(key) self.labelDict[label] = hapsList @@ -108,66 +116,43 @@ def haps_with_probs_by_label(self, label): return dictAlleles - #find all adj of alleleList from label 'ABCQR' + # find all adj of alleleList from label 'ABCQR' def adjs_query(self, alleleList): adjDict = dict() for allele in alleleList: - if allele in self.graph.nodes(): - if self.graph.nodes[allele]["label"] == self.full_loci: # 'ABCQR': - adjDict[allele] = self.graph.nodes[allele]['freq'] + if allele in self.graph: + allele_node = self.graph.nodes[allele] + if allele_node["label"] == self.full_loci: # 'ABCQR': + adjDict[allele] = allele_node['freq'] else: adjs = self.graph.adj[allele] for adj in adjs: adjDict[adj] = self.graph.nodes[adj]['freq'] return adjDict - - #find all adj of alleleList by label + # find all adj of alleleList by label def adjs_query_by_color(self, alleleList, labelA, labelB): - # copyLabelA = labelA + # copyLabelA = labelA adjDict = dict() - if (labelA == labelB): - return self.node_probs(alleleList, labelA) + if labelA == labelB: + return self.node_probs(alleleList, labelA) for allele in alleleList: - if allele in self.whole_graph.nodes(): - copyLabelA = labelA - newLabelA = labelA - miss = missing(labelA, labelB) - alleles = [allele] - - while len(miss) > 0: - tmpAllels = list() - newLabelA = copyLabelA + miss[0] - newLabelA = ''.join(sorted(newLabelA)) - del miss[0] - for oneAllel in alleles: - # alleles.remove(oneAllel) - adjs = self.whole_graph.adj[oneAllel] - label = copyLabelA + '-' +newLabelA - for adj in adjs: - if adjs[adj]['color'] == label: - tmpAllels.append(adj) - alleles = tmpAllels - copyLabelA = newLabelA - - + if allele in self.whole_graph: + alleles = self.whole_graph.adj.get(labelB + allele , []) for adj in alleles: adjDict[adj] = self.whole_graph.nodes[adj]['freq'] return adjDict - #return dict of nodes and there proper freq + # return dict of nodes and there proper freq def node_probs(self, nodes, label): nodesDict = dict() if not self.nodes_plan_b or label in self.nodes_plan_b: for node in nodes: - if node in self.whole_graph.nodes(): + if node in self.whole_graph: nodesDict[node] = self.whole_graph.nodes[node]['freq'] elif label in self.nodes_plan_a: for node in nodes: - if node in self.whole_graph.nodes(): + if node in self.whole_graph: nodesDict[node] = self.graph.nodes[node]['freq'] return nodesDict - - - diff --git a/grim/validation/runfile.py b/grim/validation/runfile.py index 48cc3d1..16e7b82 100644 --- a/grim/validation/runfile.py +++ b/grim/validation/runfile.py @@ -77,7 +77,7 @@ def run_impute(conf_file = "../conf/minimal-configuration.json", project_dir_gra print("Performing imputation based on:") print("\tPopulation: {}".format(config["pops"])) print("\tPriority: {}".format(config["priority"])) - print("\tPriority: {}".format(config["UNK_priors"])) + print("\tUNK priority: {}".format(config["UNK_priors"])) print("\tEpsilon: {}".format(config["epsilon"])) print("\tPlan B: {}".format(config["planb"])) print("\tNumber of Results: {}".format(config["number_of_results"])) diff --git a/setup.py b/setup.py index 57542a2..04a17e1 100644 
--- a/setup.py +++ b/setup.py @@ -26,7 +26,16 @@ """The setup script.""" -from setuptools import setup, find_packages +from setuptools import setup +from Cython.Build import cythonize +# import numpy + + + # include_dirs=[numpy.get_include()], + # requires=['numpy', 'Cython']) + + +from setuptools import setup, find_packages, Extension with open("README.md") as readme_file: readme = readme_file.read() @@ -67,4 +76,5 @@ tests_require=test_requirements, url="https://github.com/nmdp-bioinformatics/py-grim", zip_safe=False, + ext_modules=cythonize([Extension("grim/imputation/imputegl/cutils", ["grim/imputation/imputegl/cutils.pyx"])]) ) From 02bf72a699fbcb3c936f9c0739a6113cf66fa9ae Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 6 Nov 2022 13:33:21 +0200 Subject: [PATCH 02/19] Changes for efficiency --- setup.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 04a17e1..71d0365 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ setup( name="py-graph-imputation", - version="0.0.4", + version="0.0.6", author="Pradeep Bashyal", author_email="pbashyal@nmdp.org", python_requires=">=3.8", @@ -71,7 +71,13 @@ long_description_content_type="text/markdown", include_package_data=True, keywords="grim", - packages=find_packages(include=["grim"]), + packages=find_packages(include=[ + "grim", + "grim.imputation", + "grim.imputation.imputegl", + "grim.imputation.graph_generation", + "grim.validation", + ]), test_suite="tests", tests_require=test_requirements, url="https://github.com/nmdp-bioinformatics/py-grim", From bdd22d7f924039ffd370d15ba54f2be2347e3bf2 Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 12:35:03 +0200 Subject: [PATCH 03/19] Changes for efficiency --- grim/imputation/imputegl/cutils.pyx | 1 - setup.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/grim/imputation/imputegl/cutils.pyx b/grim/imputation/imputegl/cutils.pyx index d920a6a..4edb9e5 100644 --- a/grim/imputation/imputegl/cutils.pyx +++ b/grim/imputation/imputegl/cutils.pyx @@ -1,4 +1,3 @@ -#cython: language_level=3 import cython diff --git a/setup.py b/setup.py index 71d0365..51c29a9 100644 --- a/setup.py +++ b/setup.py @@ -82,5 +82,5 @@ tests_require=test_requirements, url="https://github.com/nmdp-bioinformatics/py-grim", zip_safe=False, - ext_modules=cythonize([Extension("grim/imputation/imputegl/cutils", ["grim/imputation/imputegl/cutils.pyx"])]) + ext_modules=cythonize([Extension("grim/imputation/imputegl/cutils", ["grim/imputation/imputegl/cutils.pyx"])], language_level="3") ) From bfd15995958fada6cb9e243620bba4cb69f5987b Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 13:12:33 +0200 Subject: [PATCH 04/19] Changes for efficiency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index ee19398..b0ed4ed 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ numpy>=1.20.2 networkx==2.5.1 +cython From 2a74587794d571c461768304b51c2082f31d4bb8 Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 13:17:19 +0200 Subject: [PATCH 05/19] Changes for efficiency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 51c29a9..708a99c 100644 --- a/setup.py +++ b/setup.py @@ -82,5 +82,5 @@ tests_require=test_requirements, url="https://github.com/nmdp-bioinformatics/py-grim", zip_safe=False, - ext_modules=cythonize([Extension("grim/imputation/imputegl/cutils", ["grim/imputation/imputegl/cutils.pyx"])], 
language_level="3") + ext_modules=cythonize([Extension("cutils", ["grim/imputation/imputegl/cutils.pyx"])], language_level="3") ) From ba7c7a39074596884d4c5c3d8ff6a35ffa7db523 Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 15:15:53 +0200 Subject: [PATCH 06/19] Changes for efficiency --- MANIFEST.in | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..200770d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,6 @@ +include requirements.txt +include README.md +include LICENSE +include *.txt +recursive-include src *.py +recursive-include src *.txt From 766e66daad5efd5f9fb1a92fb74c90c82acae3b7 Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 15:20:01 +0200 Subject: [PATCH 07/19] Changes for efficiency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index b0ed4ed..27df244 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ numpy>=1.20.2 networkx==2.5.1 cython +Cython From 0dd03d9c827d179c8ceb5f24cbc3b3839f2e9fb4 Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 15:26:32 +0200 Subject: [PATCH 08/19] Changes for efficiency --- grim/__init__.py | 2 +- grim/conf/__init__.py | 25 ++++++++++++++++++++ grim/imputation/__init__.py | 25 ++++++++++++++++++++ grim/imputation/graph_generation/__init__.py | 25 ++++++++++++++++++++ grim/imputation/imputegl/__init__.py | 2 +- grim/validation/__init__.py | 25 ++++++++++++++++++++ setup.py | 1 + 7 files changed, 103 insertions(+), 2 deletions(-) create mode 100755 grim/conf/__init__.py create mode 100755 grim/imputation/__init__.py create mode 100755 grim/imputation/graph_generation/__init__.py create mode 100755 grim/validation/__init__.py diff --git a/grim/__init__.py b/grim/__init__.py index 375c00f..f45a470 100644 --- a/grim/__init__.py +++ b/grim/__init__.py @@ -26,4 +26,4 @@ """Top-level package for py-grim.""" __organization__ = "NMDP/CIBMTR Bioinformatics" -__version__ = "0.0.4" +__version__ = "0.0.6" diff --git a/grim/conf/__init__.py b/grim/conf/__init__.py new file mode 100755 index 0000000..cedd678 --- /dev/null +++ b/grim/conf/__init__.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- + +# +# This library is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; either version 3 of the License, or (at +# your option) any later version. +# +# This library is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; with out even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this library; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+#
+# > http://www.fsf.org/licensing/licenses/lgpl.html
+# > http://www.opensource.org/licenses/lgpl-license.php
+#
+
+
+__author__ = """Martin Maiers"""
+__email__ = 'mmaiers@nmdp.org'
+__version__ = '0.0.6'
diff --git a/grim/imputation/__init__.py b/grim/imputation/__init__.py
new file mode 100755
index 0000000..cedd678
--- /dev/null
+++ b/grim/imputation/__init__.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+
+#
+# This library is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# This library is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; with out even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this library; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+#
+# > http://www.fsf.org/licensing/licenses/lgpl.html
+# > http://www.opensource.org/licenses/lgpl-license.php
+#
+
+
+__author__ = """Martin Maiers"""
+__email__ = 'mmaiers@nmdp.org'
+__version__ = '0.0.6'
diff --git a/grim/imputation/graph_generation/__init__.py b/grim/imputation/graph_generation/__init__.py
new file mode 100755
index 0000000..cedd678
--- /dev/null
+++ b/grim/imputation/graph_generation/__init__.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+
+#
+# This library is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# This library is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; with out even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this library; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+#
+# > http://www.fsf.org/licensing/licenses/lgpl.html
+# > http://www.opensource.org/licenses/lgpl-license.php
+#
+
+
+__author__ = """Martin Maiers"""
+__email__ = 'mmaiers@nmdp.org'
+__version__ = '0.0.6'
diff --git a/grim/imputation/imputegl/__init__.py b/grim/imputation/imputegl/__init__.py
index 1548b44..352ebc3 100755
--- a/grim/imputation/imputegl/__init__.py
+++ b/grim/imputation/imputegl/__init__.py
@@ -24,4 +24,4 @@
 
 __author__ = """Martin Maiers"""
 __email__ = 'mmaiers@nmdp.org'
-__version__ = '0.0.4'
+__version__ = '0.0.6'
diff --git a/grim/validation/__init__.py b/grim/validation/__init__.py
new file mode 100755
index 0000000..cedd678
--- /dev/null
+++ b/grim/validation/__init__.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+
+#
+# This library is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+# +# This library is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; with out even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this library; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +# +# > http://www.fsf.org/licensing/licenses/lgpl.html +# > http://www.opensource.org/licenses/lgpl-license.php +# + + +__author__ = """Martin Maiers""" +__email__ = 'mmaiers@nmdp.org' +__version__ = '0.0.6' diff --git a/setup.py b/setup.py index 708a99c..d23c4d8 100644 --- a/setup.py +++ b/setup.py @@ -77,6 +77,7 @@ "grim.imputation.imputegl", "grim.imputation.graph_generation", "grim.validation", + "grim.conf", ]), test_suite="tests", tests_require=test_requirements, From b0c8b94970b0e5d4b996a13bfb1f776d1685eeff Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 15:32:44 +0200 Subject: [PATCH 09/19] Changes for efficiency --- MANIFEST.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 200770d..cc4d41f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,5 +2,6 @@ include requirements.txt include README.md include LICENSE include *.txt -recursive-include src *.py -recursive-include src *.txt +recursive-include grim *.py +recursive-include grim *.txt +recursive-include grim *.json From 4941866b09d728e6e066a2ffec3be9c9ef66fb42 Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 15:33:58 +0200 Subject: [PATCH 10/19] Changes for efficiency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d23c4d8..b40e34e 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ setup( name="py-graph-imputation", - version="0.0.6", + version="0.0.7", author="Pradeep Bashyal", author_email="pbashyal@nmdp.org", python_requires=">=3.8", From df035f42bbc28c7a0284ff08ec807b376d0624e9 Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 15:35:25 +0200 Subject: [PATCH 11/19] Changes for efficiency --- MANIFEST.in | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index cc4d41f..392139b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,3 +5,4 @@ include *.txt recursive-include grim *.py recursive-include grim *.txt recursive-include grim *.json +recursive-include grim *.pyx diff --git a/setup.py b/setup.py index b40e34e..7345f40 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ setup( name="py-graph-imputation", - version="0.0.7", + version="0.0.8", author="Pradeep Bashyal", author_email="pbashyal@nmdp.org", python_requires=">=3.8", From 749372d87d5f28cc1291680367c4005042017977 Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 15:37:54 +0200 Subject: [PATCH 12/19] Changes for efficiency --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index 392139b..bc6d0e1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,3 +6,4 @@ recursive-include grim *.py recursive-include grim *.txt recursive-include grim *.json recursive-include grim *.pyx +recursive-include grim *.pyd From 5913ba96594ba4ffc83b1c8e40c7acecd38151df Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 15:38:23 +0200 Subject: [PATCH 13/19] Changes for efficiency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/setup.py b/setup.py index 7345f40..b40e34e 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ setup( name="py-graph-imputation", - version="0.0.8", + version="0.0.7", author="Pradeep Bashyal", author_email="pbashyal@nmdp.org", python_requires=">=3.8", From e4da1ab8715c07610a71f407cd8162b7f824b9b0 Mon Sep 17 00:00:00 2001 From: sapiris Date: Sun, 13 Nov 2022 16:35:51 +0200 Subject: [PATCH 14/19] Changes for efficiency --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b40e34e..e25cd17 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ setup( name="py-graph-imputation", - version="0.0.7", + version="0.0.6", author="Pradeep Bashyal", author_email="pbashyal@nmdp.org", python_requires=">=3.8", @@ -83,5 +83,5 @@ tests_require=test_requirements, url="https://github.com/nmdp-bioinformatics/py-grim", zip_safe=False, - ext_modules=cythonize([Extension("cutils", ["grim/imputation/imputegl/cutils.pyx"])], language_level="3") + ext_modules=cythonize([Extension("grim.imputation.imputegl.cutils", ["grim/imputation/imputegl/cutils.pyx"])], language_level="3") ) From f19b41abdd0a6a61fbcdc17a69d2a00af5504942 Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Wed, 16 Nov 2022 10:07:32 -0600 Subject: [PATCH 15/19] Changes to PR from sapir. - fix Cython cyclic dependency with pyproject.toml - fix merge conflicts --- .github/workflows/python-tests.yml | 3 - .gitignore | 3 + grim/algorithm/match.py | 16 - grim/conf/__init__.py | 5 +- grim/grim.py | 37 +- .../generate_neo4j_multi_hpf.py | 272 +-- .../generate_neo4j_single_hpf.py | 194 +- .../graph_generation/nemo_to_hpf_csv.py | 50 +- .../graph_generation/wmda_download.py | 16 +- .../graph_generation/wmda_to_hpf_csv.py | 19 +- grim/imputation/imputegl/cypher_plan_b.py | 25 +- grim/imputation/imputegl/cypher_query.py | 15 +- grim/imputation/imputegl/impute.py | 1567 +++++++++++------ grim/imputation/imputegl/networkx_graph.py | 92 +- grim/imputation/setup.py | 45 +- grim/model/allele.py | 74 - grim/model/slug.py | 72 - grim/validation/parallel-imputation.py | 190 +- grim/validation/reduce_loci.py | 42 +- grim/validation/runfile.py | 161 +- grim/validation/runfile_mp.py | 87 +- grim/validation/runfile_mt.py | 87 +- grim/validation/simulation/__init__.py | 28 + grim/validation/simulation/data/__init__.py | 28 + pyproject.toml | 3 + requirements-deploy.txt | 2 - requirements-dev.txt | 7 +- requirements-tests.txt | 1 - requirements.txt | 5 +- setup.cfg | 2 +- tests/features/algorithm/SLUG Match.feature | 16 - .../definition/Class I HLA Alleles.feature | 21 - tests/steps/HLA_alleles.py | 30 - tests/steps/SLUG_match.py | 26 - ...st_my_project_template.py => test_grim.py} | 0 35 files changed, 1875 insertions(+), 1366 deletions(-) delete mode 100644 grim/algorithm/match.py delete mode 100644 grim/model/allele.py delete mode 100644 grim/model/slug.py create mode 100644 grim/validation/simulation/__init__.py create mode 100644 grim/validation/simulation/data/__init__.py create mode 100644 pyproject.toml delete mode 100644 tests/features/algorithm/SLUG Match.feature delete mode 100644 tests/features/definition/Class I HLA Alleles.feature delete mode 100644 tests/steps/HLA_alleles.py delete mode 100644 tests/steps/SLUG_match.py rename tests/unit/{test_my_project_template.py => test_grim.py} (100%) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 6bfdc01..cb48f53 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -28,6 +28,3 @@ 
jobs: - name: Test with pytest run: | pytest - - name: Run BDD Tests - run: | - behave diff --git a/.gitignore b/.gitignore index b33e2e9..68d97ce 100644 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,6 @@ dmypy.json # behave pretty.output allure_report/ + +# cython temp files +grim/**/*.c diff --git a/grim/algorithm/match.py b/grim/algorithm/match.py deleted file mode 100644 index a8bb16a..0000000 --- a/grim/algorithm/match.py +++ /dev/null @@ -1,16 +0,0 @@ -from grim.model.slug import SLUG - - -# SLUG match -# match the SLUG of patient with donor - - -def slug_match(patient_slug: SLUG, donor_slug: SLUG) -> bool: - """ - SLUGs are matched if they are equal to each other - - @param patient_slug: Patient SLUG - @param donor_slug: Donor SLUG - @return: bool indicating whether they match or not - """ - return patient_slug == donor_slug diff --git a/grim/conf/__init__.py b/grim/conf/__init__.py index cedd678..54199c8 100755 --- a/grim/conf/__init__.py +++ b/grim/conf/__init__.py @@ -1,5 +1,8 @@ # -*- coding: utf-8 -*- +# +# grim Graph Imputation +# Copyright (c) 2021 Be The Match operated by National Marrow Donor Program. All Rights Reserved. # # This library is free software; you can redistribute it and/or modify it # under the terms of the GNU Lesser General Public License as published @@ -7,7 +10,7 @@ # your option) any later version. # # This library is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; with out even the implied warranty of MERCHANTABILITY or +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public # License for more details. # diff --git a/grim/grim.py b/grim/grim.py index db79f42..d177da0 100644 --- a/grim/grim.py +++ b/grim/grim.py @@ -30,29 +30,42 @@ from .imputation.imputegl.networkx_graph import Graph import os -def graph_freqs(conf_file = "", for_em = False, em_pop=None ): + +def graph_freqs(conf_file="", for_em=False, em_pop=None): if conf_file == "": - conf_file = os.path.dirname(os.path.realpath(__file__)) + '/conf/minimal-configuration.json' + conf_file = ( + os.path.dirname(os.path.realpath(__file__)) + + "/conf/minimal-configuration.json" + ) + generate_neo4j_multi_hpf.generate_graph( + config_file=conf_file, em_pop=em_pop, em=for_em + ) - generate_neo4j_multi_hpf.generate_graph(config_file=conf_file, em_pop=em_pop, - em=for_em) -def impute(conf_file = ""): +def impute(conf_file=""): project_dir_in_file, project_dir_graph = "", "" if conf_file == "": - conf_file = os.path.dirname(os.path.realpath(__file__)) + '/conf/minimal-configuration.json' - project_dir_graph = os.path.dirname(os.path.realpath(__file__)) + '/imputation/graph_generation/' - project_dir_in_file = os.path.dirname(os.path.realpath(__file__)) + '/' + conf_file = ( + os.path.dirname(os.path.realpath(__file__)) + + "/conf/minimal-configuration.json" + ) + project_dir_graph = ( + os.path.dirname(os.path.realpath(__file__)) + + "/imputation/graph_generation/" + ) + project_dir_in_file = os.path.dirname(os.path.realpath(__file__)) + "/" runfile.run_impute(conf_file, project_dir_graph, project_dir_in_file) -def impute_instance(config, graph, count_by_prob= None): + +def impute_instance(config, graph, count_by_prob=None): imputation = Imputation(graph, config, count_by_prob) return imputation + def graph_instance(config): graph = Graph(config) - graph.build_graph(config["node_file"], config["top_links_file"], config["edges_file"]) + graph.build_graph( + config["node_file"], 
config["top_links_file"], config["edges_file"] + ) return graph - - diff --git a/grim/imputation/graph_generation/generate_neo4j_multi_hpf.py b/grim/imputation/graph_generation/generate_neo4j_multi_hpf.py index 54fea9e..ecfd53f 100644 --- a/grim/imputation/graph_generation/generate_neo4j_multi_hpf.py +++ b/grim/imputation/graph_generation/generate_neo4j_multi_hpf.py @@ -12,7 +12,7 @@ import sys import os -#sys.path.insert(0, os.path.join("..")) +# sys.path.insert(0, os.path.join("..")) # set haplotype frequency trimming threshold @@ -36,14 +36,14 @@ # csv files on a mid-2015 MacBook Pro (2.5 GHz Intel Core i7) and will result # in 1.8M nodes (262MB), # 26M edges (3.6B)and 107M top links (200MB). -#FULL_LOCI = 'ABCQR' +# FULL_LOCI = 'ABCQR' ############################################################################## # functions ############################################################################## # nCr: n Choose r def nCr(nchars, r): - return [''.join(x) for x in combinations(nchars, r)] + return ["".join(x) for x in combinations(nchars, r)] # dividebyzero: safe division @@ -56,19 +56,16 @@ def dividebyzero(a, b): # make_allele_list: convert haplotype into sorted list of alleles def make_allele_list(haplotype, full_name_index_dict, num_of_alleles): - hl = haplotype.split('~') + hl = haplotype.split("~") # Remove g from the allele names - hl = list(map(lambda h: h[:-1] if (h[-1] == 'g') else h, hl)) - sorted_h1 = ['0']*num_of_alleles + hl = list(map(lambda h: h[:-1] if (h[-1] == "g") else h, hl)) + sorted_h1 = ["0"] * num_of_alleles for allele in hl: - locus = allele.split('*')[0] - sorted_h1[full_name_index_dict[locus] - 1 ] = allele + locus = allele.split("*")[0] + sorted_h1[full_name_index_dict[locus] - 1] = allele return sorted_h1 - - - # Find the index of a loci name in `FULL_LOCI`. ie. `BC` in `ABCRQ` is `[1,2]` def find_indices(FULL_LOCI, loci): return list(map(lambda x: indexOf(FULL_LOCI, x), loci)) @@ -76,11 +73,13 @@ def find_indices(FULL_LOCI, loci): # Given a list of indices find the corresponding loci combo def find_loci_name(FULL_LOCI, loci_indices): - return ''.join([FULL_LOCI[i] for i in sorted(loci_indices)]) + return "".join([FULL_LOCI[i] for i in sorted(loci_indices)]) # Find all the parents of the loci with loci_indices against loci_allele_list. 
-def find_parents(FULL_LOCI, loci_indices, loci_allele_list, p, ParentLoci, top_nodes_plan_b): +def find_parents( + FULL_LOCI, loci_indices, loci_allele_list, p, ParentLoci, top_nodes_plan_b +): fi = find_indices(FULL_LOCI, FULL_LOCI) new_loci = set(fi).difference(loci_indices) parents = [] @@ -91,12 +90,13 @@ def find_parents(FULL_LOCI, loci_indices, loci_allele_list, p, ParentLoci, top_n loci_name = find_loci_name(FULL_LOCI, new_loci) if loci_name in top_nodes_plan_b: # print(loci_name) - haplotype = '~'.join([loci_allele_list[i] for i in sorted(new_loci)]) + haplotype = "~".join([loci_allele_list[i] for i in sorted(new_loci)]) parents.append(ParentLoci(loci=loci_name, haplotype=haplotype, p=p)) return parents + # Find the wanted nodes for plan A and plan B -def labels_for_grap(conf, full_loci,csvdir): +def labels_for_grap(conf, full_loci, csvdir): nodes_graph = conf.get("Plan_A_Matrix", []) @@ -108,51 +108,56 @@ def labels_for_grap(conf, full_loci,csvdir): return list_all_combo, list_all_combo, list_all_combo, list_all_combo nodes_plan_a = [] - nodes_plan_b = []# nodes from which edges to the parents came out in plan b - top_nodes_plan_b = []# nodes to which edges will reach in plan b or single locus without requested parents. - - - - matrix_plan_b = conf.get("Plan_B_Matrix", [ - [[1, 2, 3, 4, 5]], - [[1, 2, 3], [4, 5]], - [[1], [2, 3], [4, 5]], - [[1, 2, 3], [4], [5]], - [[1], [2, 3], [4], [5]], - [[1], [2], [3], [4], [5]] - ])[1] + nodes_plan_b = [] # nodes from which edges to the parents came out in plan b + top_nodes_plan_b = ( + [] + ) # nodes to which edges will reach in plan b or single locus without requested parents. + + matrix_plan_b = conf.get( + "Plan_B_Matrix", + [ + [[1, 2, 3, 4, 5]], + [[1, 2, 3], [4, 5]], + [[1], [2, 3], [4, 5]], + [[1, 2, 3], [4], [5]], + [[1], [2, 3], [4], [5]], + [[1], [2], [3], [4], [5]], + ], + )[1] # convert the first recombination event from loci indexes to loci names, # and add the event subcomponents to top_nodes_plan_b for subcomponent in matrix_plan_b: for idx, locus in enumerate(subcomponent): subcomponent[idx] = str(locus) - top_nodes_plan_b.append(('').join(subcomponent)) + top_nodes_plan_b.append(("").join(subcomponent)) list_complement = [] full_loci_list = [] full_loci_list[:0] = full_loci for node_label in nodes_graph: # add nodes of plan a - label = '' + label = "" list_label = [] for locus_idx in node_label: label += str(locus_idx) list_label.append(str(locus_idx)) nodes_plan_a.append(label) - complement_node = ('').join(sorted([item for item in full_loci_list if item not in list_label])) - if complement_node != '': + complement_node = ("").join( + sorted([item for item in full_loci_list if item not in list_label]) + ) + if complement_node != "": list_complement.append(complement_node) # find the sub-nodes of label for plan b for subcomponent in matrix_plan_b: - node = '' + node = "" for locus in label: if locus in subcomponent: node += locus - if node != '' and not node in top_nodes_plan_b: + if node != "" and not node in top_nodes_plan_b: nodes_plan_b.append(node) - #add the full-locus to plan a nodes + # add the full-locus to plan a nodes if not full_loci in nodes_plan_a: nodes_plan_a.append(full_loci) @@ -164,24 +169,29 @@ def labels_for_grap(conf, full_loci,csvdir): # add the complementary nodes if they are not in one of the graphs for node in list_complement: - if (not node in nodes_plan_a) and (not node in nodes_plan_b) and (not node in top_nodes_plan_b): + if ( + (not node in nodes_plan_a) + and (not node in nodes_plan_b) + 
and (not node in top_nodes_plan_b) + ): top_nodes_plan_b.append(node) nodes_plan_b = list(set(nodes_plan_b)) all_combo_list = list(dict.fromkeys(nodes_plan_a + nodes_plan_b + top_nodes_plan_b)) - with open(csvdir + 'nodes_for_plan_a.txt', 'w') as f: + with open(csvdir + "nodes_for_plan_a.txt", "w") as f: for item in nodes_plan_a: f.write("%s\n" % item) - with open(csvdir + 'nodes_for_plan_b.txt', 'w') as f: - for item in (nodes_plan_b + top_nodes_plan_b): + with open(csvdir + "nodes_for_plan_b.txt", "w") as f: + for item in nodes_plan_b + top_nodes_plan_b: f.write("%s\n" % item) - #pickle.dump(nodes_plan_a, open(csvdir + '/nodes_for_plan_a.pkl', "wb")) - #pickle.dump(nodes_plan_b + top_nodes_plan_b, open(csvdir + '/nodes_for_plan_b.pkl', "wb")) + # pickle.dump(nodes_plan_a, open(csvdir + '/nodes_for_plan_a.pkl', "wb")) + # pickle.dump(nodes_plan_b + top_nodes_plan_b, open(csvdir + '/nodes_for_plan_b.pkl', "wb")) return all_combo_list, nodes_plan_a, nodes_plan_b, top_nodes_plan_b -#create full loci order, -#map beteen the loci neme and his index + +# create full loci order, +# map beteen the loci neme and his index def loci_order(loc_values): full_name_index_dict = {} all_loci_set = set() @@ -189,28 +199,32 @@ def loci_order(loc_values): full_name_index_dict[locus] = val all_loci_set.add(str(val)) - FULL_LOCI = ''.join( sorted(all_loci_set)) + FULL_LOCI = "".join(sorted(all_loci_set)) return FULL_LOCI, full_name_index_dict -def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop = None, em= False): + +def generate_graph( + config_file="../../conf/minimal-configuration.json", em_pop=None, em=False +): ############################################################################## # Configure ############################################################################## # set output directory and create it if it doesn't exist - #csvdir = "output/csv" - - + # csvdir = "output/csv" # Input file - #freq_file = path + freq_file + # freq_file = path + freq_file parser = argparse.ArgumentParser() - parser.add_argument("-c", "--config", - required=False, - default= config_file, - help="Configuration JSON file", - type=str) + parser.add_argument( + "-c", + "--config", + required=False, + default=config_file, + help="Configuration JSON file", + type=str, + ) args = parser.parse_args() configuration_file = args.config @@ -221,59 +235,77 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop csvdir = conf.get("graph_files_path") pathlib.Path(csvdir).mkdir(parents=True, exist_ok=True) - if csvdir[-1] != '/': - csvdir += '/' + if csvdir[-1] != "/": + csvdir += "/" pops = conf.get("populations") if em_pop: pops = em_pop freq_trim = conf.get("freq_trim_threshold") - freq_file = conf.get("freq_file", "default" ) + freq_file = conf.get("freq_file", "default") if freq_file == "default": - freq_file = os.path.dirname(os.path.realpath(__file__)) + '/output/hpf.csv' + freq_file = os.path.dirname(os.path.realpath(__file__)) + "/output/hpf.csv" dict_count_of_pop = {} - pop_ratio_dir = conf.get("pops_count_file", os.path.dirname(os.path.realpath(__file__)) + '/imputation/graph_generation/output/pop_ratio.txt') + pop_ratio_dir = conf.get( + "pops_count_file", + os.path.dirname(os.path.realpath(__file__)) + + "/imputation/graph_generation/output/pop_ratio.txt", + ) path = pathlib.Path(pop_ratio_dir) - if em or not path.is_file(): for pop in pops: - dict_count_of_pop[pop] = freq_trim + dict_count_of_pop[pop] = freq_trim else: with open(pop_ratio_dir) as f_count: for 
line in f_count: - pop, count_pop, ratio = line.strip().split(',') + pop, count_pop, ratio = line.strip().split(",") dict_count_of_pop[pop] = freq_trim / float(count_pop) - # Display the configurations we are using - print('****************************************************************************************************') + print( + "****************************************************************************************************" + ) print("Performing graph generation based on following configuration:") print("\tPopulation: {}".format(pops)) print("\tFreq File: {}".format(freq_file)) print("\tFreq Trim Threshold: {}".format(freq_trim)) - print('****************************************************************************************************') + print( + "****************************************************************************************************" + ) ############################################################################## # setup graph elements ############################################################################## - HaplotypeNode = namedtuple('HaplotypeNode', 'node_id, freq_list, allele_list, parents, top_links') - TopLink = namedtuple('TopLink', 'node_id, haplotype') - ParentLoci = namedtuple('ParentLoci', 'loci, haplotype, p') + HaplotypeNode = namedtuple( + "HaplotypeNode", "node_id, freq_list, allele_list, parents, top_links" + ) + TopLink = namedtuple("TopLink", "node_id, haplotype") + ParentLoci = namedtuple("ParentLoci", "loci, haplotype, p") - header = [':START_ID(HAPLOTYPE)', ':END_ID(HAPLOTYPE)', 'CP:DOUBLE[]', ':TYPE'] + header = [":START_ID(HAPLOTYPE)", ":END_ID(HAPLOTYPE)", "CP:DOUBLE[]", ":TYPE"] FULL_LOCI, full_name_index_dict = loci_order(conf.get("loci_map")) - - all_combo_list, nodes_plan_a, nodes_plan_b, top_nodes_plan_b = labels_for_grap(conf, FULL_LOCI, csvdir) + all_combo_list, nodes_plan_a, nodes_plan_b, top_nodes_plan_b = labels_for_grap( + conf, FULL_LOCI, csvdir + ) sequence = count(0) - loci_combo_map = {combo_list: defaultdict(lambda: HaplotypeNode(node_id=next(sequence), - freq_list=[], allele_list=[], parents=[], - top_links=set())) for combo_list in all_combo_list} + loci_combo_map = { + combo_list: defaultdict( + lambda: HaplotypeNode( + node_id=next(sequence), + freq_list=[], + allele_list=[], + parents=[], + top_links=set(), + ) + ) + for combo_list in all_combo_list + } # for key in loci_combo_map: # print (key) @@ -286,7 +318,7 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop num_of_alleles = len(FULL_LOCI) with open(freq_file) as f: for hap_line in f: - haplotype, pop, freq = hap_line.split(',') + haplotype, pop, freq = hap_line.split(",") if haplotype == "hap": continue freq = float(freq) @@ -298,8 +330,8 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop continue hap_list = make_allele_list(haplotype, full_name_index_dict, num_of_alleles) - haplotype = '~'.join(hap_list) - pop_haplotype = pop + '-' + haplotype + haplotype = "~".join(hap_list) + pop_haplotype = pop + "-" + haplotype haplist_overall[haplotype] = 1 pop_hap_combos[pop_haplotype] = freq @@ -308,18 +340,19 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop freqs = [] for pop in pops: # check to see if population + haplotype combo exists - pop_haplotype = pop + '-' + haplotype + pop_haplotype = pop + "-" + haplotype if pop_haplotype in pop_hap_combos: freqs.append(pop_hap_combos[pop_haplotype]) else: freqs.append(0) hap_list = make_allele_list(haplotype, 
full_name_index_dict, num_of_alleles) - loci_combo_map[FULL_LOCI][haplotype] = HaplotypeNode(node_id=next(sequence), - freq_list=freqs, - allele_list=hap_list, - parents=None, - top_links=set()) - + loci_combo_map[FULL_LOCI][haplotype] = HaplotypeNode( + node_id=next(sequence), + freq_list=freqs, + allele_list=hap_list, + parents=None, + top_links=set(), + ) # list(loci_combo_map[FULL_LOCI].items())[99] @@ -331,20 +364,31 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop loci_indices = find_indices(FULL_LOCI, loci) for full_hap_name, full_hap_node in loci_combo_map[FULL_LOCI].items(): allele_list = full_hap_node.allele_list - haplotype = '~'.join([allele_list[i] for i in loci_indices]) + haplotype = "~".join([allele_list[i] for i in loci_indices]) haplotype_node = loci_map[haplotype] - if loci in nodes_plan_b:# find parents just for internal nodes of plan B - parents = find_parents(FULL_LOCI, loci_indices, allele_list, full_hap_node.freq_list, ParentLoci, top_nodes_plan_b) + if ( + loci in nodes_plan_b + ): # find parents just for internal nodes of plan B + parents = find_parents( + FULL_LOCI, + loci_indices, + allele_list, + full_hap_node.freq_list, + ParentLoci, + top_nodes_plan_b, + ) parents_list = haplotype_node.parents for parent in parents: parents_list.append(parent) else: parents_list = [] - if loci in nodes_plan_a:# top link just for nodes of plan A + if loci in nodes_plan_a: # top link just for nodes of plan A top_links = haplotype_node.top_links - top_links.add(TopLink(node_id=full_hap_node.node_id, haplotype=full_hap_name)) + top_links.add( + TopLink(node_id=full_hap_node.node_id, haplotype=full_hap_name) + ) else: top_links = None @@ -357,33 +401,36 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop new_freq_list = haplotype_node.freq_list freq_sum = list(map(add, new_freq_list, full_hap_node.freq_list)) - new_node = HaplotypeNode(node_id=haplotype_node.node_id, - freq_list=freq_sum, - allele_list=None, - parents=parents_list, - top_links=top_links) + new_node = HaplotypeNode( + node_id=haplotype_node.node_id, + freq_list=freq_sum, + allele_list=None, + parents=parents_list, + top_links=top_links, + ) loci_map[haplotype] = new_node - # #### Build Nodes file - header = ['haplotypeId:ID(HAPLOTYPE)', 'name', 'loci:LABEL', 'frequency:DOUBLE[]'] + header = ["haplotypeId:ID(HAPLOTYPE)", "name", "loci:LABEL", "frequency:DOUBLE[]"] node_file = csvdir + conf.get("node_csv_file") - with open(node_file, mode='w') as csvfile: + with open(node_file, mode="w") as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerow(header) for loci in all_combo_list: loci_map = loci_combo_map[loci] for haplotype, haplotype_node in loci_map.items(): - freq_array = ';'.join(map(str, haplotype_node.freq_list)) - csv_writer.writerow([haplotype_node.node_id, haplotype, loci, freq_array]) + freq_array = ";".join(map(str, haplotype_node.freq_list)) + csv_writer.writerow( + [haplotype_node.node_id, haplotype, loci, freq_array] + ) # #### Build Edges File - edgeheader = [':START_ID(HAPLOTYPE)', ':END_ID(HAPLOTYPE)', 'CP:DOUBLE[]', ':TYPE'] + edgeheader = [":START_ID(HAPLOTYPE)", ":END_ID(HAPLOTYPE)", "CP:DOUBLE[]", ":TYPE"] edge_file = csvdir + conf.get("edges_csv_file") - with open(edge_file, mode='w') as csvfile: + with open(edge_file, mode="w") as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerow(edgeheader) @@ -400,13 +447,15 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop loci_combo = 
parent.loci hap = parent.haplotype parent_id = loci_combo_map[loci_combo][hap].node_id - csv_writer.writerow([haplotype_node.node_id, parent_id, prob_array, 'CP']) + csv_writer.writerow( + [haplotype_node.node_id, parent_id, prob_array, "CP"] + ) # #### Generate Top Links file - topheader = [':START_ID(HAPLOTYPE)', ':END_ID(HAPLOTYPE)', ':TYPE'] + topheader = [":START_ID(HAPLOTYPE)", ":END_ID(HAPLOTYPE)", ":TYPE"] top_links_file = csvdir + conf.get("top_links_csv_file") - with open(top_links_file, mode='w') as csvfile: + with open(top_links_file, mode="w") as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerow(topheader) for loci in nodes_plan_a: @@ -416,16 +465,23 @@ def generate_graph(config_file = "../../conf/minimal-configuration.json", em_pop for haplotype, haplotype_node in list(loci_map.items()): top_links = haplotype_node.top_links for top_link in top_links: - csv_writer.writerow([haplotype_node.node_id, top_link.node_id, 'TOP']) + csv_writer.writerow( + [haplotype_node.node_id, top_link.node_id, "TOP"] + ) # #### Generate Info Node file - infonode_header = ['INFO_NODE_ID:ID(INFO_NODE)', 'populations:STRING[]', 'INFO_NODE:LABEL'] + infonode_header = [ + "INFO_NODE_ID:ID(INFO_NODE)", + "populations:STRING[]", + "INFO_NODE:LABEL", + ] top_links_file = csvdir + conf.get("info_node_csv_file") - with open(top_links_file, mode='w') as csvfile: + with open(top_links_file, mode="w") as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerow(infonode_header) csv_writer.writerow([1, ";".join(pops), "INFO_NODE"]) -if __name__== "__main__": + +if __name__ == "__main__": generate_graph() diff --git a/grim/imputation/graph_generation/generate_neo4j_single_hpf.py b/grim/imputation/graph_generation/generate_neo4j_single_hpf.py index d806bcc..fa2df80 100755 --- a/grim/imputation/graph_generation/generate_neo4j_single_hpf.py +++ b/grim/imputation/graph_generation/generate_neo4j_single_hpf.py @@ -1,4 +1,3 @@ - # coding: utf-8 # #### TODO @@ -8,36 +7,39 @@ # * ~~Remove ChainMap~~ # * ~~Use namedtuples~~ # * ~~Ignore 0 HF frequency from Input file~~ -# * Compare with Matlab output +# * Compare with Matlab output # * Document the code # * Extract a module # * Accepts the list of FULL_LOCI and initial Freq. 
file -from itertools import combinations, count +from itertools import combinations, count from collections import defaultdict, namedtuple from operator import indexOf, add, truediv import csv import os import pandas as pd -os.makedirs('output/csv', exist_ok=True) +os.makedirs("output/csv", exist_ok=True) -HaplotypeNode = namedtuple('HaplotypeNode', 'node_id, freq_list, allele_list, parents, top_links') -TopLink = namedtuple('TopLink', 'node_id, haplotype') -ParentLoci = namedtuple('ParentLoci', 'loci, haplotype, p') +HaplotypeNode = namedtuple( + "HaplotypeNode", "node_id, freq_list, allele_list, parents, top_links" +) +TopLink = namedtuple("TopLink", "node_id, haplotype") +ParentLoci = namedtuple("ParentLoci", "loci, haplotype, p") # #### nCr: n Choose r + def nCr(nchars, r): - return [''.join(x) for x in combinations(nchars, r)] + return ["".join(x) for x in combinations(nchars, r)] -FULL_LOCI='ABCQR' +FULL_LOCI = "ABCQR" all_combo_list = [FULL_LOCI] -for i in range(len(FULL_LOCI) -1, 0, -1): +for i in range(len(FULL_LOCI) - 1, 0, -1): all_combo_list.extend(nCr(FULL_LOCI, i)) len(all_combo_list) @@ -45,12 +47,18 @@ def nCr(nchars, r): sequence = count(0) -loci_combo_map = {combo_list: defaultdict(lambda : HaplotypeNode(node_id = next(sequence), - freq_list = [], - allele_list=[], - parents=[], - top_links=set())) - for combo_list in all_combo_list } +loci_combo_map = { + combo_list: defaultdict( + lambda: HaplotypeNode( + node_id=next(sequence), + freq_list=[], + allele_list=[], + parents=[], + top_links=set(), + ) + ) + for combo_list in all_combo_list +} # #### Verify @@ -61,9 +69,9 @@ def nCr(nchars, r): def make_allele_list(haplotype): - hl = haplotype.split('~') + hl = haplotype.split("~") # Remove g from the allele names - hl = list(map(lambda h: h[:-1] if(h[-1] == 'g') else h, hl)) + hl = list(map(lambda h: h[:-1] if (h[-1] == "g") else h, hl)) hl.sort() return hl @@ -75,28 +83,28 @@ def make_allele_list(haplotype): # #### Load Frequency file # Load the frequency file into the FULL_LOCI dictionary with the cleaned up haplotype as the key. 
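
For orientation before the hunks that follow: the loading step is small enough to sketch standalone. A minimal sketch, assuming the same "hap,pop,freq" rows and the make_allele_list cleanup above; the path mirrors the freqfile default used below, everything else is illustrative:

    # Sketch: load an hpf.csv into the two dicts the script builds below.
    def clean_haplotype(haplotype):
        # split on '~', drop a trailing 'g' from each allele name, sort
        alleles = [a[:-1] if a.endswith("g") else a for a in haplotype.split("~")]
        return "~".join(sorted(alleles))

    haplist_overall, pop_hap_combos = {}, {}
    with open("output/hpf.csv") as f:
        for line in f:
            hap, pop, freq = line.strip().split(",")
            if hap == "hap":  # header row
                continue
            if float(freq) == 0.0:  # zero-frequency rows are ignored
                continue
            hap = clean_haplotype(hap)
            haplist_overall[hap] = 1
            pop_hap_combos[pop + "-" + hap] = float(freq)
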
-haplist_overall = {} # list of haplotypes across all populations +haplist_overall = {} # list of haplotypes across all populations pop_hap_combos = {} pops = {} # pops = ['AAFA','AFB','AINDI','AISC','ALANAM','AMIND','CARB','CARHIS','CARIBI','FILII','KORI','JAPI','MENAFC', # 'NAMER','NCHI','SCAHIS','SCAMB','SCSEAI','VIET','AFA','API','CAU','HIS','NAM'] # pops = ['VIET', 'KORI', 'JAPI', 'FILII'] # pops = ['VIET', 'FILII'] -freqfile = 'output/hpf.csv' +freqfile = "output/hpf.csv" with open(freqfile) as f: for hap_line in f: - haplotype, pop, freq = hap_line.split(',') + haplotype, pop, freq = hap_line.split(",") if haplotype == "hap": continue freq = float(freq) # Ignore lines with 0 freq if freq == 0.0: continue - #if freq < 0.0001: # REMOVE THESE TWO LINES TO GENERATE COMPLETE EDGE + # if freq < 0.0001: # REMOVE THESE TWO LINES TO GENERATE COMPLETE EDGE # continue hap_list = make_allele_list(haplotype) - haplotype = '~'.join(hap_list) - pop_haplotype = pop + '-' + haplotype + haplotype = "~".join(hap_list) + pop_haplotype = pop + "-" + haplotype pops[pop] = 1 haplist_overall[haplotype] = 1 pop_hap_combos[pop_haplotype] = freq @@ -106,17 +114,19 @@ def make_allele_list(haplotype): freqs = [] for pop in pops: # check to see if population + haplotype combo exists - pop_haplotype = pop + '-' + haplotype + pop_haplotype = pop + "-" + haplotype if pop_haplotype in pop_hap_combos: freqs.append(pop_hap_combos[pop_haplotype]) else: freqs.append(0) - hap_list = make_allele_list(haplotype) - loci_combo_map[FULL_LOCI][haplotype] = HaplotypeNode(node_id = next(sequence), - freq_list = freqs, - allele_list = hap_list, - parents = None, - top_links= set()) + hap_list = make_allele_list(haplotype) + loci_combo_map[FULL_LOCI][haplotype] = HaplotypeNode( + node_id=next(sequence), + freq_list=freqs, + allele_list=hap_list, + parents=None, + top_links=set(), + ) # #### Verify @@ -128,6 +138,7 @@ def make_allele_list(haplotype): # Find the index of a loci name in `FULL_LOCI`. ie. `BC` in `ABCRQ` is `[1,2]` + def find_indices(loci): return list(map(lambda x: indexOf(FULL_LOCI, x), loci)) @@ -139,19 +150,21 @@ def find_indices(loci): print(find_indices(loci)) -find_indices('ABCQ') +find_indices("ABCQ") # Given a list of indices find the corresponding loci combo + def find_loci_name(loci_indices): - return ''.join([FULL_LOCI[i] for i in sorted(loci_indices)]) + return "".join([FULL_LOCI[i] for i in sorted(loci_indices)]) + +find_loci_name([1, 2]) -find_loci_name([1,2]) +# Find all the parents of the loci with loci_indices against loci_allele_list. -# Find all the parents of the loci with loci_indices against loci_allele_list. 
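
Concretely, the parent relation that find_parents computes next: the parents of a k-locus block are the (k+1)-locus blocks that contain it. A small worked sketch under the FULL_LOCI = "ABCQR" convention of this file:

    # Sketch: the parents of the 2-locus block "BC" (indices [1, 2]) come from
    # adding each remaining index in turn, giving "ABC", "BCQ" and "BCR".
    FULL_LOCI = "ABCQR"

    def find_loci_name(loci_indices):
        return "".join(FULL_LOCI[i] for i in sorted(loci_indices))

    parents = [find_loci_name([1, 2] + [i]) for i in (0, 3, 4)]
    print(parents)  # ['ABC', 'BCQ', 'BCR']
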
def find_parents(loci_indices, loci_allele_list, p): fi = find_indices(FULL_LOCI) @@ -160,10 +173,10 @@ def find_parents(loci_indices, loci_allele_list, p): for i in list(new_loci): new_loci = loci_indices.copy() new_loci.append(i) - #print(new_loci) + # print(new_loci) loci_name = find_loci_name(new_loci) - #print(loci_name) - haplotype = '~'.join([loci_allele_list[i] for i in sorted(new_loci)]) + # print(loci_name) + haplotype = "~".join([loci_allele_list[i] for i in sorted(new_loci)]) parents.append(ParentLoci(loci=loci_name, haplotype=haplotype, p=p)) return parents @@ -172,7 +185,7 @@ def find_parents(loci_indices, loci_allele_list, p): # Find parents of a random haplotype from C # haplotype = 'A*01:01~B*15:02~C*01:02~DQB1*03:01~DRB1*12:01' -haplotype = 'A*24:02~B*52:01~C*12:02~DQB1*06:01~DRB1*15:02' +haplotype = "A*24:02~B*52:01~C*12:02~DQB1*06:01~DRB1*15:02" # haplotype = 'A*01:01~B*15:01~C*01:02~DQB1*05:01~DRB1*01:01' @@ -182,21 +195,29 @@ def find_parents(loci_indices, loci_allele_list, p): # For each loci block, get all the allele combinations. Create parents link, and accumulate the freqs to create new Haplotype Nodes. for loci in all_combo_list: - if loci != FULL_LOCI: # FULL_LOCI is already in the dictionary + if loci != FULL_LOCI: # FULL_LOCI is already in the dictionary loci_map = loci_combo_map[loci] loci_indices = find_indices(loci) - for full_haplotype_name, full_haplotype_node in loci_combo_map[FULL_LOCI].items(): + for full_haplotype_name, full_haplotype_node in loci_combo_map[ + FULL_LOCI + ].items(): allele_list = full_haplotype_node.allele_list - haplotype = '~'.join([allele_list[i] for i in loci_indices]) - haplotype_node = loci_map[haplotype] - - parents = find_parents(loci_indices, allele_list, full_haplotype_node.freq_list) + haplotype = "~".join([allele_list[i] for i in loci_indices]) + haplotype_node = loci_map[haplotype] + + parents = find_parents( + loci_indices, allele_list, full_haplotype_node.freq_list + ) parents_list = haplotype_node.parents for parent in parents: parents_list.append(parent) - + top_links = haplotype_node.top_links - top_links.add(TopLink(node_id=full_haplotype_node.node_id, haplotype=full_haplotype_name)) + top_links.add( + TopLink( + node_id=full_haplotype_node.node_id, haplotype=full_haplotype_name + ) + ) # have to make freq_list with all zeros otherwise map add function returns empty list new_freq_list = [] @@ -205,19 +226,21 @@ def find_parents(loci_indices, loci_allele_list, p): new_freq_list.append(0) else: new_freq_list = haplotype_node.freq_list - freq_sum = list(map(add,new_freq_list,full_haplotype_node.freq_list)) + freq_sum = list(map(add, new_freq_list, full_haplotype_node.freq_list)) - new_node = HaplotypeNode(node_id=haplotype_node.node_id, - freq_list=freq_sum, - allele_list=None, - parents=parents_list, - top_links=top_links) + new_node = HaplotypeNode( + node_id=haplotype_node.node_id, + freq_list=freq_sum, + allele_list=None, + parents=parents_list, + top_links=top_links, + ) loci_map[haplotype] = new_node -haplotype="A*24:02~B*52:01~C*12:02~DQB1*06:01" -loci_combo_map['ABCQ'][haplotype] +haplotype = "A*24:02~B*52:01~C*12:02~DQB1*06:01" +loci_combo_map["ABCQ"][haplotype] # haplotype="A*24:02~B*52:01~C*12:02~DQB1*06:01~DRB1*15:02" # loci_combo_map[FULL_LOCI][haplotype] # haplotype="A*24:02" @@ -230,7 +253,7 @@ def find_parents(loci_indices, loci_allele_list, p): for loci in all_combo_list: print(loci) loci_map = loci_combo_map[loci] - for k,v in list(loci_map.items())[1:10]: + for k, v in list(loci_map.items())[1:10]: 
print(k, v.node_id, v.freq_list, sep="\t") @@ -243,32 +266,34 @@ def find_parents(loci_indices, loci_allele_list, p): # #### Build Nodes file -header = ['haplotypeId:ID(HAPLOTYPE)', 'name', 'loci:LABEL', 'frequency:DOUBLE[]'] -node_file = 'output/csv/nodes.csv' -with open(node_file, mode='w') as csvfile: +header = ["haplotypeId:ID(HAPLOTYPE)", "name", "loci:LABEL", "frequency:DOUBLE[]"] +node_file = "output/csv/nodes.csv" +with open(node_file, mode="w") as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerow(header) for loci in all_combo_list: loci_map = loci_combo_map[loci] for haplotype, haplotype_node in loci_map.items(): - freq_array = ';'.join(map(str,haplotype_node.freq_list)) + freq_array = ";".join(map(str, haplotype_node.freq_list)) csv_writer.writerow([haplotype_node.node_id, haplotype, loci, freq_array]) # #### Build Edges File + def dividebyzero(a, b): if b == 0: return 0 - else: - return a/b + else: + return a / b + # header = [':START_ID(HAPLOTYPE)', ':END_ID(HAPLOTYPE)', 'CP:DOUBLE[]', ':TYPE'] -# +# # with open(edge_file, mode='w') as csvfile: # csv_writer = csv.writer(csvfile) # csv_writer.writerow(header) -edge_file = 'output/csv/edges.csv' +edge_file = "output/csv/edges.csv" fq_l = list() for loci in all_combo_list: if loci != FULL_LOCI: @@ -283,38 +308,47 @@ def dividebyzero(a, b): loci_combo = parent.loci hap = parent.haplotype parent_id = loci_combo_map[loci_combo][hap].node_id - l = list([haplotype_node.node_id, parent_id, prob_array, 'CP']) + l = list([haplotype_node.node_id, parent_id, prob_array, "CP"]) fq_l.extend([l]) freq_df = pd.DataFrame(fq_l, columns=["HapNode", "ParentID", "P", "CP"]) -ndf = freq_df.join(freq_df.groupby(['HapNode', 'ParentID'])['P'].sum(), on=['HapNode', 'ParentID'], rsuffix='_r') -df2 = ndf[['HapNode', 'ParentID', 'P_r', 'CP']] +ndf = freq_df.join( + freq_df.groupby(["HapNode", "ParentID"])["P"].sum(), + on=["HapNode", "ParentID"], + rsuffix="_r", +) +df2 = ndf[["HapNode", "ParentID", "P_r", "CP"]] df2 = df2.drop_duplicates() -df2.columns = [':START_ID(HAPLOTYPE)', ':END_ID(HAPLOTYPE)', 'CP:DOUBLE[]', ':TYPE'] -df2.to_csv(edge_file, index=False, header=True, line_terminator='\n') +df2.columns = [":START_ID(HAPLOTYPE)", ":END_ID(HAPLOTYPE)", "CP:DOUBLE[]", ":TYPE"] +df2.to_csv(edge_file, index=False, header=True, line_terminator="\n") # # #### Generate Top Links file -header = [':START_ID(HAPLOTYPE)', ':END_ID(HAPLOTYPE)', ':TYPE'] -top_links_file = 'output/csv/top_links.csv' -with open(top_links_file, mode='w') as csvfile: +header = [":START_ID(HAPLOTYPE)", ":END_ID(HAPLOTYPE)", ":TYPE"] +top_links_file = "output/csv/top_links.csv" +with open(top_links_file, mode="w") as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerow(header) for loci in all_combo_list: - if loci != FULL_LOCI: # FULL_LOCI is already in the dictionary - # + if loci != FULL_LOCI: # FULL_LOCI is already in the dictionary + # loci_map = loci_combo_map[loci] for haplotype, haplotype_node in list(loci_map.items()): top_links = haplotype_node.top_links for top_link in top_links: - csv_writer.writerow([haplotype_node.node_id, top_link.node_id, 'TOP']) + csv_writer.writerow( + [haplotype_node.node_id, top_link.node_id, "TOP"] + ) # #### Generate Info Node file -pops = ['CAU'] -infonode_header = ['INFO_NODE_ID:ID(INFO_NODE)', 'populations:STRING[]', 'INFO_NODE:LABEL'] -top_links_file = 'output/csv/info_node.csv' -with open(top_links_file, mode='w') as csvfile: +pops = ["CAU"] +infonode_header = [ + "INFO_NODE_ID:ID(INFO_NODE)", + "populations:STRING[]", + 
"INFO_NODE:LABEL", +] +top_links_file = "output/csv/info_node.csv" +with open(top_links_file, mode="w") as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerow(infonode_header) csv_writer.writerow([1, ";".join(pops), "INFO_NODE"]) - diff --git a/grim/imputation/graph_generation/nemo_to_hpf_csv.py b/grim/imputation/graph_generation/nemo_to_hpf_csv.py index d276429..4ff233d 100644 --- a/grim/imputation/graph_generation/nemo_to_hpf_csv.py +++ b/grim/imputation/graph_generation/nemo_to_hpf_csv.py @@ -8,11 +8,14 @@ output_dir = "output/" parser = argparse.ArgumentParser() -parser.add_argument("-c", "--config", - required=False, - default="../../conf/minimal-configuration.json", - help="Configuration JSON file", - type=str) +parser.add_argument( + "-c", + "--config", + required=False, + default="../../conf/minimal-configuration.json", + help="Configuration JSON file", + type=str, +) args = parser.parse_args() configuration_file = args.config @@ -23,21 +26,27 @@ pops = conf.get("populations") freq_data_dir = project_dir + conf.get("freq_data_dir") -pop_ratio_dir = project_dir + conf.get("pops_count_file", 'imputation/graph_generation/output/pop_ratio.txt') +pop_ratio_dir = project_dir + conf.get( + "pops_count_file", "imputation/graph_generation/output/pop_ratio.txt" +) # Output in HaplotypePopulationFrequency (hpf) csv file -hpf_file = output_dir + 'hpf.csv' +hpf_file = output_dir + "hpf.csv" # Create output directory if it doesn't exist pathlib.Path(output_dir).mkdir(parents=False, exist_ok=True) # Display the configurations we are using -print('****************************************************************************************************') +print( + "****************************************************************************************************" +) print("Conversion to HPF file based on following configuration:") print("\tPopulation: {}".format(pops)) print("\tFrequency File Directory: {}".format(freq_data_dir)) print("\tOutput File: {}".format(hpf_file)) -print('****************************************************************************************************') +print( + "****************************************************************************************************" +) haplist_overall = {} # list of haplotypes across all populations pop_hap_combos = {} @@ -45,13 +54,13 @@ list_pop_count = [] #### Load initial frequency files for pop in pops: - freq_file = freq_data_dir + pop + '.freqs.gz' + freq_file = freq_data_dir + pop + ".freqs.gz" print("Reading Frequency File:\t {}".format(freq_file)) - with gzip.open(freq_file, 'rb') as zf: + with gzip.open(freq_file, "rb") as zf: count_pop = 0 - lines = [x.decode('utf8').strip() for x in zf.readlines()] + lines = [x.decode("utf8").strip() for x in zf.readlines()] for hap_line in lines: - haplotype, count, freq = hap_line.split(',') + haplotype, count, freq = hap_line.split(",") if haplotype == "Haplo": continue freq = float(freq) @@ -59,7 +68,7 @@ if freq == 0.0: continue - pop_haplotype = pop + '-' + haplotype + pop_haplotype = pop + "-" + haplotype haplist_overall[haplotype] = 1 pop_hap_combos[pop_haplotype] = freq @@ -67,18 +76,17 @@ list_pop_count.append(count_pop) sum_pops = sum(list_pop_count) -pop_ratio_file = open(pop_ratio_dir, 'w') -for pop,ratio in zip(pops, list_pop_count): - pop_ratio_file.write("{},{},{}\n".format(pop, ratio,(ratio/sum_pops))) +pop_ratio_file = open(pop_ratio_dir, "w") +for pop, ratio in zip(pops, list_pop_count): + pop_ratio_file.write("{},{},{}\n".format(pop, ratio, (ratio / sum_pops))) 
-header = ['hap', 'pop', 'freq'] - +header = ["hap", "pop", "freq"] print("Writing hpf File:\t {}".format(hpf_file)) -with open(hpf_file, mode='w') as csv_file: - csv_writer = csv.writer(csv_file, delimiter=',', quoting=csv.QUOTE_NONE) +with open(hpf_file, mode="w") as csv_file: + csv_writer = csv.writer(csv_file, delimiter=",", quoting=csv.QUOTE_NONE) csv_writer.writerow(header) for pop_haplotype in pop_hap_combos: (pop, haplotype) = pop_haplotype.split("-") diff --git a/grim/imputation/graph_generation/wmda_download.py b/grim/imputation/graph_generation/wmda_download.py index 4b7f98d..9f6ac34 100755 --- a/grim/imputation/graph_generation/wmda_download.py +++ b/grim/imputation/graph_generation/wmda_download.py @@ -1,4 +1,4 @@ -''' +""" The WMDA data is part of the supplemental materials from the following article. @@ -14,24 +14,24 @@ (CC BY-NC-ND 4.0) license. https://creativecommons.org/licenses/by-nc-nd/4.0/ -''' +""" import os.path import urllib.request from tarfile import TarFile -url='https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5089599/bin/TAN-87-439-s006.tgz' -file_name='TAN-87-439-s006.tgz' +url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5089599/bin/TAN-87-439-s006.tgz" +file_name = "TAN-87-439-s006.tgz" # download file if os.path.isfile(file_name): - print('Skipping download step. File found: ', file_name) + print("Skipping download step. File found: ", file_name) else: - print('Downloading file', file_name, '...') + print("Downloading file", file_name, "...") urllib.request.urlretrieve(url, file_name) # unpack it -print('Unpacking file ...') +print("Unpacking file ...") TarFile.open(file_name).extractall(path="./data/wmda") -print('Done.') +print("Done.") diff --git a/grim/imputation/graph_generation/wmda_to_hpf_csv.py b/grim/imputation/graph_generation/wmda_to_hpf_csv.py index f84be78..49efa31 100755 --- a/grim/imputation/graph_generation/wmda_to_hpf_csv.py +++ b/grim/imputation/graph_generation/wmda_to_hpf_csv.py @@ -1,17 +1,18 @@ import os import pandas as pd -wmda_orig = pd.read_csv('data/wmda/freqs.txt', sep=';', names = ['hap', 'freq']) + +wmda_orig = pd.read_csv("data/wmda/freqs.txt", sep=";", names=["hap", "freq"]) # in case we want to change to read a gz version -#wmda_orig = pd.read_csv('data/wmda/freqs.txt.gz', sep=';', compression='gzip', names = ['hap', 'freq']) +# wmda_orig = pd.read_csv('data/wmda/freqs.txt.gz', sep=';', compression='gzip', names = ['hap', 'freq']) project_dir = "../../" -wmda_orig['pop'] = 'CAU' -wmda = wmda_orig[['hap', 'pop', 'freq']] +wmda_orig["pop"] = "CAU" +wmda = wmda_orig[["hap", "pop", "freq"]] -os.makedirs('output', exist_ok=True) -pop_ratio_dir = project_dir + 'imputation/graph_generation/output/pop_ratio.txt' -pop_ratio_file = open(pop_ratio_dir, 'w') -pop_ratio_file.write("{},{},{}\n".format('CAU', 20/(1e-5),1)) -wmda.to_csv('output/hpf.csv',index=False) +os.makedirs("output", exist_ok=True) +pop_ratio_dir = project_dir + "imputation/graph_generation/output/pop_ratio.txt" +pop_ratio_file = open(pop_ratio_dir, "w") +pop_ratio_file.write("{},{},{}\n".format("CAU", 20 / (1e-5), 1)) +wmda.to_csv("output/hpf.csv", index=False) diff --git a/grim/imputation/imputegl/cypher_plan_b.py b/grim/imputation/imputegl/cypher_plan_b.py index e68738a..b576e46 100755 --- a/grim/imputation/imputegl/cypher_plan_b.py +++ b/grim/imputation/imputegl/cypher_plan_b.py @@ -1,21 +1,17 @@ - - class CypherQueryPlanB(object): - ''' + """ classdocs - ''' + """ - def __init__(self, loci_map):#, indexes): - ''' + def __init__(self, loci_map): # , indexes): + """ 
Constructor - ''' + """ self.loc_map = loci_map - #self.index_dict = indexes + # self.index_dict = indexes - - - def indexToLocus(self,indexes): + def indexToLocus(self, indexes): loci = [] # Sort if the indexes are not sorted @@ -27,7 +23,7 @@ def indexToLocus(self,indexes): return loci - def findTypeFromIndexes(self,indexes): + def findTypeFromIndexes(self, indexes): full_form_loci = self.indexToLocus(indexes) return self.findFullFormType(full_form_loci) @@ -36,12 +32,11 @@ def findFullFormType(self, full_form_loci): return type def getLocus(self, allele): - """ Get the locus back from any given allele - """ + """Get the locus back from any given allele""" loc_allele = allele.split("*") return self.loc_map[loc_allele[0]] def findLinkType(self, typing): loci = sorted([self.getLocus(a) for a in typing[0].split("~")]) loci_lc = "".join([allele for allele in loci]) - return loci_lc \ No newline at end of file + return loci_lc diff --git a/grim/imputation/imputegl/cypher_query.py b/grim/imputation/imputegl/cypher_query.py index 83c112a..c9a14ad 100755 --- a/grim/imputation/imputegl/cypher_query.py +++ b/grim/imputation/imputegl/cypher_query.py @@ -5,24 +5,23 @@ class CypherQuery(object): - ''' + """ classdocs - ''' + """ - def __init__(self, loc_map, verbose: bool=False): - ''' + def __init__(self, loc_map, verbose: bool = False): + """ Constructor - ''' + """ self.verbose = verbose self.logger = logging.getLogger("Logger." + __name__) - #haplo = "A*02:01~C*03:03~B*15:01~DRB1*11:01~DQB1*03:01" + # haplo = "A*02:01~C*03:03~B*15:01~DRB1*11:01~DQB1*03:01" self.loc_map = loc_map for locus, val in self.loc_map.items(): self.loc_map[locus] = str(val) def getLocus(self, allele): - """ Get the locus back from any given allele - """ + """Get the locus back from any given allele""" loc_allele = allele.split("*") return self.loc_map[loc_allele[0]] diff --git a/grim/imputation/imputegl/impute.py b/grim/imputation/imputegl/impute.py index 28bc299..ef9b696 100644 --- a/grim/imputation/imputegl/impute.py +++ b/grim/imputation/imputegl/impute.py @@ -18,11 +18,12 @@ def chunks(l, n): for i in range(0, len(l), n): - yield l[i:i+n] + yield l[i : i + n] -def write_best_prob(name_gl, res, probs, numOfResult, fout, sign = ","): + +def write_best_prob(name_gl, res, probs, numOfResult, fout, sign=","): sumProbsDict = defaultdict(list) - #loop over the result and sum the prob by populations/haplotype + # loop over the result and sum the prob by populations/haplotype for k in range(len(res)): key = res[k][0] + sign + res[k][1] if key in sumProbsDict: @@ -42,54 +43,79 @@ def write_best_prob(name_gl, res, probs, numOfResult, fout, sign = ","): multProbs.sort(key=lambda x: x[0], reverse=True) - #write the output to file + # write the output to file minBestResult = min(numOfResult, len(multProbs)) for k in range(minBestResult): - fout.write(name_gl + ',' + str(multProbs[k][1][0]) + ',' + - str(multProbs[k][0]) + ',' + str(k) + '\n') + fout.write( + name_gl + + "," + + str(multProbs[k][1][0]) + + "," + + str(multProbs[k][0]) + + "," + + str(k) + + "\n" + ) def write_best_prob_genotype(name_gl, res, numOfResult, fout): - sorted_by_value = sorted(res.items(), key=lambda kv: kv[1], reverse=True) + sorted_by_value = sorted(res.items(), key=lambda kv: kv[1], reverse=True) - # write the output to file - minBestResult = min(numOfResult, len(sorted_by_value)) - for k in range(minBestResult): - fout.write(name_gl + ',' + str(sorted_by_value[k][0]) + ',' + - str(sorted_by_value[k][1]) + ',' + str(k) + '\n') + # write the output to 
file + minBestResult = min(numOfResult, len(sorted_by_value)) + for k in range(minBestResult): + fout.write( + name_gl + + "," + + str(sorted_by_value[k][0]) + + "," + + str(sorted_by_value[k][1]) + + "," + + str(k) + + "\n" + ) def write_best_hap_race_pairs(name_gl, haps, pops, probs, numOfResult, fout): all_res = [] for i in range(len(probs)): - pair = haps[i][0] + ';' + pops[i][0] + ',' + haps[i][1] + ';' + pops[i][1] + pair = haps[i][0] + ";" + pops[i][0] + "," + haps[i][1] + ";" + pops[i][1] all_res.append([probs[i], pair]) all_res.sort(key=lambda x: x[0], reverse=True) # write the output to file minBestResult = min(numOfResult, len(all_res)) for k in range(minBestResult): - fout.write(name_gl + ',' + str(all_res[k][1]) + ',' + - str(all_res[k][0]) + ',' + str(k) + '\n') + fout.write( + name_gl + + "," + + str(all_res[k][1]) + + "," + + str(all_res[k][0]) + + "," + + str(k) + + "\n" + ) + # # given a gl string, remove g,L and UUUUs from it # def clean_up_gl(gl): - gl = gl.replace('g', '') + gl = gl.replace("g", "") # keep nulls # gl = gl.replace('N', '') - gl = gl.replace('L', '') + gl = gl.replace("L", "") unknowns = [] locus_gl = gl.split("^") for j in range(len(locus_gl)): gen = locus_gl[j] - if not gen.strip('UUUU') == gen: + if not gen.strip("UUUU") == gen: unknowns.append(gen) for miss in unknowns: locus_gl.remove(miss) - return '^'.join(locus_gl) + return "^".join(locus_gl) class Imputation(object): @@ -106,7 +132,7 @@ def __init__(self, net=None,config=None, count_by_prob=None, verbose=False): # graph is a link to the graph and populations is the list of populations self.logger = logging.getLogger("Logger." + __name__) self.verbose = verbose - # self.graph = graph + # self.graph = graph if not config is None: # can not let populations be set on construction @@ -118,30 +144,30 @@ def __init__(self, net=None,config=None, count_by_prob=None, verbose=False): self.unk_priors = config["UNK_priors"] # For plan b - #self.full_loci = config["full_loci"] - #loc_values = list(set(config["loci_map"].values())) - #loc_values.sort() - #indxes_loc_dict = {} - #indxes_loc_dict_planb = {} - #for i, loc in enumerate(loc_values): - #indxes_loc_dict[loc] = i+1 - #indxes_loc_dict_planb[i+1] = loc + # self.full_loci = config["full_loci"] + # loc_values = list(set(config["loci_map"].values())) + # loc_values.sort() + # indxes_loc_dict = {} + # indxes_loc_dict_planb = {} + # for i, loc in enumerate(loc_values): + # indxes_loc_dict[loc] = i+1 + # indxes_loc_dict_planb[i+1] = loc self.full_hapl = [] - #self.index_dict = {} + # self.index_dict = {} - #for loci in config["loci_map"]: - #self.full_hapl.append(loci) - #self.index_dict[loci] = indxes_loc_dict[config["loci_map"][loci]] + # for loci in config["loci_map"]: + # self.full_hapl.append(loci) + # self.index_dict[loci] = indxes_loc_dict[config["loci_map"][loci]] loc_values = config["loci_map"] ##################### indxes_loc_dict_planb = {} self.index_dict = {} - #short_name_index_dict = {} + # short_name_index_dict = {} for locus, val in loc_values.items(): self.full_hapl.append(locus) - self.index_dict[locus] = val#full_name_index_dict - #indxes_loc_dict_planb[val[1]] = val[0] #indxes_loc_short_name_dict + self.index_dict[locus] = val # full_name_index_dict + # indxes_loc_dict_planb[val[1]] = val[0] #indxes_loc_short_name_dict self.full_loci = config["full_loci"] @@ -150,7 +176,9 @@ def __init__(self, net=None,config=None, count_by_prob=None, verbose=False): self.factor = 0.0001 self._factor_missing_data = config["factor_missing_data"] 
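        # (Usage, per the plan-B/plan-C code later in this file: self.factor is
        #  applied once per open_option_ merge of two partial haplotypes, while
        #  _factor_missing_data enters as factor ** len(missing_loci) when loci
        #  are absent from the graph.)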
self.cypher = CypherQuery(config["loci_map"]) - self.cypher_plan_b = CypherQueryPlanB(config["loci_map"])#, indxes_loc_dict_planb) + self.cypher_plan_b = CypherQueryPlanB( + config["loci_map"] + ) # , indxes_loc_dict_planb) self.matrix_planb = config["matrix_planb"] @@ -158,14 +186,14 @@ def __init__(self, net=None,config=None, count_by_prob=None, verbose=False): self.count_by_prob = np.ones(len(self.populations)) if config["use_pops_count_file"]: with open(config["pops_count_file"]) as f_count: - for i,line in enumerate(f_count): - self.count_by_prob[i] = float(line.strip().split(',')[2]) + for i, line in enumerate(f_count): + self.count_by_prob[i] = float(line.strip().split(",")[2]) else: self.count_by_prob = count_by_prob self.number_of_options_threshold = config["number_of_options_threshold"] # The following is not reliable in parallel mode. - self.plan = 'a' + self.plan = "a" self.option_1 = 0 self.option_2 = 0 self.haplotypes_number_in_phase = config["max_haplotypes_number_in_phase"] @@ -173,14 +201,21 @@ def __init__(self, net=None,config=None, count_by_prob=None, verbose=False): self.nodes_for_plan_A = config["nodes_for_plan_A"] def print_options_count(self, subject_id): - message = "Subject: {id} plan: {plan}, open_phases - count of open regular option: {option1}, " \ - "count of alternative opening: {option2} " - print(message.format(id=subject_id, plan=self.plan, - option1=self.option_1, option2=self.option_2)) + message = ( + "Subject: {id} plan: {plan}, open_phases - count of open regular option: {option1}, " + "count of alternative opening: {option2} " + ) + print( + message.format( + id=subject_id, + plan=self.plan, + option1=self.option_1, + option2=self.option_2, + ) + ) def power_find(self, n): - """produces all powers of 2 - """ + """produces all powers of 2""" result = [] binary = bin(n)[:1:-1] for x in range(len(binary)): @@ -192,31 +227,31 @@ def gl2haps(self, GL_String): # Receives a GL string adn produces a genotype in list structure if GL_String == "" or GL_String == " ": return [] - split_hap = GL_String.split('^') + split_hap = GL_String.split("^") N_Loci = len(split_hap) t1 = [] t2 = [] count = 0 for i in range(N_Loci): - #if not split_hap[i]=='+': - if split_hap[i][0] == "+": - split_hap[i] = split_hap[i][1:] - curr_locus = split_hap[i].split('+') - if len(curr_locus) == 1: - if curr_locus == ['']: - count = count+1 - continue - else: - return [] - t1.append(curr_locus[0]) - t2.append(curr_locus[1]) + # if not split_hap[i]=='+': + if split_hap[i][0] == "+": + split_hap[i] = split_hap[i][1:] + curr_locus = split_hap[i].split("+") + if len(curr_locus) == 1: + if curr_locus == [""]: + count = count + 1 + continue + else: + return [] + t1.append(curr_locus[0]) + t2.append(curr_locus[1]) for i in range(count): - split_hap.remove('') - N_Loci = N_Loci-1 + split_hap.remove("") + N_Loci = N_Loci - 1 Gen = [sorted(t1), sorted(t2)] - return {'Genotype': Gen, 'N_Loc': N_Loci} + return {"Genotype": Gen, "N_Loc": N_Loci} - def gen_phases(self, gen, n_loci, b_phases ): + def gen_phases(self, gen, n_loci, b_phases): # haplotypes in gen is expected to be sorted # Generates all phases, but does not handle locus ambiguities if b_phases is not None: @@ -255,13 +290,13 @@ def open_gl_string(self, gl_string): return # generate the 2^(N-1) phases - chr1 = self.gen_phases(chr['Genotype'], chr['N_Loc']) + chr1 = self.gen_phases(chr["Genotype"], chr["N_Loc"]) # return if the result is empty (why would that be?) 
if chr1 == []: return - phases = self.open_phases_1(chr1, chr['N_Loc'], gl_string) + phases = self.open_phases_1(chr1, chr["N_Loc"], gl_string) return phases def open_phases_1(self, haps, N_Loc, gl_string): @@ -278,14 +313,14 @@ def open_phases_1(self, haps, N_Loc, gl_string): # compute the number of options: options = 1 - for i in range(N_Loc): options = options * (len(hap_list[0][i].split("/"))) + for i in range(N_Loc): + options = options * (len(hap_list[0][i].split("/"))) # if the number of options is smaller than the total number of nodes: - if options < 100: \ - # open ambiguities regularly: + if options < 100: # open ambiguities regularly: for i in range(N_Loc): hap_list = self.open_ambiguities(hap_list, i) - if (k == 0): + if k == 0: H1.append(hap_list) else: H2.append(hap_list) @@ -296,15 +331,32 @@ def open_phases_1(self, haps, N_Loc, gl_string): phases.append([H1, H2]) return phases + def open_ambiguities(self, hap, loc): + # This opens all allele ambiguities + hap_new = [] # produces an empty list of haplotypes + for k in range(len(hap)): # slit a given locus in all haps. + splitHap = hap[k][loc].split("/") + if splitHap == hap[k][loc].split(): + split_loc = hap[k][loc].split("|") + else: + split_loc = splitHap + hap1 = hap[k] + if len(split_loc) > 1: + for i in range(len(split_loc)): + hap1[loc] = split_loc[i] + hap_new.append(hap1[:]) + else: + hap_new.append(hap1[:]) + return hap_new def comp_hap_prob(self, Hap, N_Loc, epsilon, n): haplo_probs = self.get_haplo_freqs(Hap, epsilon, n) probs = list(haplo_probs.values()) - #probs=haplo_probs.values() + # probs=haplo_probs.values() haplos = list(haplo_probs.keys()) if not haplo_probs: - return {'Haps': '', 'Probs': ''} - return {'Haps': haplos, 'Probs': probs} + return {"Haps": "", "Probs": ""} + return {"Haps": haplos, "Probs": probs} # get_haplo_freqs # given a set of possible haplotypes, look them up @@ -338,9 +390,9 @@ def comp_hap_prob(self, Hap, N_Loc, epsilon, n): # return haplo_probs def get_haplo_freqs(self, haplos, epsilon, n=25000): - haplos_joined = ["~".join(item) for item in haplos[0]] ### - #haplos_joined = [item for sublist in haplos for item in sublist] ### - #haplos_joined = ["~".join(sorted(item)) for sublist in haplos for item in sublist] + haplos_joined = ["~".join(item) for sublist in haplos for item in sublist] ### + # haplos_joined = [item for sublist in haplos for item in sublist] ### + # haplos_joined = ["~".join(sorted(item)) for sublist in haplos for item in sublist] return self.netGraph.adjs_query(haplos_joined) @@ -358,17 +410,17 @@ def cal_prob(self, probs1, probs2, epsilon): # This is the part where we loop over all race combinations. # N*N loop, where N is the number of races/ places = [] - #print ("cal_prob\n\n") - #print ("probs1 = ", probs1) - #print ("probs2 = ", probs2) + # print ("cal_prob\n\n") + # print ("probs1 = ", probs1) + # print ("probs2 = ", probs2) for i in range(len(probs1)): for j in range(len(probs2)): - if probs1[i]*probs2[j] >= epsilon and probs1[i]*probs2[j] != 0: + if probs1[i] * probs2[j] >= epsilon and probs1[i] * probs2[j] != 0: places.append([i, j]) # places are the indices of the positions in the population array. 
return places - #move to one dim sorted list, and remove prob=0 + # move to one dim sorted list, and remove prob=0 def convert_list_to_one_dim(self, prob): ProbWithIndexes = [] @@ -376,53 +428,100 @@ def convert_list_to_one_dim(self, prob): for k in range(len(prob)): for j in range(len(prob[k])): if prob[k][j] > 0: - prob_with_indexes_by_prior.append([(prob[k][j] * self.priorMatrix[j][j]), [prob[k][j], [k, j]]]) + prob_with_indexes_by_prior.append( + [(prob[k][j] * self.priorMatrix[j][j]), [prob[k][j], [k, j]]] + ) prob_with_indexes_by_prior.sort(key=lambda x: x[0], reverse=True) - minBestResult = min(self.haplotypes_number_in_phase, len(prob_with_indexes_by_prior)) + minBestResult = min( + self.haplotypes_number_in_phase, len(prob_with_indexes_by_prior) + ) for k in range(minBestResult): ProbWithIndexes.append(prob_with_indexes_by_prior[k][1]) return ProbWithIndexes - def calc_haps_pairs(self, Haps1, Haps2, Prob1WithIndexes, Prob2WithIndexes, epsilon, hap_total, pop_res, maxProb, geno_seen): + def calc_haps_pairs( + self, + Haps1, + Haps2, + Prob1WithIndexes, + Prob2WithIndexes, + epsilon, + hap_total, + pop_res, + maxProb, + geno_seen, + ): for h in range(len(Prob1WithIndexes)): x = epsilon / Prob1WithIndexes[h][0] - x_homozygote = x*2 + x_homozygote = x * 2 prob2Len = len(Prob2WithIndexes) # k=0 # while k < prob2Len and Prob2WithIndexe s[k][0] >= x: for k in range(prob2Len): if Prob2WithIndexes[k][0] >= x: - if self.priorMatrix[Prob1WithIndexes[h][1][1]][Prob2WithIndexes[k][1][1]] > 0: + if ( + self.priorMatrix[Prob1WithIndexes[h][1][1]][ + Prob2WithIndexes[k][1][1] + ] + > 0 + ): hap1 = Haps1[Prob1WithIndexes[h][1][0]] hap2 = Haps2[Prob2WithIndexes[k][1][0]] - if (hap1 != hap2 and (self.priorMatrix[Prob1WithIndexes[h][1][1]][Prob2WithIndexes[k][1][1]] * - Prob2WithIndexes[k][0]) >= x) or (hap1==hap2 and (self.priorMatrix[Prob1WithIndexes[h][1][1]][Prob2WithIndexes[k][1][1]] * - Prob2WithIndexes[k][0]) >= x_homozygote): - - #hap1_list = sorted(hap1.split('~')) - #hap2_list = sorted(hap2.split('~')) - #genotype = ("~".join(sorted(hap1.split("~") + hap2.split("~")))) - #if hap1_list[-1].split("*")[0] == hap2_list[-1].split("*")[0]: - genotype = "^".join(map(lambda x: "+".join(sorted(x)), - zip(sorted(hap1.split('~')), - sorted(hap2.split('~'))))) + if ( + hap1 != hap2 + and ( + self.priorMatrix[Prob1WithIndexes[h][1][1]][ + Prob2WithIndexes[k][1][1] + ] + * Prob2WithIndexes[k][0] + ) + >= x + ) or ( + hap1 == hap2 + and ( + self.priorMatrix[Prob1WithIndexes[h][1][1]][ + Prob2WithIndexes[k][1][1] + ] + * Prob2WithIndexes[k][0] + ) + >= x_homozygote + ): + + # hap1_list = sorted(hap1.split('~')) + # hap2_list = sorted(hap2.split('~')) + # genotype = ("~".join(sorted(hap1.split("~") + hap2.split("~")))) + # if hap1_list[-1].split("*")[0] == hap2_list[-1].split("*")[0]: + genotype = "^".join( + map( + lambda x: "+".join(sorted(x)), + zip( + sorted(hap1.split("~")), sorted(hap2.split("~")) + ), + ) + ) race1 = self.populations[Prob1WithIndexes[h][1][1]] race2 = self.populations[Prob2WithIndexes[k][1][1]] - h1_id = hap1 + ',' +race1 - h2_id = hap2 + ',' + race2 - geno_id = ("-".join(sorted([h1_id, h2_id]))) - #geno_id = tuple(sorted([h1_id, h2_id])) + h1_id = hap1 + "," + race1 + h2_id = hap2 + "," + race2 + geno_id = "-".join(sorted([h1_id, h2_id])) + # geno_id = tuple(sorted([h1_id, h2_id])) if geno_id not in geno_seen: geno_seen.add(geno_id) - prob = Prob1WithIndexes[h][0] * Prob2WithIndexes[k][0] *self.priorMatrix[Prob1WithIndexes[h][1][1]][Prob2WithIndexes[k][1][1]] + prob = ( + 
Prob1WithIndexes[h][0] + * Prob2WithIndexes[k][0] + * self.priorMatrix[Prob1WithIndexes[h][1][1]][ + Prob2WithIndexes[k][1][1] + ] + ) if hap1 != hap2: - prob = prob*2 + prob = prob * 2 if prob > maxProb: maxProb = prob @@ -433,10 +532,9 @@ def calc_haps_pairs(self, Haps1, Haps2, Prob1WithIndexes, Prob2WithIndexes, epsi else: hap_total[genotype] = prob - races = sorted([race1, race2]) - races = races[0] + ',' + races[1] + races = races[0] + "," + races[1] if races in pop_res: sumProb = pop_res[races] + prob @@ -449,9 +547,21 @@ def calc_haps_pairs(self, Haps1, Haps2, Prob1WithIndexes, Prob2WithIndexes, epsi return maxProb - - - def calc_haps_pairs_haplotype(self, Haps1, Haps2, Prob1WithIndexes, Prob2WithIndexes, epsilon, hap_total, pop_res, maxProb, haps_pairs, geno_seen, pop_res_haplo, p_total): + def calc_haps_pairs_haplotype( + self, + Haps1, + Haps2, + Prob1WithIndexes, + Prob2WithIndexes, + epsilon, + hap_total, + pop_res, + maxProb, + haps_pairs, + geno_seen, + pop_res_haplo, + p_total, + ): for h in range(len(Prob1WithIndexes)): x = epsilon / Prob1WithIndexes[h][0] @@ -461,33 +571,60 @@ def calc_haps_pairs_haplotype(self, Haps1, Haps2, Prob1WithIndexes, Prob2WithInd # while k < prob2Len and Prob2WithIndexe s[k][0] >= x: for k in range(prob2Len): if Prob2WithIndexes[k][0] >= x: - if self.priorMatrix[Prob1WithIndexes[h][1][1]][Prob2WithIndexes[k][1][1]] > 0: + if ( + self.priorMatrix[Prob1WithIndexes[h][1][1]][ + Prob2WithIndexes[k][1][1] + ] + > 0 + ): hap1 = Haps1[Prob1WithIndexes[h][1][0]] hap2 = Haps2[Prob2WithIndexes[k][1][0]] - if (hap1 != hap2 and (self.priorMatrix[Prob1WithIndexes[h][1][1]][Prob2WithIndexes[k][1][1]] * - Prob2WithIndexes[k][0]) >= x) or ( hap1 ==hap2 and - (self.priorMatrix[Prob1WithIndexes[h][1][1]][Prob2WithIndexes[k][1][1]] * - Prob2WithIndexes[k][0]) >= x_homozygote): - - #hap1_list = sorted(hap1.split('~')) - #hap2_list = sorted(hap2.split('~')) + if ( + hap1 != hap2 + and ( + self.priorMatrix[Prob1WithIndexes[h][1][1]][ + Prob2WithIndexes[k][1][1] + ] + * Prob2WithIndexes[k][0] + ) + >= x + ) or ( + hap1 == hap2 + and ( + self.priorMatrix[Prob1WithIndexes[h][1][1]][ + Prob2WithIndexes[k][1][1] + ] + * Prob2WithIndexes[k][0] + ) + >= x_homozygote + ): + + # hap1_list = sorted(hap1.split('~')) + # hap2_list = sorted(hap2.split('~')) # genotype = ("~".join(sorted(hap1.split("~") + hap2.split("~")))) - #if hap1_list[-1].split("*")[0] == hap2_list[-1].split("*")[0]: - genotype = ("~".join(sorted(hap1.split("~") + hap2.split("~")))) + # if hap1_list[-1].split("*")[0] == hap2_list[-1].split("*")[0]: + genotype = "~".join( + sorted(hap1.split("~") + hap2.split("~")) + ) race1 = self.populations[Prob1WithIndexes[h][1][1]] race2 = self.populations[Prob2WithIndexes[k][1][1]] - h1_id = hap1 + ',' + race1 - h2_id = hap2 + ',' + race2 - geno_id = ("-".join(sorted([h1_id, h2_id]))) + h1_id = hap1 + "," + race1 + h2_id = hap2 + "," + race2 + geno_id = "-".join(sorted([h1_id, h2_id])) # geno_id = tuple(sorted([h1_id, h2_id])) - #if len(geno_seen) > 25000000: - #k = 1 / 0 + # if len(geno_seen) > 25000000: + # k = 1 / 0 if geno_id not in geno_seen: geno_seen.add(geno_id) - prob = Prob1WithIndexes[h][0] * Prob2WithIndexes[k][0] * \ - self.priorMatrix[Prob1WithIndexes[h][1][1]][Prob2WithIndexes[k][1][1]] + prob = ( + Prob1WithIndexes[h][0] + * Prob2WithIndexes[k][0] + * self.priorMatrix[Prob1WithIndexes[h][1][1]][ + Prob2WithIndexes[k][1][1] + ] + ) if hap1 != hap2: prob = prob * 2 @@ -503,7 +640,7 @@ def calc_haps_pairs_haplotype(self, Haps1, Haps2, Prob1WithIndexes, 
Prob2WithInd races = sorted([race1, race2]) - races = races[0] + ',' + races[1] + races = races[0] + "," + races[1] if races in pop_res: sumProb = pop_res[races] + prob @@ -535,27 +672,44 @@ def comp_phase_prob_haplotype(self, phases, N_Loc, epsilon, n): for i in range(len(phases)): P1 = self.comp_hap_prob(phases[i][0], N_Loc, epsilon, n) # This will open locus ambiguities and comp probabilities for Hap1 - Haps1 = P1['Haps'] - Prob1 = P1['Probs'] + Haps1 = P1["Haps"] + Prob1 = P1["Probs"] # if the first haplotype returned something then look at 2nd - if len(Prob1)>0: + if len(Prob1) > 0: P2 = self.comp_hap_prob(phases[i][1], N_Loc, epsilon, n) # This will do the same for Hap 2; - Haps2 = P2['Haps'] - Prob2 = P2['Probs'] + Haps2 = P2["Haps"] + Prob2 = P2["Probs"] Prob1WithIndexes = self.convert_list_to_one_dim(Prob1) Prob2WithIndexes = self.convert_list_to_one_dim(Prob2) - maxProb = self.calc_haps_pairs_haplotype(Haps1, Haps2, Prob1WithIndexes, Prob2WithIndexes, epsilon, genotypes_total, - pop_res, maxProb, hap_total, geno_seen, pop_res_haplo, p_total) + maxProb = self.calc_haps_pairs_haplotype( + Haps1, + Haps2, + Prob1WithIndexes, + Prob2WithIndexes, + epsilon, + genotypes_total, + pop_res, + maxProb, + hap_total, + geno_seen, + pop_res_haplo, + p_total, + ) # p_total returns an array of N*N (N is number of populations), hap_total - pairs of haplotypes. # pop_res are the names of the populations - #return {'MaxProb': maxProb, 'Haps': hap_total, 'Pops': pop_res} + # return {'MaxProb': maxProb, 'Haps': hap_total, 'Pops': pop_res} - return {'MaxProb': maxProb,'Haps': hap_total, 'Probs': p_total, 'Pops': pop_res_haplo} + return { + "MaxProb": maxProb, + "Haps": hap_total, + "Probs": p_total, + "Pops": pop_res_haplo, + } def comp_phase_prob_genotype(self, phases, N_Loc, epsilon, n): # receives a list of phases and computes haps and @@ -570,27 +724,34 @@ def comp_phase_prob_genotype(self, phases, N_Loc, epsilon, n): for i in range(len(phases)): P1 = self.comp_hap_prob(phases[i][0], N_Loc, epsilon, n) # This will open locus ambiguities and comp probabilities for Hap1 - Haps1 = P1['Haps'] - Prob1 = P1['Probs'] + Haps1 = P1["Haps"] + Prob1 = P1["Probs"] # if the first haplotype returned something then look at 2nd - if len(Prob1)>0: + if len(Prob1) > 0: P2 = self.comp_hap_prob(phases[i][1], N_Loc, epsilon, n) # This will do the same for Hap 2; - Haps2 = P2['Haps'] - Prob2 = P2['Probs'] + Haps2 = P2["Haps"] + Prob2 = P2["Probs"] Prob1WithIndexes = self.convert_list_to_one_dim(Prob1) Prob2WithIndexes = self.convert_list_to_one_dim(Prob2) - maxProb = self.calc_haps_pairs(Haps1, Haps2, Prob1WithIndexes, Prob2WithIndexes, epsilon, hap_total, - pop_res, maxProb, geno_seen) + maxProb = self.calc_haps_pairs( + Haps1, + Haps2, + Prob1WithIndexes, + Prob2WithIndexes, + epsilon, + hap_total, + pop_res, + maxProb, + geno_seen, + ) # p_total returns an array of N*N (N is number of populations), hap_total - pairs of haplotypes. 
# pop_res are the names of the populations - return {'MaxProb': maxProb, 'Haps': hap_total, 'Pops': pop_res} - - + return {"MaxProb": maxProb, "Haps": hap_total, "Pops": pop_res} def comp_phase_prob(self, phases, N_Loc, epsilon, n): # receives a list of phases and computes haps and @@ -603,38 +764,52 @@ def comp_phase_prob(self, phases, N_Loc, epsilon, n): for i in range(len(phases)): P1 = self.comp_hap_prob(phases[i][0], N_Loc, epsilon, n) # This will open locus ambiguities and comp probabilities for Hap1 - Haps1 = P1['Haps'] - Prob1 = P1['Probs'] + Haps1 = P1["Haps"] + Prob1 = P1["Probs"] # if the first haplotype returned something then look at 2nd - if len(Prob1)>0: + if len(Prob1) > 0: P2 = self.comp_hap_prob(phases[i][1], N_Loc, epsilon, n) # This will do the same for Hap 2; - Haps2 = P2['Haps'] - Prob2 = P2['Probs'] + Haps2 = P2["Haps"] + Prob2 = P2["Probs"] Prob1WithIndexes = self.convert_list_to_one_dim(Prob1) Prob2WithIndexes = self.convert_list_to_one_dim(Prob2) - # for h in range(len(Prob1)): - # for k in range(len(Prob2)): + # for h in range(len(Prob1)): + # for k in range(len(Prob2)): for h in range(len(Prob1WithIndexes)): - x = epsilon/Prob1WithIndexes[h][0] - prob2Len = len(Prob2WithIndexes) - # k=0 - # while k < prob2Len and Prob2WithIndexes[k][0] >= x: - for k in range(prob2Len): - if Prob2WithIndexes[k][0] >= x: - if self.priorMatrix[Prob1WithIndexes[h][1][1]][Prob2WithIndexes[k][1][1]] > 0: - if self.priorMatrix[Prob1WithIndexes[h][1][1]][Prob2WithIndexes[k][1][1]] > 0: - if (self.priorMatrix[Prob1WithIndexes[h][1][1]][Prob2WithIndexes[k][1][1]] * - Prob2WithIndexes[k][0]) >= x: - # NOTE: there is a bug in places - # places = self.cal_prob(Prob1[h], Prob2[k], epsilon) - #print ("places= ", places) - # for i in range(len(places)): - #Prob1WithIndexes[h][0] - # p=(places[i]) + x = epsilon / Prob1WithIndexes[h][0] + prob2Len = len(Prob2WithIndexes) + # k=0 + # while k < prob2Len and Prob2WithIndexes[k][0] >= x: + for k in range(prob2Len): + if Prob2WithIndexes[k][0] >= x: + if ( + self.priorMatrix[Prob1WithIndexes[h][1][1]][ + Prob2WithIndexes[k][1][1] + ] + > 0 + ): + if ( + self.priorMatrix[Prob1WithIndexes[h][1][1]][ + Prob2WithIndexes[k][1][1] + ] + > 0 + ): + if ( + self.priorMatrix[Prob1WithIndexes[h][1][1]][ + Prob2WithIndexes[k][1][1] + ] + * Prob2WithIndexes[k][0] + ) >= x: + # NOTE: there is a bug in places + # places = self.cal_prob(Prob1[h], Prob2[k], epsilon) + # print ("places= ", places) + # for i in range(len(places)): + # Prob1WithIndexes[h][0] + # p=(places[i]) # avoid reporting the same haplotype pair more than once # # crashing at the next line @@ -645,10 +820,16 @@ def comp_phase_prob(self, phases, N_Loc, epsilon, n): # the population list to be the same as the graph # # - #print ("p[0] = ", p[0]) - #print ("p[1] = ", p[1]) - h1_id = (Haps1[Prob1WithIndexes[h][1][0]], self.populations[Prob1WithIndexes[h][1][1]]) - h2_id = (Haps2[Prob2WithIndexes[k][1][0]], self.populations[Prob2WithIndexes[k][1][1]]) + # print ("p[0] = ", p[0]) + # print ("p[1] = ", p[1]) + h1_id = ( + Haps1[Prob1WithIndexes[h][1][0]], + self.populations[Prob1WithIndexes[h][1][1]], + ) + h2_id = ( + Haps2[Prob2WithIndexes[k][1][0]], + self.populations[Prob2WithIndexes[k][1][1]], + ) geno_id = tuple(sorted([h1_id, h2_id])) if geno_id not in geno_seen: geno_seen.add(geno_id) @@ -660,15 +841,25 @@ def comp_phase_prob(self, phases, N_Loc, epsilon, n): # should be this: # 
D000001,A*02:01~B*44:03~C*02:02~DQB1*02:01~DRB1*07:01,0.00006000,CAU,A*11:01~B*13:02~C*06:02~DQB1*05:01~DRB1*01:01,0.00009000,CAU if geno_id[0] == h1_id: - p_total.append([Prob1WithIndexes[h][0], Prob2WithIndexes[k][0]]) + p_total.append( + [ + Prob1WithIndexes[h][0], + Prob2WithIndexes[k][0], + ] + ) else: - p_total.append([Prob2WithIndexes[k][0],Prob1WithIndexes[h][0]]) - # k+=1 - else: - break + p_total.append( + [ + Prob2WithIndexes[k][0], + Prob1WithIndexes[h][0], + ] + ) + # k+=1 + else: + break # p_total returns an array of N*N (N is number of populations), hap_total - pairs of haplotypes. # pop_res are the names of the populations - return {'Haps': hap_total, 'Probs': p_total,'Pops': pop_res} + return {"Haps": hap_total, "Probs": p_total, "Pops": pop_res} def reduce_phase_to_valid_allels(self, haps, N_Loc, planc=False): for j in range(len(haps)): @@ -677,44 +868,49 @@ def reduce_phase_to_valid_allels(self, haps, N_Loc, planc=False): hap_list.append(haps[j][k]) options = 1 - for i in range(N_Loc): options = options * (len(hap_list[0][i].split("/"))) + for i in range(N_Loc): + options = options * (len(hap_list[0][i].split("/"))) if options >= self.number_of_options_threshold or planc: for hap_k in hap_list: for i, g in enumerate(hap_k): - gen = g.split('/') + gen = g.split("/") probs = self.check_if_alleles_exist(gen) if not probs == {}: - haps[j][k][i] = '/'.join(list(probs.keys())) - + list(probs.keys()) + haps[j][k][i] = ("/").join(list(probs.keys())) - def reduce_phase_to_commons_alleles(self, haps, N_Loc, commons_number=1, planc = False ): + def reduce_phase_to_commons_alleles( + self, haps, N_Loc, commons_number=1, planc=False + ): for j in range(len(haps)): for k in range(2): hap_list = [] hap_list.append(haps[j][k]) options = 1 - for i in range(N_Loc): options = options * (len(hap_list[0][i].split("/"))) - if options>=self.number_of_options_threshold or planc: + for i in range(N_Loc): + options = options * (len(hap_list[0][i].split("/"))) + if options >= self.number_of_options_threshold or planc: for hap_k in hap_list: - for i,g in enumerate(hap_k): - gen = g.split('/') + for i, g in enumerate(hap_k): + gen = g.split("/") probs = self.check_if_alleles_exist(gen) if not probs == {}: dict_allels = {} for allele in probs: list_allel = probs[allele] sum = 0 - for p,prob in enumerate(list_allel): - sum += prob * self.priorMatrix[p,p] + for p, prob in enumerate(list_allel): + sum += prob * self.priorMatrix[p, p] dict_allels[allele] = sum - commons_alleles = dict(sorted(dict_allels.items(), key=lambda kv: kv[1], reverse=True)[:commons_number]) - haps[j][k][i] = ('/').join(list(commons_alleles.keys())) - - - - - + commons_alleles = dict( + sorted( + dict_allels.items(), + key=lambda kv: kv[1], + reverse=True, + )[:commons_number] + ) + haps[j][k][i] = ("/").join(list(commons_alleles.keys())) def open_phases(self, haps, N_Loc, gl_string): phases = [] @@ -731,14 +927,13 @@ def open_phases(self, haps, N_Loc, gl_string): # compute the number of options: options = 1 for i in range(N_Loc): - options *= len(hap_list_splits[i]) + options = options * (len(hap_list[0][i].split("/"))) # if the number of options is smaller than the total number of nodes: if options < self.number_of_options_threshold: # open ambiguities regularly: for i in range(N_Loc): - hap_list = self.open_ambiguities(hap_list, i, hap_list_splits[i]) - + hap_list = self.open_ambiguities(hap_list, i) if k == 0: H1.append(hap_list) else: @@ -749,21 +944,21 @@ def open_phases(self, haps, N_Loc, gl_string): # if there are more 
options than actual haplotypes possible: else: self.option_2 += 1 - optionDict = {} #set() + optionDict = {} if len(fq) == 0: - _list = [] + list = [] for (gen, name) in self.cypher.loc_map.items(): count = 0 for i in range(len(hap_list[0])): - if hap_list[0][i].split("*", 1)[0] == gen: + if hap_list[0][i].split("*")[0] == gen: count = count + 1 if count > 0: - _list.append(name) + list.append(name) # we'll get all the options possible - # (query,lc)=self.cypher.buildQuery(["~".join(_list)]) + # (query,lc)=self.cypher.buildQuery(["~".join(list)]) # fq = pa.DataFrame(self.graph.data(query)) - label = "".join(sorted(_list)) + label = "".join(sorted(list)) fq = self.netGraph.haps_by_label(label) # fq = pa.DataFrame(self.netGraph.abcdq_allele(), ) # fq = self.netGraph.abcdq_allele() @@ -775,14 +970,28 @@ def open_phases(self, haps, N_Loc, gl_string): gen=g.split('/') for option in gen: optionDict[option]=True""" - - for gen in hap_list_splits: - for option in gen: - optionDict[option] = True - # optionDict.add(option) + for hap_k in hap_list: + # listType = [] + for g in hap_k: + gen = g.split("/") + for option in gen: + optionDict[option] = True + # print(listType) ##all_haps = fq.values.tolist() - hap_list = create_hap_list(fq, optionDict, N_Loc) - + all_haps = fq + hap_list = [] + for i in range(len(all_haps)): + count = 0 + for gen in all_haps[i].split("~"): + if not gen in optionDict: + break + else: + count = count + 1 + if count == N_Loc: + + # all_haps[i][0]=all_haps[i][0].split("~") + # hap_list.append(all_haps[i][0]) + hap_list.append(all_haps[i].split("~")) if k == 0: H1.append(hap_list) else: @@ -791,15 +1000,13 @@ def open_phases(self, haps, N_Loc, gl_string): phases.append([H1, H2]) return phases + def find_type_loci(self, loci): + return (loci.split("*"))[0] + def find_missing_indexes(self, haplo): - def find_type_loci(self,loci): - return (loci.split("*"))[0] - - def find_missing_indexes(self,haplo): - - locus_in_haplo =[] - index=[] + locus_in_haplo = [] + index = [] for locus in haplo: locus_in_haplo.append(self.index_dict[self.find_type_loci(locus)]) for locus in self.full_hapl: @@ -814,34 +1021,34 @@ def open_dict_data(self, haplo_probs): probs = list(haplo_probs.values()) haplos = list(haplo_probs.keys()) if not haplo_probs: - return {'Haps': '', 'Probs': ''} - return {'Haps': haplos, 'Probs': probs} + return {"Haps": "", "Probs": ""} + return {"Haps": haplos, "Probs": probs} def create_haplos_string(self, haplos, division, missing): - string_haplos = [] - wanted_haplos = haplos[0] - # Go over all the hap - for hap in wanted_haplos: - # The first item - hap_string = "" - # Create the wanted string - for i in range(0, len(division)): - place = division[i] - # If this is not a missing option - if division[i] not in missing: - # Normalize the places - for miss in missing: - if division[i] > miss: - place = place - 1 - - # [division[i]]- to get the index for the item in haplox we want - if hap_string == "": - hap_string = str(hap[place - 1]) - else: - hap_string = hap_string + '~' + str(hap[place - 1]) - if not hap_string == "": - string_haplos.append(hap_string) - return string_haplos + string_haplos = [] + wanted_haplos = haplos[0] + # Go over all the hap + for hap in wanted_haplos: + # The first item + hap_string = "" + # Create the wanted string + for i in range(0, len(division)): + place = division[i] + # If this is not a missing option + if division[i] not in missing: + # Normalize the places + for miss in missing: + if division[i] > miss: + place = place - 1 + + # 
[division[i]]- to get the index for the item in haplox we want + if hap_string == "": + hap_string = str(hap[place - 1]) + else: + hap_string = hap_string + "~" + str(hap[place - 1]) + if not hap_string == "": + string_haplos.append(hap_string) + return string_haplos def open_option_(self, dict2, dict1, planc=False, num_of_options=10): # Will contain the two option combine @@ -869,118 +1076,125 @@ def open_option_(self, dict2, dict1, planc=False, num_of_options=10): freq2 = dict2[key2] list_prob = [freq1[i] * freq2[i] * self.factor for i in range(size)] if max(list_prob) > 0: - key = ('~').join(sorted(key1.split('~') + key2.split('~'))) + key = ("~").join(sorted(key1.split("~") + key2.split("~"))) dict_all[key] = list_prob return dict_all # Find the freq of option from the matrix def find_option_freq(self, option, haplos, missing): - #one = True - # Here we calculate the freq of first the first division of the option - # The first division - division = option[0] - - # The string of the haplos - string_option = self.create_haplos_string(haplos, division, missing) - - # Find the dict of the haplos and their freqs - dict_all = self.get_haplo_freqs_pan_b(string_option, division) - - # If we found the freq of the first optin- if no-no need to continue - if dict_all != {}: - # Go over all the parts of the haplo according to option - # Skip the first options - for i in range(1, len(option)): - # Create the string of the option - # Example: option [1,2,3] will get :A*01:01~B*07:02~C*07:02 - # Example: option [5] will get DRB1*13:02 - division = option[i] - string_option = self.create_haplos_string(haplos, division, missing) - if string_option == []: - t = 0 - div_dict = self.get_haplo_freqs_pan_b(string_option, division) - - # If this part of the option is empty-the option is not good - if div_dict == {}: - result = all(elem in missing for elem in division) - if result:# and one:#len(division) == 1 and len(missing) == 1 and division[0] == missing[0]: - type = self.cypher_plan_b.findTypeFromIndexes(division) - div_dict = self.netGraph.haps_with_probs_by_label(type) - #one = False - else: - # There is no freqs - dict_all = {} - - # No need to continue - break - # Add the new data to old data and make it a new "dict_all" - dict_all = self.open_option_(div_dict, dict_all) - return dict_all + # one = True + # Here we calculate the freq of first the first division of the option + # The first division + division = option[0] + + # The string of the haplos + string_option = self.create_haplos_string(haplos, division, missing) + + # Find the dict of the haplos and their freqs + dict_all = self.get_haplo_freqs_pan_b(string_option, division) + + # If we found the freq of the first optin- if no-no need to continue + if dict_all != {}: + # Go over all the parts of the haplo according to option + # Skip the first options + for i in range(1, len(option)): + # Create the string of the option + # Example: option [1,2,3] will get :A*01:01~B*07:02~C*07:02 + # Example: option [5] will get DRB1*13:02 + division = option[i] + string_option = self.create_haplos_string(haplos, division, missing) + if string_option == []: + t = 0 + div_dict = self.get_haplo_freqs_pan_b(string_option, division) + + # If this part of the option is empty-the option is not good + if div_dict == {}: + result = all(elem in missing for elem in division) + if ( + result + ): # and one:#len(division) == 1 and len(missing) == 1 and division[0] == missing[0]: + type = self.cypher_plan_b.findTypeFromIndexes(division) + div_dict = 
self.netGraph.haps_with_probs_by_label(type) + # one = False + else: + # There is no freqs + dict_all = {} - def comp_hap_prob_plan_b(self, Hap,division,missing): - if division[0] == list(set(self.index_dict.values())):#[1, 2, 3, 4, 5]: - return self.comp_hap_prob([Hap[0]], 0, 0, 0) - haplo_probs = self.find_option_freq(division, Hap,missing) + # No need to continue + break + # Add the new data to old data and make it a new "dict_all" + dict_all = self.open_option_(div_dict, dict_all) + return dict_all + + def comp_hap_prob_plan_b(self, Hap, division, missing): + if division[0] == list(set(self.index_dict.values())): # [1, 2, 3, 4, 5]: + return self.comp_hap_prob(Hap[0], 0, 0, 0) + haplo_probs = self.find_option_freq(division, Hap, missing) # Return {'Haps': haplos, 'Probs': probs} of the dict return self.open_dict_data(haplo_probs) - def missing_from_data_to_string(self,hap,not_in_data): + def missing_from_data_to_string(self, hap, not_in_data): str_hap = "" str_not_in = [] - str_list=[] + str_list = [] for luci in hap: type_luci = luci.split("*")[0] if self.index_dict[type_luci] in not_in_data: str_not_in.append(luci) else: - str_hap+="~"+str(luci) + str_hap += "~" + str(luci) str_list.append(str_hap[1:]) - str_not_in=list(set(str_not_in)) - - return[str_list,str_not_in] - + str_not_in = list(set(str_not_in)) + return [str_list, str_not_in] - def find_option_freq_missing_data(self,option, haplos, missing,not_in_data): + def find_option_freq_missing_data(self, option, haplos, missing, not_in_data): # Here we calculate the freq of first the first division of the option # The first division - all_the_data = set(self.index_dict.values())#[1,2,3,4,5] + all_the_data = set(self.index_dict.values()) # [1,2,3,4,5] # All data that we dont want to - #all_missing = list(set(missing + not_in_data)) - all_missing = list(set( not_in_data)) + # all_missing = list(set(missing + not_in_data)) + all_missing = list(set(not_in_data)) all_the_data = [x for x in all_the_data if x not in all_missing] dict_res = dict() for hap in haplos[0]: # The string of hap - string_option = self.missing_from_data_to_string(hap,not_in_data) + string_option = self.missing_from_data_to_string(hap, not_in_data) # Find the dict of hap and his freqs - if len(string_option[0]) > 0 and string_option[0][0] != '': - dict_all = self.get_haplo_freqs_pan_b(string_option[0],all_the_data) + if len(string_option[0]) > 0 and string_option[0][0] != "": + dict_all = self.get_haplo_freqs_pan_b(string_option[0], all_the_data) for key in dict_all.keys(): list_key = key.split("~") - list_key=list_key[:not_in_data[0]-1]+string_option[1]+list_key[not_in_data[0]-1:] - dict_res["~".join(sorted(list_key))]=[x*(self._factor_missing_data**len(all_missing)) for x in dict_all[key]] + list_key = ( + list_key[: not_in_data[0] - 1] + + string_option[1] + + list_key[not_in_data[0] - 1 :] + ) + dict_res["~".join(sorted(list_key))] = [ + x * (self._factor_missing_data ** len(all_missing)) + for x in dict_all[key] + ] return dict_res - def comp_hap_prob_plan_b_missing_data(self, Hap,division,missing,not_in_data): + def comp_hap_prob_plan_b_missing_data(self, Hap, division, missing, not_in_data): - haplo_probs = self.find_option_freq_missing_data(division, Hap,missing,not_in_data) + haplo_probs = self.find_option_freq_missing_data( + division, Hap, missing, not_in_data + ) # Return {'Haps': haplos, 'Probs': probs} of the dict return self.open_dict_data(haplo_probs) - - def read_matrix(self,index): + def read_matrix(self, index): # Init div_option = [] - # If the 
index is in the size of the matrix if len(self.matrix_planb) > index: div_option = self.matrix_planb[index] @@ -988,7 +1202,7 @@ def read_matrix(self,index): # Return the division return div_option - def check_full_haplo(self,hap): + def check_full_haplo(self, hap): # To get the array wanted_hap = hap[0][0] missing = [] @@ -1002,55 +1216,57 @@ def findLinkType(self, typing): loci_lc = "".join([allele.lower() for allele in loci]) return loci_lc - def get_haplo_freqs_pan_b(self, haplos_string,division): + def get_haplo_freqs_pan_b(self, haplos_string, division): haplo_probs = {} if len(haplos_string) > 0: # Check the type of the haplos type = self.cypher_plan_b.findTypeFromIndexes(division) - label_haplo = self.cypher_plan_b.findLinkType(haplos_string) - haplo_probs = self.netGraph.adjs_query_by_color(haplos_string, label_haplo , type) + label_haplo = self.cypher_plan_b.findLinkType(haplos_string) + haplo_probs = self.netGraph.adjs_query_by_color( + haplos_string, label_haplo, type + ) return haplo_probs - def check_if_alleles_exist(self,allels): + def check_if_alleles_exist(self, allels): type = self.find_type_loci(allels[0]) - div= [self.index_dict[type]] - probs = self.get_haplo_freqs_pan_b(allels,div) + div = [self.index_dict[type]] + probs = self.get_haplo_freqs_pan_b(allels, div) return probs - def check_if_alleles_in_data(self,phase,index): - size_haplo=phase[0][0] - loci=[] + def check_if_alleles_in_data(self, phase, index): + size_haplo = phase[0][0] + loci = [] missing = [] for t in range(len(size_haplo[0][0])): for i in range(len(phase)): for hapes in phase[i][index]: - for hap in hapes: + for hap in hapes: loci.append(hap[t]) # Remove duplicates - loci=list(set(loci)) - probs= self.check_if_alleles_exist(loci) + loci = list(set(loci)) + probs = self.check_if_alleles_exist(loci) if probs == {}: - type=self.find_type_loci(loci[0]) + type = self.find_type_loci(loci[0]) missing.append(self.index_dict[type]) - loci=[] + loci = [] return missing - def check_if_alleles_of_one_phase_in_data(self,phase): - size_haplo=phase[0] - loci=[] + def check_if_alleles_of_one_phase_in_data(self, phase): + size_haplo = phase[0] + loci = [] missing = [] for t in range(len(size_haplo[0])): for hapes in phase[0]: loci.append(hapes[t]) # Remove duplicates - loci=list(set(loci)) - probs= self.check_if_alleles_exist(loci) + loci = list(set(loci)) + probs = self.check_if_alleles_exist(loci) if probs == {}: - type=self.find_type_loci(loci[0]) + type = self.find_type_loci(loci[0]) missing.append(self.index_dict[type]) - loci=[] + loci = [] return missing def allel_to_SR(self, allel_dict): @@ -1063,7 +1279,7 @@ def comp_hap_prob_plan_c(self, phases, missing): for phase in phases: dict_all_tmp = {} miss = [] - for i,loci in enumerate(phase): + for i, loci in enumerate(phase): index = self.find_type_loci(loci) index_loci = self.index_dict[index] div_dict = self.get_haplo_freqs_pan_b([loci], [index_loci]) @@ -1075,15 +1291,17 @@ def comp_hap_prob_plan_c(self, phases, missing): if dict_all_tmp == {}: dict_all_tmp = div_dict else: - dict_all_tmp = self.open_option_(div_dict, dict_all_tmp, True) + dict_all_tmp = self.open_option_(div_dict, dict_all_tmp, True) if not dict_all_tmp: break if len(miss) > 0: for key in dict_all_tmp: list_key = key.split("~") list_key = list_key + miss - dict_all["~".join(sorted(list_key))] = [x * (self._factor_missing_data ** len(miss)) for x in - dict_all_tmp[key]] + dict_all["~".join(sorted(list_key))] = [ + x * (self._factor_missing_data ** len(miss)) + for x in dict_all_tmp[key] + ] 
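
The branch above down-weights every genotype probability once per locus that had no usable frequency data: each missing locus multiplies the probability by self._factor_missing_data, which the validation config exposes as "factor_missing_data" (default 0.01). A minimal standalone sketch of that penalty; the function name and arguments are illustrative, not part of the patch:

    # Sketch of the missing-locus penalty applied above; names are illustrative.
    # `factor` mirrors the "factor_missing_data" config default of 0.01.
    def apply_missing_penalty(probs, n_missing, factor=0.01):
        # One multiplicative hit per missing locus: two unknown loci
        # scale every probability by factor ** 2.
        return [p * (factor ** n_missing) for p in probs]

    # apply_missing_penalty([0.2, 0.05], n_missing=1) -> [0.002, 0.0005]
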
else: for key in dict_all_tmp: dict_all[key] = dict_all_tmp[key] @@ -1104,8 +1322,7 @@ def comp_hap_prob_plan_c(self, phases, missing): return dict_all - - def comp_phase_prob_plan_c(self,phases, N_Loc, epsilon, MUUG_output): + def comp_phase_prob_plan_c(self, phases, N_Loc, epsilon, MUUG_output): epsilon = 0 hap_pairs_total = [] pop_res_haplo = [] @@ -1120,41 +1337,68 @@ def comp_phase_prob_plan_c(self,phases, N_Loc, epsilon, MUUG_output): for i in range(len(phases)): # If we dont know about missing alleles - P1 =self.open_dict_data(self.comp_hap_prob_plan_c(phases[i][0][0], missing)) + P1 = self.open_dict_data( + self.comp_hap_prob_plan_c(phases[i][0][0], missing) + ) # This will open locus ambiguities and comp probabilities for Hap1 - Haps1 = P1['Haps'] - Prob1 = P1['Probs'] + Haps1 = P1["Haps"] + Prob1 = P1["Probs"] if len(Prob1) > 0: - P2 = self.open_dict_data(self.comp_hap_prob_plan_c(phases[i][1][0], missing)) - - Haps2 = P2['Haps'] - Prob2 = P2['Probs'] + P2 = self.open_dict_data( + self.comp_hap_prob_plan_c(phases[i][1][0], missing) + ) + Haps2 = P2["Haps"] + Prob2 = P2["Probs"] Prob1WithIndexes = self.convert_list_to_one_dim(Prob1) Prob2WithIndexes = self.convert_list_to_one_dim(Prob2) if MUUG_output: - maxProb = self.calc_haps_pairs(Haps1, Haps2, Prob1WithIndexes, Prob2WithIndexes, epsilon, hap_total, - pop_res, maxProb, geno_seen) + maxProb = self.calc_haps_pairs( + Haps1, + Haps2, + Prob1WithIndexes, + Prob2WithIndexes, + epsilon, + hap_total, + pop_res, + maxProb, + geno_seen, + ) else: - maxProb = self.calc_haps_pairs_haplotype(Haps1, Haps2, Prob1WithIndexes, Prob2WithIndexes, epsilon, - hap_total, - pop_res, maxProb, hap_pairs_total, geno_seen, pop_res_haplo, - p_total) + maxProb = self.calc_haps_pairs_haplotype( + Haps1, + Haps2, + Prob1WithIndexes, + Prob2WithIndexes, + epsilon, + hap_total, + pop_res, + maxProb, + hap_pairs_total, + geno_seen, + pop_res_haplo, + p_total, + ) if MUUG_output: sum_prob = sum(pop_res.values()) - pop_res_final = {'all_pops,all_pops': sum_prob} - return {'MaxProb': maxProb, 'Haps': hap_total, 'Pops': pop_res_final} + pop_res_final = {"all_pops,all_pops": sum_prob} + return {"MaxProb": maxProb, "Haps": hap_total, "Pops": pop_res_final} for i in range(len(pop_res_haplo)): - pop_res_haplo[i][0] = 'all_pops' - pop_res_haplo[i][1] = 'all_pops' + pop_res_haplo[i][0] = "all_pops" + pop_res_haplo[i][1] = "all_pops" - return {'MaxProb': maxProb, 'Haps': hap_pairs_total, 'Probs': p_total, 'Pops': pop_res_haplo} + return { + "MaxProb": maxProb, + "Haps": hap_pairs_total, + "Probs": p_total, + "Pops": pop_res_haplo, + } # Calculate full plan b def comp_phase_prob_plan_b(self, phases, N_Loc, epsilon, MUUG_output): @@ -1189,31 +1433,35 @@ def comp_phase_prob_plan_b(self, phases, N_Loc, epsilon, MUUG_output): for i in range(len(phases)): # If we dont know about missing alleles if missing_data_1 == []: - index = min(matrix_index,phases[i][0][1]) + index = min(matrix_index, phases[i][0][1]) option = self.read_matrix(index) P1 = self.comp_hap_prob_plan_b(phases[i][0], option, missing) - if len(P1['Haps']): + if len(P1["Haps"]): phases[i][0][1] = index else: - P1=self.comp_hap_prob_plan_b_missing_data(phases[i][0], option, missing,missing_data_1) + P1 = self.comp_hap_prob_plan_b_missing_data( + phases[i][0], option, missing, missing_data_1 + ) # This will open locus ambiguities and comp probabilities for Hap1 - Haps1 = P1['Haps'] - Prob1 = P1['Probs'] - #if len(Prob1) > 0: - # If we dont know about missing alleles + Haps1 = P1["Haps"] + Prob1 = P1["Probs"] + # 
if len(Prob1) > 0: + # If we dont know about missing alleles if missing_data_2 == []: index = min(matrix_index, phases[i][1][1]) option = self.read_matrix(index) P2 = self.comp_hap_prob_plan_b(phases[i][1], option, missing) - if len(P2['Haps']): + if len(P2["Haps"]): phases[i][1][1] = index - Haps2 = P2['Haps'] - Prob2 = P2['Probs'] + Haps2 = P2["Haps"] + Prob2 = P2["Probs"] else: if len(Prob1) > 0: - P2 =self.comp_hap_prob_plan_b_missing_data(phases[i][1], option, missing,missing_data_2) - Haps2 = P2['Haps'] - Prob2 = P2['Probs'] + P2 = self.comp_hap_prob_plan_b_missing_data( + phases[i][1], option, missing, missing_data_2 + ) + Haps2 = P2["Haps"] + Prob2 = P2["Probs"] # This will do the same for Hap 2; Prob1WithIndexes = self.convert_list_to_one_dim(Prob1) @@ -1221,83 +1469,130 @@ def comp_phase_prob_plan_b(self, phases, N_Loc, epsilon, MUUG_output): Prob2WithIndexes = self.convert_list_to_one_dim(Prob2) if MUUG_output: - maxProb = self.calc_haps_pairs(Haps1, Haps2,Prob1WithIndexes, Prob2WithIndexes, epsilon, hap_total, pop_res, maxProb, geno_seen) + maxProb = self.calc_haps_pairs( + Haps1, + Haps2, + Prob1WithIndexes, + Prob2WithIndexes, + epsilon, + hap_total, + pop_res, + maxProb, + geno_seen, + ) else: - maxProb = self.calc_haps_pairs_haplotype(Haps1, Haps2, Prob1WithIndexes, Prob2WithIndexes, epsilon, - hap_total, - pop_res, maxProb, hap_pairs_total, geno_seen, pop_res_haplo, - p_total) + maxProb = self.calc_haps_pairs_haplotype( + Haps1, + Haps2, + Prob1WithIndexes, + Prob2WithIndexes, + epsilon, + hap_total, + pop_res, + maxProb, + hap_pairs_total, + geno_seen, + pop_res_haplo, + p_total, + ) # Go to the next row in thr matrix matrix_index += 1 matrix_index = 10 matrix_index_curr = 0 - while hap_total == {} and matrix_index_curr<6 : + while hap_total == {} and matrix_index_curr < 6: for i in range(len(phases)): index_1 = min(matrix_index, phases[i][0][1]) index_2 = min(matrix_index, phases[i][1][1]) - if not (index_1==10 and index_2==10): - if (index_1 == 10 and len(phases[i][0][0]) > 0): + if not (index_1 == 10 and index_2 == 10): + if index_1 == 10 and len(phases[i][0][0]) > 0: option = self.read_matrix(matrix_index_curr) - missing_data_1 = self.check_if_alleles_of_one_phase_in_data(phases[i][0]) - P1 = self.comp_hap_prob_plan_b_missing_data(phases[i][0], option, missing, missing_data_1) + missing_data_1 = self.check_if_alleles_of_one_phase_in_data( + phases[i][0] + ) + P1 = self.comp_hap_prob_plan_b_missing_data( + phases[i][0], option, missing, missing_data_1 + ) option = self.read_matrix(index_2) P2 = self.comp_hap_prob_plan_b(phases[i][1], option, missing) - if (index_2 == 10 and len(phases[i][1][0]) > 0): + if index_2 == 10 and len(phases[i][1][0]) > 0: option = self.read_matrix(index_1) P1 = self.comp_hap_prob_plan_b(phases[i][0], option, missing) option = self.read_matrix(matrix_index_curr) - missing_data_2 = self.check_if_alleles_of_one_phase_in_data(phases[i][1]) - P2 = self.comp_hap_prob_plan_b_missing_data(phases[i][1], option, missing, missing_data_2) - - - Haps1 = P1['Haps'] - Prob1 = P1['Probs'] - Haps2 = P2['Haps'] - Prob2 = P2['Probs'] + missing_data_2 = self.check_if_alleles_of_one_phase_in_data( + phases[i][1] + ) + P2 = self.comp_hap_prob_plan_b_missing_data( + phases[i][1], option, missing, missing_data_2 + ) + + Haps1 = P1["Haps"] + Prob1 = P1["Probs"] + Haps2 = P2["Haps"] + Prob2 = P2["Probs"] Prob1WithIndexes = self.convert_list_to_one_dim(Prob1) Prob2WithIndexes = self.convert_list_to_one_dim(Prob2) if MUUG_output: - maxProb = self.calc_haps_pairs(Haps1, 
Haps2, Prob1WithIndexes, Prob2WithIndexes, epsilon, - hap_total, pop_res, maxProb, geno_seen) + maxProb = self.calc_haps_pairs( + Haps1, + Haps2, + Prob1WithIndexes, + Prob2WithIndexes, + epsilon, + hap_total, + pop_res, + maxProb, + geno_seen, + ) else: - maxProb = self.calc_haps_pairs_haplotype(Haps1, Haps2, Prob1WithIndexes, Prob2WithIndexes, - epsilon, - hap_total, - pop_res, maxProb, hap_pairs_total, geno_seen, - pop_res_haplo, - p_total) + maxProb = self.calc_haps_pairs_haplotype( + Haps1, + Haps2, + Prob1WithIndexes, + Prob2WithIndexes, + epsilon, + hap_total, + pop_res, + maxProb, + hap_pairs_total, + geno_seen, + pop_res_haplo, + p_total, + ) matrix_index_curr += 1 - - # p_total returns an array of N*N (N is number of populations), hap_total - pairs of haplotypes. - # pop_res are the names of the populations + # p_total returns an array of N*N (N is number of populations), hap_total - pairs of haplotypes. + # pop_res are the names of the populations if MUUG_output: - return {'MaxProb': maxProb, 'Haps': hap_total, 'Pops': pop_res} + return {"MaxProb": maxProb, "Haps": hap_total, "Pops": pop_res} - return {'MaxProb': maxProb, 'Haps': hap_pairs_total, 'Probs': p_total, 'Pops': pop_res_haplo} + return { + "MaxProb": maxProb, + "Haps": hap_pairs_total, + "Probs": p_total, + "Pops": pop_res_haplo, + } # haplotype - list of all loci options # return - type of tne haplotype def input_type(self, haplotype): type_list = [] for locus_options in haplotype: - locus = locus_options.split('*')[0] + locus = locus_options.split("*")[0] type_list.append(self.index_dict[locus]) return type_list - def open_ambiguities(self, hap, loc, split_loc): - return open_ambiguities(hap, loc, split_loc) - - def comp_cand(self, gl_string, binary, epsilon, n, MUUG_output, haps_output, planb, em): + def comp_cand( + self, gl_string, binary, epsilon, n, MUUG_output, haps_output, planb, em + ): # receives a list of phases and computes haps and # probabilties and accumulate cartesian productEpsilon=0.0001 chr = self.gl2haps(gl_string) @@ -1305,22 +1600,22 @@ def comp_cand(self, gl_string, binary, epsilon, n, MUUG_output, haps_output, pla return None, None # if we in 9-loci, check if the type input in valid format if self.nodes_for_plan_A: - geno_type = self.input_type(chr['Genotype'][0]) + geno_type = self.input_type(chr["Genotype"][0]) if not geno_type in self.nodes_for_plan_A: return None, None - n_loci = chr['N_Loc'] + n_loci = chr["N_Loc"] # generate the 2^(N-1) phases - pmags = self.gen_phases(chr['Genotype'], n_loci, binary) + pmags = self.gen_phases(chr["Genotype"], n_loci, binary) # return if the result is empty (why would that be?) if pmags == []: return None, None - #res_muugs = {'Haps': 'NaN', 'Probs': 0} - res_muugs = {'MaxProb': 0, 'Haps': {}, 'Pops': {}} - res_haps = {'Haps': 'Nan', 'Probs': 0, 'Pops':{}} + # res_muugs = {'Haps': 'NaN', 'Probs': 0} + res_muugs = {"MaxProb": 0, "Haps": {}, "Pops": {}} + res_haps = {"Haps": "Nan", "Probs": 0, "Pops": {}} # this step "opens" each phase by generating the cartesian product # NOTE: why do it here and not pass the ambiguity to the graph query? 
# [['A*01:01/A*01:02/A*01:03/A*01:16N', 'C*06:02'] @@ -1332,30 +1627,38 @@ def comp_cand(self, gl_string, binary, epsilon, n, MUUG_output, haps_output, pla # phases = self.open_phases(pmags, n_loci, gl_string) if not phases: - print('in reduce_phase_to_valid_alleles') + print("in reduce_phase_to_valid_alleles") self.reduce_phase_to_valid_allels(pmags, n_loci) phases = self.open_phases(pmags, n_loci, gl_string) if not phases: - print('in reduce_phase_to_commons_alleles') + print("in reduce_phase_to_commons_alleles") self.reduce_phase_to_commons_alleles(pmags, n_loci, commons_number=10) phases = self.open_phases(pmags, n_loci, gl_string) if phases: if MUUG_output: - prior_matrix_orig = np.array(self.priorMatrix, order='K', copy=True) #copy.deepcopy(self.priorMatrix) - res_muugs = self.call_comp_phase_prob(epsilon, n, phases, chr, True, planb) - if planb and len(res_muugs['Haps']) == 0: - self.plan = 'c' + prior_matrix_orig = copy.deepcopy(self.priorMatrix) + res_muugs = self.call_comp_phase_prob( + epsilon, n, phases, chr, True, planb + ) + if planb and len(res_muugs["Haps"]) == 0: + self.plan = "c" self.reduce_phase_to_commons_alleles(pmags, n_loci, 1, True) phases = self.open_phases(pmags, n_loci, gl_string) - res_muugs = self.comp_phase_prob_plan_c(phases, n_loci, epsilon, True) + res_muugs = self.comp_phase_prob_plan_c( + phases, n_loci, epsilon, True + ) self.priorMatrix = prior_matrix_orig - if (haps_output): - res_haps = self.call_comp_phase_prob(epsilon, n, phases, chr, False, planb) - if planb and len(res_haps['Haps']) == 0 and not em:#em + if haps_output: + res_haps = self.call_comp_phase_prob( + epsilon, n, phases, chr, False, planb + ) + if planb and len(res_haps["Haps"]) == 0 and not em: # em self.reduce_phase_to_commons_alleles(pmags, n_loci, 1, True) phases = self.open_phases(pmags, n_loci, gl_string) - res_haps = self.comp_phase_prob_plan_c(phases, n_loci, epsilon, False) + res_haps = self.comp_phase_prob_plan_c( + phases, n_loci, epsilon, False + ) return res_muugs, res_haps @@ -1363,80 +1666,89 @@ def call_comp_phase_prob(self, epsilon, n, phases, chr, MUUG_output, planb): n_res = 0 # number of results min_res = 100 # minimum number of results (NOTE: why 10?) - min_epsilon = 1.e-9 # minimum threshold, then why the other? - res = {'Haps': 'NaN', 'Probs': 0} + min_epsilon = 1.0e-9 # minimum threshold, then why the other? 
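
The loop that follows implements an epsilon back-off: each pass relaxes the pruning threshold by an order of magnitude until enough haplotype pairs survive, and epsilon is forced to zero once it drops below min_epsilon. A minimal sketch of that control flow, where compute_results stands in for the comp_phase_prob_* calls:

    # Sketch of the epsilon back-off used below; `compute_results` is a stand-in.
    def backoff_search(compute_results, epsilon=1e-3, min_epsilon=1e-9):
        results = {}
        while epsilon > 0:
            epsilon /= 10                # relax the cutoff an order of magnitude
            if epsilon < min_epsilon:    # below the floor, disable pruning
                epsilon = 0.0
            results = compute_results(epsilon)
            if results and epsilon > 0:  # hits found before reaching the floor
                break
        return results

The real method then makes one final pass with epsilon rescaled to MaxProb / 100000 (the lastRound flag below) before falling back to plan B.
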
+        res = {"Haps": "NaN", "Probs": 0}
         lastRound = False
-        while (epsilon > 0):
+        while epsilon > 0:
             # each time through reduce epsilon by an order of magnitude
             # until you have seen "min_res" results
             epsilon /= 10
             #
             # if you have reduced epsilon below min_epsilon then just make it 0
             #
-            if (epsilon < min_epsilon):
+            if epsilon < min_epsilon:
                 epsilon = 0.0
             # get results
             if MUUG_output:
-                res = self.comp_phase_prob_genotype(phases, chr['N_Loc'], epsilon, n)
+                res = self.comp_phase_prob_genotype(phases, chr["N_Loc"], epsilon, n)
             else:
-                res = self.comp_phase_prob_haplotype(phases, chr['N_Loc'], epsilon, n)
-            haps = res['Haps']
+                res = self.comp_phase_prob_haplotype(phases, chr["N_Loc"], epsilon, n)
+            haps = res["Haps"]
             n_res = len(haps)
-            if (n_res > 0 and epsilon > 0):
+            if n_res > 0 and epsilon > 0:
                 # sorted_by_value = sorted(haps.items(), key=lambda kv: kv[1], reverse=True)
-                epsilon = res['MaxProb'] / 100000
+                epsilon = res["MaxProb"] / 100000
                 lastRound = True
                 break
 
         if lastRound:
             if MUUG_output:
-                res = self.comp_phase_prob_genotype(phases, chr['N_Loc'], epsilon, n)
+                res = self.comp_phase_prob_genotype(phases, chr["N_Loc"], epsilon, n)
             else:
-                res = self.comp_phase_prob_haplotype(phases, chr['N_Loc'], epsilon, n)
+                res = self.comp_phase_prob_haplotype(phases, chr["N_Loc"], epsilon, n)
 
         # no plan b
         for level in range(2):
             if level == 1:
-                self.priorMatrix = np.ones((len(self.populations), len(self.populations))) ####
-            if planb and len(res['Haps']) == 0:
-                self.plan = 'b'
+                if self.unk_priors == "MR":
+                    self.priorMatrix = np.ones(
+                        (len(self.populations), len(self.populations))
+                    )
+                else:
+                    self.priorMatrix = np.identity(len(self.populations))
+                # self.priorMatrix = np.ones((len(self.populations), len(self.populations))) ####
+            if planb and len(res["Haps"]) == 0:
+                self.plan = "b"
                 epsilon = 1e-14
                 n_res = 0
                 min_res = 10
-                min_epsilon = 1.e-3
+                min_epsilon = 1.0e-3
                 # self.priorMatrix = np.ones((len(self.populations), len(self.populations)))
                 while (epsilon > 0) & (n_res < min_res):
                     epsilon /= 10
-                    if (epsilon < min_epsilon):
+                    if epsilon < min_epsilon:
                         epsilon = 0.0
                     phases_planb = deepcopy_list(phases)
                     # Find the option according to plan b
                     if MUUG_output:
-                        res = self.comp_phase_prob_plan_b(phases_planb, chr['N_Loc'], epsilon, True)
+                        res = self.comp_phase_prob_plan_b(
+                            phases_planb, chr["N_Loc"], epsilon, True
+                        )
                     else:
-                        res = self.comp_phase_prob_plan_b(phases_planb, chr['N_Loc'], epsilon, False)
-                    n_res = len(res['Haps'])
+                        res = self.comp_phase_prob_plan_b(
+                            phases_planb, chr["N_Loc"], epsilon, False
+                        )
+                    n_res = len(res["Haps"])
 
         return res
 
-
-    #sum all probs for each first race and return the one with the highest prob
+    # sum all probs for each first race and return the one with the highest prob
     def find_first_race(self, pops, probs):
-        prob_by_race =dict()
+        prob_by_race = dict()
         for i, pop in enumerate(pops):
             key = pop[0]
             if key in prob_by_race:
-                prob_by_race[key] += probs[i][0]*probs[i][1]
+                prob_by_race[key] += probs[i][0] * probs[i][1]
             else:
-                prob_by_race[key] = probs[i][0]*probs[i][1]
+                prob_by_race[key] = probs[i][0] * probs[i][1]
 
         sortedDict = sorted(prob_by_race.items(), key=lambda x: x[1], reverse=True)
         return sortedDict[0][0]
 
-    #aum all probs of pairs- race1,second race
-    #and return second race of the highest pair
+    # sum all probs of pairs (race1, second race)
+    # and return the second race of the highest pair
     def find_second_race(self, race1, pops, probs):
         prob_by_race = dict()
         for i, pop in enumerate(pops):
@@ -1450,31 +1762,51 @@ def find_second_race(self, race1, pops, 
probs): sortedDict = sorted(prob_by_race.items(), key=lambda x: x[1], reverse=True) return sortedDict[0][0] - #find the haps of race1-race2, and sum the probs - def write_haplotype(self, race1, race2, haps, pops, probs, name_gl, foutHap, foutPop): + # find the haps of race1-race2, and sum the probs + def write_haplotype( + self, race1, race2, haps, pops, probs, name_gl, foutHap, foutPop + ): haps_dict = dict() for i, line in enumerate(pops): if race1 == line[0] and race2 == line[1]: - key = haps[i][0] + ',' + haps[i][1] + key = haps[i][0] + "," + haps[i][1] if key in haps_dict: haps_dict[key] += probs[i][0] * probs[i][1] else: haps_dict[key] = probs[i][0] * probs[i][1] - #write the 1000 highest to file + # write the 1000 highest to file sortedDict = sorted(haps_dict.items(), key=lambda x: x[1], reverse=True) minBestResult = min(1000, len(sortedDict)) for k in range(minBestResult): - foutHap.write(name_gl + ',' + str(sortedDict[k][0]) + ',' + - str(sortedDict[k][1]) + ',' + str(k) + ',' + '\n') - foutPop.write(name_gl + ',' + str(race1) + ',' + str(race2) +'\n') - - #find pair of races with the highest prob + foutHap.write( + name_gl + + "," + + str(sortedDict[k][0]) + + "," + + str(sortedDict[k][1]) + + "," + + str(k) + + "," + + "\n" + ) + foutPop.write(name_gl + "," + str(race1) + "," + str(race2) + "\n") + + # find pair of races with the highest prob def find_races(self, res, foutHap, foutPop, name_gl): - if len(res['Pops']) > 0: - race1 = self.find_first_race(res['Pops'], res['Probs']) - race2 = self.find_second_race(race1, res['Pops'], res['Probs']) - self.write_haplotype(race1, race2, res['Haps'], res['Pops'], res['Probs'], name_gl, foutHap, foutPop) + if len(res["Pops"]) > 0: + race1 = self.find_first_race(res["Pops"], res["Probs"]) + race2 = self.find_second_race(race1, res["Pops"], res["Probs"]) + self.write_haplotype( + race1, + race2, + res["Haps"], + res["Pops"], + res["Probs"], + name_gl, + foutHap, + foutPop, + ) """def calc_priority_matrix(self, list_race1, list_race2, priority ): list_race1 = list_race1.split(';') @@ -1520,7 +1852,7 @@ def find_races(self, res, foutHap, foutPop, name_gl): self.priorMatrix = self.priorMatrix / prior_sum""" - def calc_priority_matrix(self, list_race1, list_race2, priority ): + def calc_priority_matrix(self, list_race1, list_race2, priority): popLen = len(self.populations) self.priorMatrix = np.zeros((popLen, popLen)) # Identity matrix @@ -1528,10 +1860,14 @@ def calc_priority_matrix(self, list_race1, list_race2, priority ): for race_1 in list_race1: for race_2 in list_race2: - if race_1 == '' and race_2 == '': + if race_1 == "" and race_2 == "": continue - if race_1 == '' or race_2 == '': - race = self.populations.index(race_2) if race_1 == '' else self.populations.index(race_1) + if race_1 == "" or race_2 == "": + race = ( + self.populations.index(race_2) + if race_1 == "" + else self.populations.index(race_1) + ) """if race_1 == '': race = self.populations.index(race_2) else: @@ -1541,35 +1877,47 @@ def calc_priority_matrix(self, list_race1, list_race2, priority ): # calc priority for i in range(popLen): - tmp_matrix[race, i] = tmp_matrix[race, i] + priority['gamma']*2 + tmp_matrix[race, i] = ( + tmp_matrix[race, i] + priority["gamma"] * 2 + ) tmp_matrix = tmp_matrix + tmp_matrix.transpose() - tmp_matrix[race, race] -= (priority['gamma']*2) + tmp_matrix[race, race] -= priority["gamma"] * 2 - tmp_matrix = priority['eta'] * np.ones((popLen, popLen)) + tmp_matrix + priority['beta'] * Imatrix + tmp_matrix = ( + priority["eta"] * 
np.ones((popLen, popLen)) + + tmp_matrix + + priority["beta"] * Imatrix + ) self.priorMatrix += tmp_matrix else: tmp_matrix = np.zeros((popLen, popLen)) race1 = self.populations.index(race_1) race2 = self.populations.index(race_2) - # calc priority + # calc priority for i in range(popLen): - tmp_matrix[race1, i] = tmp_matrix[race1, i] + priority['gamma'] - tmp_matrix[i, race2] = tmp_matrix[i, race2] + priority['gamma'] + tmp_matrix[race1, i] = tmp_matrix[race1, i] + priority["gamma"] + tmp_matrix[i, race2] = tmp_matrix[i, race2] + priority["gamma"] - tmp_matrix[race1, race2] -= priority['gamma'] + tmp_matrix[race1, race2] -= priority["gamma"] - tmp_matrix[race1, race2] = tmp_matrix[race1, race2] + priority['alpha'] + tmp_matrix[race1, race2] = ( + tmp_matrix[race1, race2] + priority["alpha"] + ) if race1 != race2: tmp_matrix = tmp_matrix + tmp_matrix.transpose() - tmp_matrix[race1, race1] -= priority['gamma'] - tmp_matrix[race2, race2] -= priority['gamma'] + tmp_matrix[race1, race1] -= priority["gamma"] + tmp_matrix[race2, race2] -= priority["gamma"] - tmp_matrix[race1, race1] += priority['delta'] + tmp_matrix[race1, race1] += priority["delta"] if race1 != race2: - tmp_matrix[race2, race2] += priority['delta'] - tmp_matrix = priority['eta'] * np.ones((popLen, popLen)) + tmp_matrix + priority['beta'] * Imatrix + tmp_matrix[race2, race2] += priority["delta"] + tmp_matrix = ( + priority["eta"] * np.ones((popLen, popLen)) + + tmp_matrix + + priority["beta"] * Imatrix + ) self.priorMatrix += tmp_matrix @@ -1577,52 +1925,75 @@ def calc_priority_matrix(self, list_race1, list_race2, priority ): prior_sum = 0 for i in range(popLen): for j in range(popLen): - self.priorMatrix[i][j] = self.priorMatrix[i][j] * self.count_by_prob[i] * self.count_by_prob[j] + self.priorMatrix[i][j] = ( + self.priorMatrix[i][j] + * self.count_by_prob[i] + * self.count_by_prob[j] + ) prior_sum += self.priorMatrix[i][j] self.priorMatrix = self.priorMatrix / prior_sum - def update_prob_by_priority(self, res, race1, race2, priority): # prob by priority - pops = res['Pops'] + pops = res["Pops"] for i, pop in enumerate(pops): race1 = self.populations.index(pop[0]) race2 = self.populations.index(pop[1]) - res['Probs'][i][0] = float(res['Probs'][i][0])*math.sqrt(self.priorMatrix[race1][race2]) - res['Probs'][i][1] = float(res['Probs'][i][1])*math.sqrt(self.priorMatrix[race1][race2]) + res["Probs"][i][0] = float(res["Probs"][i][0]) * math.sqrt( + self.priorMatrix[race1][race2] + ) + res["Probs"][i][1] = float(res["Probs"][i][1]) * math.sqrt( + self.priorMatrix[race1][race2] + ) return res - def impute_one(self, subject_id, gl, binary, race1, race2, priority, epsilon, n, MUUG_output, haps_output, planb, em):#em + def impute_one( + self, + subject_id, + gl, + binary, + race1, + race2, + priority, + epsilon, + n, + MUUG_output, + haps_output, + planb, + em, + ): # em clean_gl = clean_up_gl(gl) if self.unk_priors == "MR": - self.priorMatrix = np.ones((len(self.populations), len(self.populations))) + self.priorMatrix = np.ones((len(self.populations), len(self.populations))) else: self.priorMatrix = np.identity(len(self.populations)) to_calc_prior_matrix = False if race1 or race2: - race1 = race1.split(';') - for i,race in enumerate(race1): + race1 = race1.split(";") + for i, race in enumerate(race1): if not race in self.populations: - race1[i] = '' + race1[i] = "" else: to_calc_prior_matrix = True - race2 = race2.split(';') + race2 = race2.split(";") for i, race in enumerate(race2): if not race in self.populations: - race2[i] = '' + 
race2[i] = "" else: to_calc_prior_matrix = True if to_calc_prior_matrix: - self.calc_priority_matrix(race1, race2, priority) + self.calc_priority_matrix(race1, race2, priority) # lookup based on genotype res_muugs = res_haps = None if gl: - res_muugs, res_haps = self.comp_cand(clean_gl, binary, epsilon, n, MUUG_output, haps_output, planb, em) + res_muugs, res_haps = self.comp_cand( + clean_gl, binary, epsilon, n, MUUG_output, haps_output, planb, em + ) return subject_id, res_muugs, res_haps - def impute_file(self, config, planb=None, em_mr = False, em = False):##em + def impute_file(self, config, planb=None, em_mr=False, em=False): ##em priority = config["priority"] MUUG_output = config["output_MUUG"] haps_output = config["output_haplotypes"] @@ -1630,9 +2001,9 @@ def impute_file(self, config, planb=None, em_mr = False, em = False):##em epsilon = config["epsilon"] number_of_results = config["number_of_results"] number_of_pop_results = config["number_of_pop_results"] - #planb = config["planb"]#em - if planb is None:#em - planb = config["planb"]#em + # planb = config["planb"]#em + if planb is None: # em + planb = config["planb"] # em # TODO: do the right thing if its a gzip if self.verbose: @@ -1644,88 +2015,144 @@ def impute_file(self, config, planb=None, em_mr = False, em = False):##em f_bin = json.load(json_file) f_bin_exist = True - f = open(config["imputation_input_file"], 'r') + f = open(config["imputation_input_file"], "r") if MUUG_output: - fout_hap_muug = open(config["imputation_out_umug_freq_file"], 'w') - fout_pop_muug = open(config["imputation_out_umug_pops_file"], 'w') + fout_hap_muug = open(config["imputation_out_umug_freq_file"], "w") + fout_pop_muug = open(config["imputation_out_umug_pops_file"], "w") if haps_output: - fout_hap_haplo = open(config["imputation_out_hap_freq_file"], 'w') - fout_pop_haplo = open(config["imputation_out_hap_pops_file"], 'w') + fout_hap_haplo = open(config["imputation_out_hap_freq_file"], "w") + fout_pop_haplo = open(config["imputation_out_hap_pops_file"], "w") - miss = open(config["imputation_out_miss_file"], 'w') - problem = open(config["imputation_out_problem_file"], 'w') + miss = open(config["imputation_out_miss_file"], "w") + problem = open(config["imputation_out_problem_file"], "w") with f as lines: for (i, name_gl) in enumerate(lines): - try: - name_gl = name_gl.rstrip() # remove trailing whitespace - if ',' in name_gl: - list_gl = name_gl.split(',') - else: - list_gl = name_gl.split('%') - - subject_id = list_gl[0] - subject_gl = list_gl[1] - subject_bin = [1] *(len(self.full_loci) - 1) - if f_bin_exist: - subject_bin = f_bin[subject_id] - race1 = race2 = None - if len(list_gl) > 2: - race1 = list_gl[2] - race2 = list_gl[3] - - start = timeit.default_timer() - - # - # Impute One - # This method will impute one subject given the parameters - self.plan = 'a' - self.option_1 = 0 - self.option_2 = 0 - subject_id, res_muugs, res_haps = self.impute_one(subject_id, subject_gl, subject_bin, race1, race2, priority, epsilon, n, MUUG_output, haps_output, planb, em)#em - - if res_muugs is None: - problem.write(str(i)+","+str(subject_id)+"\n") - continue - - if (len(res_haps['Haps']) == 0 or res_haps['Haps'] == 'NaN') and len(res_muugs['Haps']) == 0: - miss.write(str(i)+","+str(subject_id)+"\n") - - - if haps_output: - haps = res_haps['Haps'] - probs = res_haps['Probs'] - pops = res_haps['Pops'] - print("{index} Subject: {id} {hap_length} haplotypes".format(index=i, id=subject_id, hap_length=len(haps))) - if em_mr: - write_best_hap_race_pairs(subject_id, 
haps, pops, probs, number_of_results, fout_hap_haplo) - write_best_prob(subject_id, pops, probs, 1, fout_pop_haplo) - else: - write_best_prob(subject_id, haps, probs, number_of_results, fout_hap_haplo, "+") - write_best_prob(subject_id, pops, probs, number_of_pop_results, fout_pop_haplo) - if MUUG_output: - haps = res_muugs['Haps'] - pops = res_muugs['Pops'] - print("{index} Subject: {id} {hap_length} haplotypes".format(index=i, id=subject_id, hap_length=len(haps))) - write_best_prob_genotype(subject_id, haps, number_of_results, fout_hap_muug) - write_best_prob_genotype(subject_id, pops, number_of_pop_results, fout_pop_muug) - - if self.verbose: - self.logger.info("{index} Subject: {id} {hap_length} haplotypes".format(index=i, id=subject_id, hap_length=len(haps))) - self.logger.info("{index} Subject: {id} plan: {plan} oppen_phases - count of open regular option: {option1}, count of alternative opening: {option2}".format(index=i, id=subject_id, - plan=self.plan, option1= self.option_1, option2 = self.option_2)) - - stop = timeit.default_timer() - time_taken = stop-start - print(time_taken) - if self.verbose: - self.logger.info("Time taken: " + str(time_taken)) - except: - print(f"{i} Subject: {subject_id} - Exception") - problem.write(str(name_gl) + "\n") + # try: + name_gl = name_gl.rstrip() # remove trailing whitespace + if "," in name_gl: + list_gl = name_gl.split(",") + else: + list_gl = name_gl.split("%") + + subject_id = list_gl[0] + subject_gl = list_gl[1] + subject_bin = [1] * (len(self.full_loci) - 1) + if f_bin_exist: + subject_bin = f_bin[subject_id] + race1 = race2 = None + if len(list_gl) > 2: + race1 = list_gl[2] + race2 = list_gl[3] + + start = timeit.default_timer() + + # + # Impute One + # This method will impute one subject given the parameters + self.plan = "a" + self.option_1 = 0 + self.option_2 = 0 + subject_id, res_muugs, res_haps = self.impute_one( + subject_id, + subject_gl, + subject_bin, + race1, + race2, + priority, + epsilon, + n, + MUUG_output, + haps_output, + planb, + em, + ) # em + + if res_muugs is None: + problem.write(str(i) + "," + str(subject_id) + "\n") continue + if (len(res_haps["Haps"]) == 0 or res_haps["Haps"] == "NaN") and len( + res_muugs["Haps"] + ) == 0: + miss.write(str(i) + "," + str(subject_id) + "\n") + + if haps_output: + haps = res_haps["Haps"] + probs = res_haps["Probs"] + pops = res_haps["Pops"] + print( + "{index} Subject: {id} {hap_length} haplotypes".format( + index=i, id=subject_id, hap_length=len(haps) + ) + ) + if em_mr: + write_best_hap_race_pairs( + subject_id, + haps, + pops, + probs, + number_of_results, + fout_hap_haplo, + ) + write_best_prob(subject_id, pops, probs, 1, fout_pop_haplo) + else: + write_best_prob( + subject_id, + haps, + probs, + number_of_results, + fout_hap_haplo, + "+", + ) + write_best_prob( + subject_id, + pops, + probs, + number_of_pop_results, + fout_pop_haplo, + ) + if MUUG_output: + haps = res_muugs["Haps"] + pops = res_muugs["Pops"] + print( + "{index} Subject: {id} {hap_length} haplotypes".format( + index=i, id=subject_id, hap_length=len(haps) + ) + ) + write_best_prob_genotype( + subject_id, haps, number_of_results, fout_hap_muug + ) + write_best_prob_genotype( + subject_id, pops, number_of_pop_results, fout_pop_muug + ) + + if self.verbose: + self.logger.info( + "{index} Subject: {id} {hap_length} haplotypes".format( + index=i, id=subject_id, hap_length=len(haps) + ) + ) + self.logger.info( + "{index} Subject: {id} plan: {plan} oppen_phases - count of open regular option: {option1}, count of 
alternative opening: {option2}".format( + index=i, + id=subject_id, + plan=self.plan, + option1=self.option_1, + option2=self.option_2, + ) + ) + + stop = timeit.default_timer() + time_taken = stop - start + print(time_taken) + if self.verbose: + self.logger.info("Time taken: " + str(time_taken)) + """except: + problem.write(str(name_gl) + "\n") + continue""" + f.close() if MUUG_output: fout_hap_muug.close() diff --git a/grim/imputation/imputegl/networkx_graph.py b/grim/imputation/imputegl/networkx_graph.py index c7c834a..01da167 100755 --- a/grim/imputation/imputegl/networkx_graph.py +++ b/grim/imputation/imputegl/networkx_graph.py @@ -22,11 +22,11 @@ def __init__(self, config): # bug: dies if file doesn't exist # bug: list_f doesn't exist - with open(path + '/nodes_for_plan_a.txt') as list_f: + with open(path + "/nodes_for_plan_a.txt") as list_f: for item in list_f: self.nodes_plan_a.append(item.strip()) # bug: dies if file doesn't exist - with open(path + '/nodes_for_plan_b.txt') as list_f: + with open(path + "/nodes_for_plan_b.txt") as list_f: for item in list_f: self.nodes_plan_b.append(item.strip()) # self.nodes_plan_a = pickle.load(open( path + '/nodes_for_plan_a.pkl', "rb")) @@ -37,22 +37,30 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile): nodesDict = dict() # add nodes from file with open(nodesFile) as nodesfile: - readNodes = csv.reader(nodesfile, delimiter=',') - next(readNodes) + readNodes = csv.reader(nodesfile, delimiter=",") + firstLine = next(readNodes) for row in readNodes: if len(row) > 0: if not self.nodes_plan_a or row[2] in self.nodes_plan_a: - self.graph.add_node(row[1], label=row[2], freq=list(map(float, row[3].split(";")))) + self.graph.add_node( + row[1], + label=row[2], + freq=list(map(float, row[3].split(";"))), + ) if not self.nodes_plan_b or row[2] in self.nodes_plan_b: - self.whole_graph.add_node(row[1], label=row[2], freq=list(map(float, row[3].split(";")))) + self.whole_graph.add_node( + row[1], + label=row[2], + freq=list(map(float, row[3].split(";"))), + ) nodesDict[row[0]] = row[1] nodesfile.close() # add edges from file with open(edgesFile) as edgesfile: - readEdges = csv.reader(edgesfile, delimiter=',') - next(readEdges) + readEdges = csv.reader(edgesfile, delimiter=",") + firstLine = next(readEdges) for row in readEdges: if len(row) > 0: node1 = nodesDict[row[0]] @@ -67,20 +75,22 @@ def build_graph(self, nodesFile, edgesFile, allEdgesFile): # add edges from file with open(allEdgesFile) as allEdgesfile: - readEdges = csv.reader(allEdgesfile, delimiter=',') - next(readEdges) + readEdges = csv.reader(allEdgesfile, delimiter=",") + firstLine = next(readEdges) for row in readEdges: if len(row) > 0: node1 = nodesDict[row[0]] node2 = nodesDict[row[1]] - if len(self.whole_graph.nodes[node1]['label']) < len(self.whole_graph.nodes[node2]['label']): - connector = self.whole_graph.nodes[node2]['label'] + node1 - self.whole_graph.add_edge(node1, connector) - self.whole_graph.add_edge(connector, node2) - else: - connector = self.whole_graph.nodes[node1]['label'] + node2 - self.whole_graph.add_edge(node2, connector) - self.whole_graph.add_edge(connector, node1) + kind = "-".join( + sorted( + [ + self.whole_graph.nodes[node1]["label"], + self.whole_graph.nodes[node2]["label"], + ], + key=len, + ) + ) + self.whole_graph.add_edge(node1, node2, color=kind) allEdgesfile.close() nodesDict.clear() @@ -109,10 +119,10 @@ def haps_with_probs_by_label(self, label): listLabel = self.haps_by_label(label) if not self.nodes_plan_a or label in self.nodes_plan_a: for 
allele in listLabel: - dictAlleles[allele] = self.graph.nodes[allele]['freq'] + dictAlleles[allele] = self.graph.nodes[allele]["freq"] elif label in self.nodes_plan_b: for allele in listLabel: - dictAlleles[allele] = self.whole_graph.nodes[allele]['freq'] + dictAlleles[allele] = self.whole_graph.nodes[allele]["freq"] return dictAlleles @@ -120,14 +130,13 @@ def haps_with_probs_by_label(self, label): def adjs_query(self, alleleList): adjDict = dict() for allele in alleleList: - if allele in self.graph: - allele_node = self.graph.nodes[allele] - if allele_node["label"] == self.full_loci: # 'ABCQR': - adjDict[allele] = allele_node['freq'] + if allele in self.graph.nodes(): + if self.graph.nodes[allele]["label"] == self.full_loci: # 'ABCQR': + adjDict[allele] = self.graph.nodes[allele]["freq"] else: adjs = self.graph.adj[allele] for adj in adjs: - adjDict[adj] = self.graph.nodes[adj]['freq'] + adjDict[adj] = self.graph.nodes[adj]["freq"] return adjDict # find all adj of alleleList by label @@ -138,10 +147,29 @@ def adjs_query_by_color(self, alleleList, labelA, labelB): return self.node_probs(alleleList, labelA) for allele in alleleList: - if allele in self.whole_graph: - alleles = self.whole_graph.adj.get(labelB + allele , []) + if allele in self.whole_graph.nodes(): + copyLabelA = labelA + newLabelA = labelA + miss = missing(labelA, labelB) + alleles = [allele] + + while len(miss) > 0: + tmpAllels = list() + newLabelA = copyLabelA + miss[0] + newLabelA = "".join(sorted(newLabelA)) + del miss[0] + for oneAllel in alleles: + # alleles.remove(oneAllel) + adjs = self.whole_graph.adj[oneAllel] + label = copyLabelA + "-" + newLabelA + for adj in adjs: + if adjs[adj]["color"] == label: + tmpAllels.append(adj) + alleles = tmpAllels + copyLabelA = newLabelA + for adj in alleles: - adjDict[adj] = self.whole_graph.nodes[adj]['freq'] + adjDict[adj] = self.whole_graph.nodes[adj]["freq"] return adjDict # return dict of nodes and there proper freq @@ -149,10 +177,10 @@ def node_probs(self, nodes, label): nodesDict = dict() if not self.nodes_plan_b or label in self.nodes_plan_b: for node in nodes: - if node in self.whole_graph: - nodesDict[node] = self.whole_graph.nodes[node]['freq'] + if node in self.whole_graph.nodes(): + nodesDict[node] = self.whole_graph.nodes[node]["freq"] elif label in self.nodes_plan_a: for node in nodes: - if node in self.whole_graph: - nodesDict[node] = self.graph.nodes[node]['freq'] + if node in self.whole_graph.nodes(): + nodesDict[node] = self.graph.nodes[node]["freq"] return nodesDict diff --git a/grim/imputation/setup.py b/grim/imputation/setup.py index f3dbd8a..88708cb 100755 --- a/grim/imputation/setup.py +++ b/grim/imputation/setup.py @@ -1,8 +1,8 @@ -''' +""" Created on Feb 16, 2018 @author: mmaiers-nmdp -''' +""" from setuptools import setup, find_packages from codecs import open @@ -11,55 +11,40 @@ here = path.abspath(path.dirname(__file__)) # Get the long description from the README file -with open(path.join(here, 'README.rst'), encoding='utf-8') as f: +with open(path.join(here, "README.rst"), encoding="utf-8") as f: long_description = f.read() setup( - name='imputegl', - + name="imputegl", # Versions should comply with PEP440. 
For a discussion on single-sourcing # the version across setup.py and the project code, see # https://packaging.python.org/en/latest/single_source_version.html - version='0.0.4', - - description='Imputation based on Neo4j graph', + version="0.0.4", + description="Imputation based on Neo4j graph", long_description=long_description, - # The project's main homepage. - url='https://github.com/nmdp-bioinformatics/graph-imputation-match', - - packages=[ - 'imputegl' - ], - package_dir={'imputegl': - 'imputegl'}, + url="https://github.com/nmdp-bioinformatics/graph-imputation-match", + packages=["imputegl"], + package_dir={"imputegl": "imputegl"}, # Author details - author_email='mmaiers@nmdp.org', - + author_email="mmaiers@nmdp.org", # Choose your license - license='LGPL3', - + license="LGPL3", # What does your project relate to? - keywords='imputation HLA graph', - + keywords="imputation HLA graph", # You can just specify the packages manually here if your project is # simple. Or you can use find_packages(). - # List run-time dependencies here. These will be installed by pip when # your project is installed. For an analysis of "install_requires" vs pip's # requirements files see: # https://packaging.python.org/en/latest/requirements.html - install_requires=['pandas>=1.0.3', 'networkx==2.3'], - + install_requires=["pandas>=1.0.3", "networkx==2.3"], # List additional groups of dependencies here (e.g. development # dependencies). You can install these using the following syntax, # for example: # $ pip install -e .[dev,test] extras_require={ - 'dev': ['check-manifest'], - 'test': ['coverage'], + "dev": ["check-manifest"], + "test": ["coverage"], }, - ) - - diff --git a/grim/model/allele.py b/grim/model/allele.py deleted file mode 100644 index a13b8cc..0000000 --- a/grim/model/allele.py +++ /dev/null @@ -1,74 +0,0 @@ -# -*- coding: utf-8 -*- - -# -# grim Graph Imputation -# Copyright (c) 2021 Be The Match operated by National Marrow Donor Program. All Rights Reserved. -# -# This library is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation; either version 3 of the License, or (at -# your option) any later version. -# -# This library is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this library; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -# -# > http://www.fsf.org/licensing/licenses/lgpl.html -# > http://www.opensource.org/licenses/lgpl-license.php -# - - -class Allele: - """ - A Class I allele is an allele of Locus 'HLA-A', 'HLA-B', 'HLA-C' - """ - - def __init__(self, locus: str, allele: str): - """ - Validate the Class I locus and allele first. 
- - @param locus: Class I Locus - @param allele: Class I Allele - """ - self.validate(locus, allele) - self.allele = allele - self.locus = locus - - def __eq__(self, other: object) -> bool: - if isinstance(other, Allele): - return self.locus == other.locus and self.allele == other.allele - return False - - def __lt__(self, other): - if isinstance(other, Allele): - return self.allele < other.allele - return False - - def __str__(self) -> str: - if self.allele.startswith("HLA-"): - return self.allele - return f"{self.locus}*{self.locus}" - - @staticmethod - def validate(locus: str, allele: str): - if locus not in ["HLA-A", "HLA-B", "HLA-C"]: - raise InvalidAllele(f"{locus} is Not a valid locus") - if "*" not in allele and ":" not in allele: - raise InvalidAllele("f{allele} is Not a valid allele") - - @classmethod - def from_fullname(cls, allele): - if not allele.startswith("HLA-"): - raise InvalidAllele(f"{allele} is not a fully-named allele") - locus = allele.split("*")[0] - return Allele(locus, allele) - - -class InvalidAllele(Exception): - def __init__(self, message: str) -> None: - self.message = message diff --git a/grim/model/slug.py b/grim/model/slug.py deleted file mode 100644 index 90f87be..0000000 --- a/grim/model/slug.py +++ /dev/null @@ -1,72 +0,0 @@ -# -*- coding: utf-8 -*- - -# -# grim Graph Imputation -# Copyright (c) 2021 Be The Match operated by National Marrow Donor Program. All Rights Reserved. -# -# This library is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation; either version 3 of the License, or (at -# your option) any later version. -# -# This library is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this library; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -# -# > http://www.fsf.org/licensing/licenses/lgpl.html -# > http://www.opensource.org/licenses/lgpl-license.php -# - -from .allele import Allele - - -class SLUG: - """ - SLUGs are Single Locus Un-phased Genotypes made up of a pair of alleles. - The alleles are lexicographically sorted. - """ - - def __init__(self, allele_1: Allele, allele_2: Allele) -> None: - """ - SLUGs are single locus genotypes with alleles in sorted order - - @param allele_1: - @param allele_2: - """ - if allele_1 < allele_2: - self.allele_1 = allele_1 - self.allele_2 = allele_2 - else: - self.allele_1 = allele_2 - self.allele_2 = allele_1 - - def __str__(self) -> str: - """ - String version of SLUG is in GL String format - - @return: gl-string format of SLUG - """ - return f"{self.allele_1}+{self.allele_2}" - - def __eq__(self, other: object) -> bool: - """ - 2 SLUGs are equal if the individual alleles are equal - to each other. 
- - @param other: - @return: True indicating whether 2 SLUGS are equal - """ - if isinstance(other, SLUG): - return self.allele_1 == other.allele_1 and self.allele_2 == other.allele_2 - return False - - @classmethod - def from_glstring(cls, slug): - allele_1, allele_2 = slug.split("+") - slug = SLUG(Allele.from_fullname(allele_1), Allele.from_fullname(allele_2)) - return slug diff --git a/grim/validation/parallel-imputation.py b/grim/validation/parallel-imputation.py index f935394..7efc7cb 100755 --- a/grim/validation/parallel-imputation.py +++ b/grim/validation/parallel-imputation.py @@ -18,17 +18,23 @@ pr.enable() parser = argparse.ArgumentParser() -parser.add_argument("-c", "--config", - required=False, - default="../minimal-configuration.json", - help="Configuration JSON file", - type=str) - -parser.add_argument("-p", "--processes", - required=False, - default=0, - help="Number of processes to use. Defaults to total cores - 1", - type=int) +parser.add_argument( + "-c", + "--config", + required=False, + default="../minimal-configuration.json", + help="Configuration JSON file", + type=str, +) + +parser.add_argument( + "-p", + "--processes", + required=False, + default=0, + help="Number of processes to use. Defaults to total cores - 1", + type=int, +) args = parser.parse_args() configuration_file = args.config @@ -42,51 +48,67 @@ json_conf = json.load(f) config = { - "planb": json_conf.get('planb', True), - "pops": json_conf.get('populations'), - "priority": json_conf.get('priority'), - "epsilon": json_conf.get('epsilon', 1e-3), - "number_of_results": json_conf.get('number_of_results', 1000), - "number_of_pop_results": json_conf.get('number_of_pop_results', 100), + "planb": json_conf.get("planb", True), + "pops": json_conf.get("populations"), + "priority": json_conf.get("priority"), + "epsilon": json_conf.get("epsilon", 1e-3), + "number_of_results": json_conf.get("number_of_results", 1000), + "number_of_pop_results": json_conf.get("number_of_pop_results", 100), "output_MUUG": json_conf.get("output_MUUG", True), "output_haplotypes": json_conf.get("output_haplotypes", False), "node_file": project_dir + json_conf.get("node_csv_file"), "top_links_file": project_dir + json_conf.get("top_links_csv_file"), "edges_file": project_dir + json_conf.get("edges_csv_file"), "imputation_input_file": project_dir + json_conf.get("imputation_in_file"), - "imputation_out_umug_freq_file": output_dir + json_conf.get("imputation_out_umug_freq_filename"), - "imputation_out_umug_pops_file": output_dir + json_conf.get("imputation_out_umug_pops_filename"), - "imputation_out_hap_freq_file": output_dir + json_conf.get("imputation_out_hap_freq_filename"), - "imputation_out_hap_pops_file": output_dir + json_conf.get("imputation_out_hap_pops_filename"), - "imputation_out_miss_file": output_dir + json_conf.get("imputation_out_miss_filename"), - "imputation_out_problem_file": output_dir + json_conf.get("imputation_out_problem_filename"), + "imputation_out_umug_freq_file": output_dir + + json_conf.get("imputation_out_umug_freq_filename"), + "imputation_out_umug_pops_file": output_dir + + json_conf.get("imputation_out_umug_pops_filename"), + "imputation_out_hap_freq_file": output_dir + + json_conf.get("imputation_out_hap_freq_filename"), + "imputation_out_hap_pops_file": output_dir + + json_conf.get("imputation_out_hap_pops_filename"), + "imputation_out_miss_file": output_dir + + json_conf.get("imputation_out_miss_filename"), + "imputation_out_problem_file": output_dir + + json_conf.get("imputation_out_problem_filename"), 
"factor_missing_data": json_conf.get("factor_missing_data", 0.01), - "loci_map": json_conf.get("loci_map", {"A": 1, "B":3, "C": 2, "DQB1": 4, "DRB1": 5} ), - "matrix_planb": json_conf.get("Plan_B_Matrix", [ - [[1, 2, 3, 4, 5]], - [[1, 2, 3], [4, 5]], - [[1], [2, 3], [4, 5]], - [[1, 2, 3], [4], [5]], - [[1], [2, 3], [4], [5]], - [[1], [2], [3], [4], [5]] - ]), - "pops_count_file": project_dir + json_conf.get("pops_count_file", ''), + "loci_map": json_conf.get( + "loci_map", {"A": 1, "B": 3, "C": 2, "DQB1": 4, "DRB1": 5} + ), + "matrix_planb": json_conf.get( + "Plan_B_Matrix", + [ + [[1, 2, 3, 4, 5]], + [[1, 2, 3], [4, 5]], + [[1], [2, 3], [4, 5]], + [[1, 2, 3], [4], [5]], + [[1], [2, 3], [4], [5]], + [[1], [2], [3], [4], [5]], + ], + ), + "pops_count_file": project_dir + json_conf.get("pops_count_file", ""), "use_pops_count_file": json_conf.get("pops_count_file", False), "number_of_options_threshold": json_conf.get("number_of_options_threshold", 100000), - "max_haplotypes_number_in_phase": json_conf.get("max_haplotypes_number_in_phase", 100), - "bin_imputation_input_file": project_dir + json_conf.get("bin_imputation_in_file", "None"), + "max_haplotypes_number_in_phase": json_conf.get( + "max_haplotypes_number_in_phase", 100 + ), + "bin_imputation_input_file": project_dir + + json_conf.get("bin_imputation_in_file", "None"), "nodes_for_plan_A": json_conf.get("Plan_A_Matrix", []), - "save_mode": json_conf.get("save_space_mode", False) + "save_mode": json_conf.get("save_space_mode", False), } all_loci_set = set() for _, val in config["loci_map"].items(): all_loci_set.add(str(val)) -config["full_loci"] = ''.join(sorted(all_loci_set)) +config["full_loci"] = "".join(sorted(all_loci_set)) # Display the configurations we are using -print('****************************************************************************************************') +print( + "****************************************************************************************************" +) print("\tLoci: {}".format(config["full_loci"])) print("\tPopulation: {}".format(config["pops"])) print("\tPriority: {}".format(config["priority"])) @@ -111,17 +133,25 @@ print("\tPops Count File: {}".format(config["pops_count_file"])) print("\tUse Pops Count File: {}".format(config["use_pops_count_file"])) print("\tNumber of Options Threshold: {}".format(config["number_of_options_threshold"])) -print("\tMax Number of haplotypes in phase: {}".format(config["max_haplotypes_number_in_phase"])) +print( + "\tMax Number of haplotypes in phase: {}".format( + config["max_haplotypes_number_in_phase"] + ) +) print("\tbin_imputation_input_file: {}".format(config["bin_imputation_input_file"])) if config["nodes_for_plan_A"]: print("\tNodes in plan A: {}".format(config["nodes_for_plan_A"])) print("\tSave space mode: {}".format(config["save_mode"])) -print('****************************************************************************************************') +print( + "****************************************************************************************************" +) if num_of_cpus == 0: # Use all the CPUs but 1 num_of_cpus = os.cpu_count() - 1 -print("Using {} processes with parent PID {}".format(num_of_cpus, os.getpid()), flush=True) +print( + "Using {} processes with parent PID {}".format(num_of_cpus, os.getpid()), flush=True +) priority = config["priority"] epsilon = config["epsilon"] @@ -141,14 +171,23 @@ def perform_impute_one(*args): subject_id, gl, race1, race2 = args[0] - subject_bin = [1] *(len(config["full_loci"]) - 1) + subject_bin = [1] * 
(len(config["full_loci"]) - 1) try: imputation = Imputation(graph, copy.deepcopy(config)) - impute_results = imputation.impute_one(subject_id, - gl, subject_bin, race1, race2, - priority, epsilon, 1000, - output_umug, output_haplotypes, - planb, em=False) + impute_results = imputation.impute_one( + subject_id, + gl, + subject_bin, + race1, + race2, + priority, + epsilon, + 1000, + output_umug, + output_haplotypes, + planb, + em=False, + ) imputation.print_options_count(subject_id) except MemoryError as e: print("Memory Error for subject_id: ", subject_id) @@ -159,28 +198,29 @@ def perform_impute_one(*args): def imputation_record_from(imputation_file): - if not imputation_file.endswith('.gz'): - with open(imputation_file, 'r') as imputation_file_unzipped: + if not imputation_file.endswith(".gz"): + with open(imputation_file, "r") as imputation_file_unzipped: for line in imputation_file_unzipped: - subject_id, gl, race1, race2 = line.rstrip().split(',') + subject_id, gl, race1, race2 = line.rstrip().split(",") yield subject_id, gl, race1, race2 else: - with gzip.open(imputation_file, 'rt') as imputation_gzip: + with gzip.open(imputation_file, "rt") as imputation_gzip: for line in imputation_gzip: - subject_id, gl, race1, race2 = line.rstrip().split(',') + subject_id, gl, race1, race2 = line.rstrip().split(",") yield subject_id, gl, race1, race2 + # Create output directory if it doesn't exist pathlib.Path(output_dir).mkdir(parents=False, exist_ok=True) problem_ids = [] miss_ids = [] if MUUG_output: - fout_hap_muug = open(config["imputation_out_umug_freq_file"], 'w') - fout_pop_muug = open(config["imputation_out_umug_pops_file"], 'w') + fout_hap_muug = open(config["imputation_out_umug_freq_file"], "w") + fout_pop_muug = open(config["imputation_out_umug_pops_file"], "w") if haps_output: - fout_hap_haplo = open(config["imputation_out_hap_freq_file"], 'w') - fout_pop_haplo = open(config["imputation_out_hap_pops_file"], 'w') + fout_hap_haplo = open(config["imputation_out_hap_freq_file"], "w") + fout_pop_haplo = open(config["imputation_out_hap_pops_file"], "w") with Pool(processes=num_of_cpus) as pool: subject_records = imputation_record_from(imputation_infile) @@ -190,28 +230,42 @@ def imputation_record_from(imputation_file): problem_ids.append(subject_id) continue - if (len(haps_results['Haps']) == 0 or haps_results['Haps'] == 'NaN') and len(umug_results['Haps']) == 0: + if (len(haps_results["Haps"]) == 0 or haps_results["Haps"] == "NaN") and len( + umug_results["Haps"] + ) == 0: miss_ids.append(subject_id) if haps_output: - haps = haps_results['Haps'] - probs = haps_results['Probs'] - pops = haps_results['Pops'] - print("Subject: {id} {hap_length} haplotypes".format(id=subject_id, hap_length=len(haps))) + haps = haps_results["Haps"] + probs = haps_results["Probs"] + pops = haps_results["Pops"] + print( + "Subject: {id} {hap_length} haplotypes".format( + id=subject_id, hap_length=len(haps) + ) + ) write_best_prob(subject_id, haps, probs, number_of_results, fout_hap_haplo) - write_best_prob(subject_id, pops, probs, number_of_pop_results, fout_pop_haplo) + write_best_prob( + subject_id, pops, probs, number_of_pop_results, fout_pop_haplo + ) if MUUG_output: - haps = umug_results['Haps'] - pops = umug_results['Pops'] - print("Subject: {id} {hap_length} haplotypes".format(id=subject_id, hap_length=len(haps))) + haps = umug_results["Haps"] + pops = umug_results["Pops"] + print( + "Subject: {id} {hap_length} haplotypes".format( + id=subject_id, hap_length=len(haps) + ) + ) 
write_best_prob_genotype(subject_id, haps, number_of_results, fout_hap_muug) - write_best_prob_genotype(subject_id, pops, number_of_pop_results, fout_pop_muug) + write_best_prob_genotype( + subject_id, pops, number_of_pop_results, fout_pop_muug + ) num_of_misses = len(miss_ids) print("Missing: {}".format(num_of_misses)) if num_of_misses > 0: - with open(config["imputation_out_miss_file"], 'w') as miss_csv_file: + with open(config["imputation_out_miss_file"], "w") as miss_csv_file: miss_writer = csv.writer(miss_csv_file) for miss_id in miss_ids: miss_writer.writerow([miss_id]) @@ -219,7 +273,7 @@ def imputation_record_from(imputation_file): num_of_problems = len(problem_ids) print("Problems: {}".format(num_of_problems)) if num_of_problems > 0: - with open(config["imputation_out_problem_file"], 'w') as problem_csv_file: + with open(config["imputation_out_problem_file"], "w") as problem_csv_file: problem_writer = csv.writer(problem_csv_file) for problem_id in problem_ids: problem_writer.writerows([problem_id]) diff --git a/grim/validation/reduce_loci.py b/grim/validation/reduce_loci.py index 503e082..4a0e51d 100755 --- a/grim/validation/reduce_loci.py +++ b/grim/validation/reduce_loci.py @@ -8,43 +8,55 @@ def write_best_prob_genotype(name_gl, res, fout, numOfResult=10): # write the output to file minBestResult = min(numOfResult, len(sorted_by_value)) for k in range(minBestResult): - fout.write(name_gl + ',' + str(sorted_by_value[k][0]) + ',' + - str(sorted_by_value[k][1]) + ',' + str(k) + '\n') + fout.write( + name_gl + + "," + + str(sorted_by_value[k][0]) + + "," + + str(sorted_by_value[k][1]) + + "," + + str(k) + + "\n" + ) + def convert_res_of_6_to_5(file_in, file_out): dict_res = {} with open(file_in) as six_file: for line in six_file: - id, gl, prob, rank = line.strip().split(',') - gl = ('^').join(gl.split('^')[:-1]) + id, gl, prob, rank = line.strip().split(",") + gl = ("^").join(gl.split("^")[:-1]) if not id in dict_res: dict_res[id] = {} if gl in dict_res[id]: - dict_res[id][gl]+= float(prob) + dict_res[id][gl] += float(prob) else: dict_res[id][gl] = float(prob) six_file.close() - f_out = open(file_out, 'w') + f_out = open(file_out, "w") for id in dict_res: write_best_prob_genotype(id, dict_res[id], f_out) f_out.close() - - parser = argparse.ArgumentParser() -parser.add_argument("-c", "--config", - required=False, - default="../conf/minimal-configuration.json", - help="Configuration JSON file", - type=str) +parser.add_argument( + "-c", + "--config", + required=False, + default="../conf/minimal-configuration.json", + help="Configuration JSON file", + type=str, +) args = parser.parse_args() configuration_file = args.config with open(configuration_file) as f: json_conf = json.load(f) -imputation_out_muug_freqs = 'output/' + json_conf.get("imputation_out_umug_freq_filename") +imputation_out_muug_freqs = "output/" + json_conf.get( + "imputation_out_umug_freq_filename" +) -convert_res_of_6_to_5(imputation_out_muug_freqs, imputation_out_muug_freqs) \ No newline at end of file +convert_res_of_6_to_5(imputation_out_muug_freqs, imputation_out_muug_freqs) diff --git a/grim/validation/runfile.py b/grim/validation/runfile.py index 16e7b82..bf44bf5 100644 --- a/grim/validation/runfile.py +++ b/grim/validation/runfile.py @@ -11,69 +11,97 @@ from ..imputation.imputegl.networkx_graph import Graph # Profiler start -#pr = cProfile.Profile() -#pr.enable() +# pr = cProfile.Profile() +# pr.enable() -def run_impute(conf_file = "../conf/minimal-configuration.json", project_dir_graph = "", 
project_dir_in_file = ""): - configuration_file = conf_file +def run_impute( + conf_file="../conf/minimal-configuration.json", + project_dir_graph="", + project_dir_in_file="", +): - #project_dir = ""# "../" - #output_dir = "output/" + configuration_file = conf_file + # project_dir = ""# "../" + # output_dir = "output/" # Read configuration file and load properties with open(configuration_file) as f: json_conf = json.load(f) graph_files_path = json_conf.get("graph_files_path") - if graph_files_path[-1] != '/': - graph_files_path += '/' + if graph_files_path[-1] != "/": + graph_files_path += "/" output_dir = json_conf.get("imuptation_out_path", "output") - if output_dir[-1] != '/': - output_dir += '/' + if output_dir[-1] != "/": + output_dir += "/" config = { - "planb": json_conf.get('planb', True), - "pops": json_conf.get('populations'), - "priority": json_conf.get('priority'), - "epsilon": json_conf.get('epsilon', 1e-3), - "number_of_results": json_conf.get('number_of_results', 1000), - "number_of_pop_results": json_conf.get('number_of_pop_results', 100), + "planb": json_conf.get("planb", True), + "pops": json_conf.get("populations"), + "priority": json_conf.get("priority"), + "epsilon": json_conf.get("epsilon", 1e-3), + "number_of_results": json_conf.get("number_of_results", 1000), + "number_of_pop_results": json_conf.get("number_of_pop_results", 100), "output_MUUG": json_conf.get("output_MUUG", True), "output_haplotypes": json_conf.get("output_haplotypes", False), - "node_file": project_dir_graph + graph_files_path + json_conf.get("node_csv_file"), - "top_links_file": project_dir_graph + graph_files_path + json_conf.get("top_links_csv_file"), - "edges_file": project_dir_graph + graph_files_path +json_conf.get("edges_csv_file"), - "imputation_input_file": project_dir_in_file + json_conf.get("imputation_in_file"), - "imputation_out_umug_freq_file": output_dir + json_conf.get("imputation_out_umug_freq_filename"), - "imputation_out_umug_pops_file": output_dir + json_conf.get("imputation_out_umug_pops_filename"), - "imputation_out_hap_freq_file": output_dir + json_conf.get("imputation_out_hap_freq_filename"), - "imputation_out_hap_pops_file": output_dir + json_conf.get("imputation_out_hap_pops_filename"), - "imputation_out_miss_file": output_dir + json_conf.get("imputation_out_miss_filename"), - "imputation_out_problem_file": output_dir + json_conf.get("imputation_out_problem_filename"), + "node_file": project_dir_graph + + graph_files_path + + json_conf.get("node_csv_file"), + "top_links_file": project_dir_graph + + graph_files_path + + json_conf.get("top_links_csv_file"), + "edges_file": project_dir_graph + + graph_files_path + + json_conf.get("edges_csv_file"), + "imputation_input_file": project_dir_in_file + + json_conf.get("imputation_in_file"), + "imputation_out_umug_freq_file": output_dir + + json_conf.get("imputation_out_umug_freq_filename"), + "imputation_out_umug_pops_file": output_dir + + json_conf.get("imputation_out_umug_pops_filename"), + "imputation_out_hap_freq_file": output_dir + + json_conf.get("imputation_out_hap_freq_filename"), + "imputation_out_hap_pops_file": output_dir + + json_conf.get("imputation_out_hap_pops_filename"), + "imputation_out_miss_file": output_dir + + json_conf.get("imputation_out_miss_filename"), + "imputation_out_problem_file": output_dir + + json_conf.get("imputation_out_problem_filename"), "factor_missing_data": json_conf.get("factor_missing_data", 0.01), - "loci_map": json_conf.get("loci_map", {"A": 1, "B":3, "C": 2, "DQB1": 4, "DRB1": 5} ), - 
"matrix_planb": json_conf.get("Plan_B_Matrix", [ - [[1, 2, 3, 4, 5]], - [[1, 2, 3], [4, 5]], - [[1], [2, 3], [4, 5]], - [[1, 2, 3], [4], [5]], - [[1], [2, 3], [4], [5]], - [[1], [2], [3], [4], [5]] - ]), - "pops_count_file": project_dir_graph + json_conf.get("pops_count_file",'' ), - "use_pops_count_file": json_conf.get("pops_count_file",False), - "number_of_options_threshold": json_conf.get("number_of_options_threshold", 100000), - "max_haplotypes_number_in_phase": json_conf.get("max_haplotypes_number_in_phase",100 ), - "bin_imputation_input_file": project_dir_in_file + json_conf.get("bin_imputation_in_file", "None"), + "loci_map": json_conf.get( + "loci_map", {"A": 1, "B": 3, "C": 2, "DQB1": 4, "DRB1": 5} + ), + "matrix_planb": json_conf.get( + "Plan_B_Matrix", + [ + [[1, 2, 3, 4, 5]], + [[1, 2, 3], [4, 5]], + [[1], [2, 3], [4, 5]], + [[1, 2, 3], [4], [5]], + [[1], [2, 3], [4], [5]], + [[1], [2], [3], [4], [5]], + ], + ), + "pops_count_file": project_dir_graph + json_conf.get("pops_count_file", ""), + "use_pops_count_file": json_conf.get("pops_count_file", False), + "number_of_options_threshold": json_conf.get( + "number_of_options_threshold", 100000 + ), + "max_haplotypes_number_in_phase": json_conf.get( + "max_haplotypes_number_in_phase", 100 + ), + "bin_imputation_input_file": project_dir_in_file + + json_conf.get("bin_imputation_in_file", "None"), "nodes_for_plan_A": json_conf.get("Plan_A_Matrix", []), "save_mode": json_conf.get("save_space_mode", False), - "UNK_priors" : json_conf.get("UNK_priors", "MR") - + "UNK_priors": json_conf.get("UNK_priors", "MR"), } # Display the configurations we are using - print('****************************************************************************************************') + print( + "****************************************************************************************************" + ) print("Performing imputation based on:") print("\tPopulation: {}".format(config["pops"])) print("\tPriority: {}".format(config["priority"])) @@ -86,11 +114,23 @@ def run_impute(conf_file = "../conf/minimal-configuration.json", project_dir_gra print("\tTop Links File: {}".format(config["edges_file"])) print("\tInput File: {}".format(config["imputation_input_file"])) print("\tOutput UMUG Format: {}".format(config["output_MUUG"])) - print("\tOutput UMUG Freq Filename: {}".format(config["imputation_out_umug_freq_file"])) - print("\tOutput UMUG Pops Filename: {}".format(config["imputation_out_umug_pops_file"])) + print( + "\tOutput UMUG Freq Filename: {}".format( + config["imputation_out_umug_freq_file"] + ) + ) + print( + "\tOutput UMUG Pops Filename: {}".format( + config["imputation_out_umug_pops_file"] + ) + ) print("\tOutput Haplotype Format: {}".format(config["output_haplotypes"])) - print("\tOutput HAP Freq Filename: {}".format(config["imputation_out_hap_freq_file"])) - print("\tOutput HAP Pops Filename: {}".format(config["imputation_out_hap_pops_file"])) + print( + "\tOutput HAP Freq Filename: {}".format(config["imputation_out_hap_freq_file"]) + ) + print( + "\tOutput HAP Pops Filename: {}".format(config["imputation_out_hap_pops_file"]) + ) print("\tOutput Miss Filename: {}".format(config["imputation_out_miss_file"])) print("\tOutput Problem Filename: {}".format(config["imputation_out_problem_file"])) print("\tFactor Missing Data: {}".format(config["factor_missing_data"])) @@ -98,22 +138,33 @@ def run_impute(conf_file = "../conf/minimal-configuration.json", project_dir_gra print("\tPlan B Matrix: {}".format(config["matrix_planb"])) print("\tPops Count File: 
{}".format(config["pops_count_file"])) print("\tUse Pops Count File: {}".format(config["use_pops_count_file"])) - print("\tNumber of Options Threshold: {}".format(config["number_of_options_threshold"])) - print("\tMax Number of haplotypes in phase: {}".format(config["max_haplotypes_number_in_phase"])) + print( + "\tNumber of Options Threshold: {}".format( + config["number_of_options_threshold"] + ) + ) + print( + "\tMax Number of haplotypes in phase: {}".format( + config["max_haplotypes_number_in_phase"] + ) + ) if config["nodes_for_plan_A"]: print("\tNodes in plan A: {}".format(config["nodes_for_plan_A"])) print("\tSave space mode: {}".format(config["save_mode"])) - print('****************************************************************************************************') - + print( + "****************************************************************************************************" + ) all_loci_set = set() for _, val in config["loci_map"].items(): all_loci_set.add(str(val)) - config["full_loci"] = ''.join(sorted(all_loci_set)) + config["full_loci"] = "".join(sorted(all_loci_set)) # Perform imputation graph = Graph(config) - graph.build_graph(config["node_file"], config["top_links_file"], config["edges_file"]) + graph.build_graph( + config["node_file"], config["top_links_file"], config["edges_file"] + ) imputation = Imputation(graph, config) # Create output directory if it doesn't exist @@ -123,5 +174,5 @@ def run_impute(conf_file = "../conf/minimal-configuration.json", project_dir_gra imputation.impute_file(config) # Profiler end - #pr.disable() - #pr.print_stats(sort="time") + # pr.disable() + # pr.print_stats(sort="time") diff --git a/grim/validation/runfile_mp.py b/grim/validation/runfile_mp.py index 5d136a9..79de5c0 100755 --- a/grim/validation/runfile_mp.py +++ b/grim/validation/runfile_mp.py @@ -15,17 +15,18 @@ pr.enable() parser = argparse.ArgumentParser() -parser.add_argument("-c", "--config", - required=False, - default="../minimal-configuration.json", - help="Configuration JSON file", - type=str) - -parser.add_argument("-d", "--processes", - required=False, - default=4, - help="number of processes", - type=int) +parser.add_argument( + "-c", + "--config", + required=False, + default="../minimal-configuration.json", + help="Configuration JSON file", + type=str, +) + +parser.add_argument( + "-d", "--processes", required=False, default=4, help="number of processes", type=int +) args = parser.parse_args() configuration_file = args.config @@ -39,28 +40,36 @@ json_conf = json.load(f) config = { - "planb": json_conf.get('planb', True), - "pops": json_conf.get('populations'), - "priority": json_conf.get('priority'), - "epsilon": json_conf.get('epsilon', 1e-3), - "number_of_results": json_conf.get('number_of_results', 1000), - "number_of_pop_results": json_conf.get('number_of_pop_results', 100), + "planb": json_conf.get("planb", True), + "pops": json_conf.get("populations"), + "priority": json_conf.get("priority"), + "epsilon": json_conf.get("epsilon", 1e-3), + "number_of_results": json_conf.get("number_of_results", 1000), + "number_of_pop_results": json_conf.get("number_of_pop_results", 100), "output_MUUG": json_conf.get("output_MUUG", True), "output_haplotypes": json_conf.get("output_haplotypes", False), "node_file": project_dir + json_conf.get("node_csv_file"), "top_links_file": project_dir + json_conf.get("top_links_csv_file"), "edges_file": project_dir + json_conf.get("edges_csv_file"), "imputation_input_file": project_dir + json_conf.get("imputation_in_file"), - 
"imputation_out_umug_freq_file": output_dir + json_conf.get("imputation_out_umug_freq_filename"), - "imputation_out_umug_pops_file": output_dir + json_conf.get("imputation_out_umug_pops_filename"), - "imputation_out_hap_freq_file": output_dir + json_conf.get("imputation_out_hap_freq_filename"), - "imputation_out_hap_pops_file": output_dir + json_conf.get("imputation_out_hap_pops_filename"), - "imputation_out_miss_file": output_dir + json_conf.get("imputation_out_miss_filename"), - "imputation_out_problem_file": output_dir + json_conf.get("imputation_out_problem_filename") + "imputation_out_umug_freq_file": output_dir + + json_conf.get("imputation_out_umug_freq_filename"), + "imputation_out_umug_pops_file": output_dir + + json_conf.get("imputation_out_umug_pops_filename"), + "imputation_out_hap_freq_file": output_dir + + json_conf.get("imputation_out_hap_freq_filename"), + "imputation_out_hap_pops_file": output_dir + + json_conf.get("imputation_out_hap_pops_filename"), + "imputation_out_miss_file": output_dir + + json_conf.get("imputation_out_miss_filename"), + "imputation_out_problem_file": output_dir + + json_conf.get("imputation_out_problem_filename"), } # Display the configurations we are using -print('****************************************************************************************************') +print( + "****************************************************************************************************" +) print("Performing imputation based on:") print("\tPopulation: {}".format(config["pops"])) print("\tPriority: {}".format(config["priority"])) @@ -79,7 +88,9 @@ print("\tOutput HAP Pops Filename: {}".format(config["imputation_out_hap_pops_file"])) print("\tOutput Miss Filename: {}".format(config["imputation_out_miss_file"])) print("\tOutput Problem Filename: {}".format(config["imputation_out_problem_file"])) -print('****************************************************************************************************') +print( + "****************************************************************************************************" +) # Perform imputation graph = Graph() @@ -95,12 +106,20 @@ in_dir = os.path.dirname(input_file) in_file_basename = os.path.basename(input_file) -num_subjects = os.popen('wc -l ' + config["imputation_input_file"]).read() +num_subjects = os.popen("wc -l " + config["imputation_input_file"]).read() num_subjects = int(num_subjects.strip().split(" ")[0]) print(num_subjects) -split_cmd = 'split -l ' + str( - int(math.ceil(num_subjects / num_processes))) + " " + input_file + " " + in_dir + "/" + in_file_basename[0:4] +split_cmd = ( + "split -l " + + str(int(math.ceil(num_subjects / num_processes))) + + " " + + input_file + + " " + + in_dir + + "/" + + in_file_basename[0:4] +) os.system(split_cmd) @@ -115,12 +134,12 @@ config["imputation_input_file"] = in_file out_file = output_dir + os.path.basename(in_file) - config["imputation_out_umug_freq_file"] = out_file + '.umug.freqs' - config["imputation_out_umug_pops_file"] = out_file + '.umug.pops' - config["imputation_out_hap_freq_file"] = out_file + '.hap.freqs' - config["imputation_out_hap_pops_file"] = out_file + '.hap.pops' - config["imputation_out_miss_file"] = out_file + '.miss' - config["imputation_out_problem_file"] = out_file + '.problem' + config["imputation_out_umug_freq_file"] = out_file + ".umug.freqs" + config["imputation_out_umug_pops_file"] = out_file + ".umug.pops" + config["imputation_out_hap_freq_file"] = out_file + ".hap.freqs" + config["imputation_out_hap_pops_file"] = out_file + ".hap.pops" 
+ config["imputation_out_miss_file"] = out_file + ".miss" + config["imputation_out_problem_file"] = out_file + ".problem" t = Process(target=imputation.impute_file, args=(config,)) t.start() processes.append(t) diff --git a/grim/validation/runfile_mt.py b/grim/validation/runfile_mt.py index a71f2c1..f7dffd2 100755 --- a/grim/validation/runfile_mt.py +++ b/grim/validation/runfile_mt.py @@ -4,7 +4,7 @@ import os import pathlib import threading -import string,math +import string, math from imputegl import Imputation from imputegl.networkx_graph import Graph @@ -14,17 +14,18 @@ pr.enable() parser = argparse.ArgumentParser() -parser.add_argument("-c", "--config", - required=False, - default="../minimal-configuration.json", - help="Configuration JSON file", - type=str) +parser.add_argument( + "-c", + "--config", + required=False, + default="../minimal-configuration.json", + help="Configuration JSON file", + type=str, +) -parser.add_argument("-d", "--threads", - required=False, - default=4, - help="number of threads", - type=int) +parser.add_argument( + "-d", "--threads", required=False, default=4, help="number of threads", type=int +) args = parser.parse_args() @@ -38,28 +39,30 @@ with open(configuration_file) as f: conf = json.load(f) - -number_of_pop_results = conf.get('number_of_pop_results', 100) -number_of_results = conf.get('number_of_results', 1000) -planb = conf.get('planb') -epsilon = conf.get('epsilon', 1e-3) + +number_of_pop_results = conf.get("number_of_pop_results", 100) +number_of_results = conf.get("number_of_results", 1000) +planb = conf.get("planb") +epsilon = conf.get("epsilon", 1e-3) pops = conf.get("populations") -priority = conf.get('priority') +priority = conf.get("priority") output_haplotypes = conf.get("output_haplotypes", False) output_MUUG = conf.get("output_MUUG", True) node_file = project_dir + conf.get("node_csv_file") top_linkks_file = project_dir + conf.get("top_links_csv_file") -edges_file =project_dir + conf.get("edges_csv_file") +edges_file = project_dir + conf.get("edges_csv_file") input_file = project_dir + conf.get("imputation_in_file") # Output filename is based on input filename -#output_file = output_dir + os.path.basename(input_file) + '_out' +# output_file = output_dir + os.path.basename(input_file) + '_out' # Display the configurations we are using -print('****************************************************************************************************') +print( + "****************************************************************************************************" +) print("Performing imputation based on:") print("\tPopulation: {}".format(pops)) print("\tPriority: {}".format(priority)) @@ -70,13 +73,14 @@ print("\tNodes File: {}".format(node_file)) print("\tTop Links File: {}".format(edges_file)) print("\tInput File: {}".format(input_file)) -print('****************************************************************************************************') - +print( + "****************************************************************************************************" +) # Perform imputation graph = Graph() -graph.build_graph(node_file,top_linkks_file, edges_file) +graph.build_graph(node_file, top_linkks_file, edges_file) imputation = Imputation(graph, pops) @@ -88,22 +92,43 @@ in_dir = os.path.dirname(input_file) in_file_basename = os.path.basename(input_file) -num_subjects = os.popen('wc -l ' + input_file ).read() +num_subjects = os.popen("wc -l " + input_file).read() num_subjects = int(num_subjects.strip().split(" ")[0]) print(num_subjects) -split_cmd 
='split -l ' + str(int(math.ceil(num_subjects/num_threads)))+ " "+ input_file +" "+in_dir+"/" + in_file_basename[0:4] +split_cmd = ( + "split -l " + + str(int(math.ceil(num_subjects / num_threads))) + + " " + + input_file + + " " + + in_dir + + "/" + + in_file_basename[0:4] +) os.system(split_cmd) -alpha = string.ascii_lowercase +alpha = string.ascii_lowercase for i in range(num_threads): - in_file = in_dir +"/"+in_file_basename[0:4] +"a" + alpha[i] + in_file = in_dir + "/" + in_file_basename[0:4] + "a" + alpha[i] print(in_file) - output_file = output_dir + os.path.basename(in_file) + '_out' - t = threading.Thread(target=imputation.impute_file, args = (in_file, output_file, priority, output_MUUG, - output_haplotypes,1000,epsilon,number_of_results, - number_of_pop_results,planb, )) + output_file = output_dir + os.path.basename(in_file) + "_out" + t = threading.Thread( + target=imputation.impute_file, + args=( + in_file, + output_file, + priority, + output_MUUG, + output_haplotypes, + 1000, + epsilon, + number_of_results, + number_of_pop_results, + planb, + ), + ) t.start() # Profiler end diff --git a/grim/validation/simulation/__init__.py b/grim/validation/simulation/__init__.py new file mode 100644 index 0000000..cfe8889 --- /dev/null +++ b/grim/validation/simulation/__init__.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- + +# +# grim Graph Imputation +# Copyright (c) 2021 Be The Match operated by National Marrow Donor Program. All Rights Reserved. +# +# This library is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; either version 3 of the License, or (at +# your option) any later version. +# +# This library is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this library; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. +# +# > http://www.fsf.org/licensing/licenses/lgpl.html +# > http://www.opensource.org/licenses/lgpl-license.php +# + + +"""Top-level package for py-grim.""" + +__organization__ = "NMDP/CIBMTR Bioinformatics" diff --git a/grim/validation/simulation/data/__init__.py b/grim/validation/simulation/data/__init__.py new file mode 100644 index 0000000..cfe8889 --- /dev/null +++ b/grim/validation/simulation/data/__init__.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- + +# +# grim Graph Imputation +# Copyright (c) 2021 Be The Match operated by National Marrow Donor Program. All Rights Reserved. +# +# This library is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; either version 3 of the License, or (at +# your option) any later version. +# +# This library is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this library; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+# +# > http://www.fsf.org/licensing/licenses/lgpl.html +# > http://www.opensource.org/licenses/lgpl-license.php +# + + +"""Top-level package for py-grim.""" + +__organization__ = "NMDP/CIBMTR Bioinformatics" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7307771 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +# Minimum requirements for the build system to execute. +requires = ["setuptools", "wheel", "cython==0.29.32"] # PEP 508 specifications. diff --git a/requirements-deploy.txt b/requirements-deploy.txt index 0e1ec03..e69de29 100644 --- a/requirements-deploy.txt +++ b/requirements-deploy.txt @@ -1,2 +0,0 @@ -connexion[swagger-ui]==2.13.0 -gunicorn==20.1.0 diff --git a/requirements-dev.txt b/requirements-dev.txt index d2f222f..02e2bf4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,5 @@ -allure-behave==2.9.45 -flake8==4.0.1 +flake8==5.0.4 bump2version==1.0.1 -coverage==6.3.2 +coverage==6.4.4 wheel==0.37.1 -pre-commit==2.18.1 +pre-commit==2.20.0 diff --git a/requirements-tests.txt b/requirements-tests.txt index 77d53a7..2af4c79 100644 --- a/requirements-tests.txt +++ b/requirements-tests.txt @@ -1,3 +1,2 @@ pytest==7.1.2 -behave==1.2.6 PyHamcrest==2.0.3 diff --git a/requirements.txt b/requirements.txt index 27df244..f319221 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ +cython==0.29.32 numpy>=1.20.2 -networkx==2.5.1 -cython -Cython +networkx==2.5.1 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index abca7fe..d573e50 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.0.4 +current_version = 0.0.6 commit = True tag = True diff --git a/tests/features/algorithm/SLUG Match.feature b/tests/features/algorithm/SLUG Match.feature deleted file mode 100644 index 4cb80df..0000000 --- a/tests/features/algorithm/SLUG Match.feature +++ /dev/null @@ -1,16 +0,0 @@ -Feature: Match SLUG - - SLUGs are matched if each of the alleles of one SLUG are in the other SLUG. - - Scenario Outline: Match SLUGS by allele - - Given the SLUG for patient is - And the SLUG for donor is - When we perform SLUG match patient and donor - Then they should be - - Examples: - | Patient-SLUG | Donor-SLUG | Matched | - | HLA-C*07:02+HLA-C*16:01 | HLA-C*07:02+HLA-C*16:01 | Yes | - | HLA-A*13:02+HLA-A*13:01 | HLA-A*13:02+HLA-A*30:01 | No | - | HLA-A*13:02+HLA-B*13:01 | HLA-A*13:02+HLA-A*30:01 | No | diff --git a/tests/features/definition/Class I HLA Alleles.feature b/tests/features/definition/Class I HLA Alleles.feature deleted file mode 100644 index 9c7ad99..0000000 --- a/tests/features/definition/Class I HLA Alleles.feature +++ /dev/null @@ -1,21 +0,0 @@ -Feature: SLUGs of Class I HLA Alleles - - Class I HLA Alleles are alleles on genomic loci HLA-A, HLA-B and HLA-C. - SLUGs are Single Locus Un-phased Genotypes made up of a pair of alleles. - The alleles are lexicographically sorted. - - Scenario Outline: HLA Class I Locus SLUG - - Each gene of HLA-A, HLA-B and HLA-C have a pair of alleles, one from each parent. 
- - Given The Locus of Gene is - And The first allele is - And The second allele is - When I create an un-phased genotype - Then I get the single locus un-phased genotype - - Examples: Valid Class I Alleles - | Locus | Allele1 | Allele2 | SLUG | - | HLA-A | HLA-A*13:02 | HLA-A*30:01 | HLA-A*13:02+HLA-A*30:01 | - | HLA-B | HLA-B*38:01 | HLA-B*07:02 | HLA-B*07:02+HLA-B*38:01 | - | HLA-C | HLA-C*07:02 | HLA-C*16:01 | HLA-C*07:02+HLA-C*16:01 | diff --git a/tests/steps/HLA_alleles.py b/tests/steps/HLA_alleles.py deleted file mode 100644 index dfbd31a..0000000 --- a/tests/steps/HLA_alleles.py +++ /dev/null @@ -1,30 +0,0 @@ -from behave import * -from hamcrest import assert_that, is_ - -from grim.model.allele import Allele -from grim.model.slug import SLUG - - -@given("The Locus of Gene is {locus}") -def step_impl(context, locus): - context.locus = locus - - -@step("The first allele is {allele1}") -def step_impl(context, allele1): - context.allele_1 = Allele(context.locus, allele1) - - -@step("The second allele is {allele2}") -def step_impl(context, allele2): - context.allele_2 = Allele(context.locus, allele2) - - -@when("I create an un-phased genotype") -def step_impl(context): - context.slug = SLUG(context.allele_1, context.allele_2) - - -@then("I get the single locus un-phased genotype {slug}") -def step_impl(context, slug): - assert_that(str(context.slug), is_(slug)) diff --git a/tests/steps/SLUG_match.py b/tests/steps/SLUG_match.py deleted file mode 100644 index 6fda209..0000000 --- a/tests/steps/SLUG_match.py +++ /dev/null @@ -1,26 +0,0 @@ -from behave import * -from hamcrest import assert_that, is_ - -from grim.algorithm.match import slug_match -from grim.model.slug import SLUG - - -@given("the SLUG for patient is {slug}") -def step_impl(context, slug): - context.patient_slug = SLUG.from_glstring(slug) - - -@step("the SLUG for donor is {slug}") -def step_impl(context, slug): - context.donor_slug = SLUG.from_glstring(slug) - - -@when("we perform SLUG match patient and donor") -def step_impl(context): - context.matched = slug_match(context.patient_slug, context.donor_slug) - - -@then("they should be {is_matched}") -def step_impl(context, is_matched): - matched = is_matched == "Yes" - assert_that(context.matched, is_(matched)) diff --git a/tests/unit/test_my_project_template.py b/tests/unit/test_grim.py similarity index 100% rename from tests/unit/test_my_project_template.py rename to tests/unit/test_grim.py From fc1ceae0031e03ba67b3a6ef9c6edbf224fd95ce Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Wed, 16 Nov 2022 10:30:22 -0600 Subject: [PATCH 16/19] =?UTF-8?q?Bump=20version:=200.0.6=20=E2=86=92=200.0?= =?UTF-8?q?.7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- grim/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/grim/__init__.py b/grim/__init__.py index f45a470..6e79b1e 100644 --- a/grim/__init__.py +++ b/grim/__init__.py @@ -26,4 +26,4 @@ """Top-level package for py-grim.""" __organization__ = "NMDP/CIBMTR Bioinformatics" -__version__ = "0.0.6" +__version__ = "0.0.7" diff --git a/setup.cfg b/setup.cfg index d573e50..dcf485c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.0.6 +current_version = 0.0.7 commit = True tag = True diff --git a/setup.py b/setup.py index e25cd17..9e43933 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ setup( name="py-graph-imputation", - version="0.0.6", + version="0.0.7", author="Pradeep 
Bashyal", author_email="pbashyal@nmdp.org", python_requires=">=3.8", From b567fa4c85b87590d9de361b24152dd77f56d306 Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Wed, 16 Nov 2022 10:49:20 -0600 Subject: [PATCH 17/19] Added slots back --- grim/imputation/imputegl/impute.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/grim/imputation/imputegl/impute.py b/grim/imputation/imputegl/impute.py index 310daa9..cb4636e 100644 --- a/grim/imputation/imputegl/impute.py +++ b/grim/imputation/imputegl/impute.py @@ -119,6 +119,10 @@ def clean_up_gl(gl): class Imputation(object): + __slots__ = 'logger', 'verbose', 'populations', 'netGraph', 'priorMatrix', 'full_hapl', 'index_dict', 'full_loci', \ + 'factor', '_factor_missing_data', 'cypher', 'cypher_plan_b', 'matrix_planb', 'count_by_prob', \ + 'number_of_options_threshold', 'plan', 'option_1', 'option_2', \ + 'haplotypes_number_in_phase', 'save_space_mode', 'nodes_for_plan_A', 'unk_priors' def __init__(self, net=None, config=None, count_by_prob=None, verbose=False): """Constructor Intialize an instance of `Imputation` with a py2neo graph From 316092d4945d0d8e032d5601ec581163cde6c525 Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Wed, 16 Nov 2022 11:23:47 -0600 Subject: [PATCH 18/19] black applied --- grim/conf/__init__.py | 4 +-- grim/imputation/__init__.py | 4 +-- grim/imputation/graph_generation/__init__.py | 4 +-- grim/imputation/imputegl/__init__.py | 4 +-- grim/imputation/imputegl/impute.py | 30 ++++++++++++++++---- grim/imputation/imputegl/networkx_graph.py | 9 +++++- grim/validation/__init__.py | 4 +-- 7 files changed, 43 insertions(+), 16 deletions(-) diff --git a/grim/conf/__init__.py b/grim/conf/__init__.py index 5ff1a91..d64977b 100755 --- a/grim/conf/__init__.py +++ b/grim/conf/__init__.py @@ -24,5 +24,5 @@ __author__ = """Martin Maiers""" -__email__ = 'mmaiers@nmdp.org' -__version__ = '0.0.7' +__email__ = "mmaiers@nmdp.org" +__version__ = "0.0.7" diff --git a/grim/imputation/__init__.py b/grim/imputation/__init__.py index 2511f5d..2779c9f 100755 --- a/grim/imputation/__init__.py +++ b/grim/imputation/__init__.py @@ -21,5 +21,5 @@ __author__ = """Martin Maiers""" -__email__ = 'mmaiers@nmdp.org' -__version__ = '0.0.7' +__email__ = "mmaiers@nmdp.org" +__version__ = "0.0.7" diff --git a/grim/imputation/graph_generation/__init__.py b/grim/imputation/graph_generation/__init__.py index 2511f5d..2779c9f 100755 --- a/grim/imputation/graph_generation/__init__.py +++ b/grim/imputation/graph_generation/__init__.py @@ -21,5 +21,5 @@ __author__ = """Martin Maiers""" -__email__ = 'mmaiers@nmdp.org' -__version__ = '0.0.7' +__email__ = "mmaiers@nmdp.org" +__version__ = "0.0.7" diff --git a/grim/imputation/imputegl/__init__.py b/grim/imputation/imputegl/__init__.py index 2d171e6..bec990d 100755 --- a/grim/imputation/imputegl/__init__.py +++ b/grim/imputation/imputegl/__init__.py @@ -23,5 +23,5 @@ __author__ = """Martin Maiers""" -__email__ = 'mmaiers@nmdp.org' -__version__ = '0.0.7' +__email__ = "mmaiers@nmdp.org" +__version__ = "0.0.7" diff --git a/grim/imputation/imputegl/impute.py b/grim/imputation/imputegl/impute.py index cb4636e..1cdc63a 100644 --- a/grim/imputation/imputegl/impute.py +++ b/grim/imputation/imputegl/impute.py @@ -119,10 +119,31 @@ def clean_up_gl(gl): class Imputation(object): - __slots__ = 'logger', 'verbose', 'populations', 'netGraph', 'priorMatrix', 'full_hapl', 'index_dict', 'full_loci', \ - 'factor', '_factor_missing_data', 'cypher', 'cypher_plan_b', 'matrix_planb', 'count_by_prob', \ - 'number_of_options_threshold', 
'plan', 'option_1', 'option_2', \ - 'haplotypes_number_in_phase', 'save_space_mode', 'nodes_for_plan_A', 'unk_priors' + __slots__ = ( + "logger", + "verbose", + "populations", + "netGraph", + "priorMatrix", + "full_hapl", + "index_dict", + "full_loci", + "factor", + "_factor_missing_data", + "cypher", + "cypher_plan_b", + "matrix_planb", + "count_by_prob", + "number_of_options_threshold", + "plan", + "option_1", + "option_2", + "haplotypes_number_in_phase", + "save_space_mode", + "nodes_for_plan_A", + "unk_priors", + ) + def __init__(self, net=None, config=None, count_by_prob=None, verbose=False): """Constructor Intialize an instance of `Imputation` with a py2neo graph @@ -394,7 +415,6 @@ def get_haplo_freqs(self, haplos, epsilon, n=25000): # haplos_joined = ["~".join(sorted(item)) for sublist in haplos for item in sublist] return self.netGraph.adjs_query(haplos_joined) - # def get_haplo_freqs_miss(self, haplos, epsilon): # haplo_probs = {} # haplos_joined = ["~".join(sorted(item)) for sublist in haplos for item in sublist] diff --git a/grim/imputation/imputegl/networkx_graph.py b/grim/imputation/imputegl/networkx_graph.py index 49c5b83..1d997ab 100755 --- a/grim/imputation/imputegl/networkx_graph.py +++ b/grim/imputation/imputegl/networkx_graph.py @@ -9,7 +9,14 @@ def missing(labelA, labelB): class Graph(object): - __slots__ = 'graph', 'labelDict', 'whole_graph', 'full_loci', 'nodes_plan_a', 'nodes_plan_b' + __slots__ = ( + "graph", + "labelDict", + "whole_graph", + "full_loci", + "nodes_plan_a", + "nodes_plan_b", + ) def __init__(self, config): self.graph = nx.DiGraph() diff --git a/grim/validation/__init__.py b/grim/validation/__init__.py index 2511f5d..2779c9f 100755 --- a/grim/validation/__init__.py +++ b/grim/validation/__init__.py @@ -21,5 +21,5 @@ __author__ = """Martin Maiers""" -__email__ = 'mmaiers@nmdp.org' -__version__ = '0.0.7' +__email__ = "mmaiers@nmdp.org" +__version__ = "0.0.7" From 02334ee16d9970bf166ebfaa093a39a5fd9fafe6 Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Wed, 16 Nov 2022 11:28:10 -0600 Subject: [PATCH 19/19] black applied --- setup.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 9e43933..c012b44 100644 --- a/setup.py +++ b/setup.py @@ -28,11 +28,12 @@ from setuptools import setup from Cython.Build import cythonize + # import numpy - # include_dirs=[numpy.get_include()], - # requires=['numpy', 'Cython']) +# include_dirs=[numpy.get_include()], +# requires=['numpy', 'Cython']) from setuptools import setup, find_packages, Extension @@ -71,17 +72,27 @@ long_description_content_type="text/markdown", include_package_data=True, keywords="grim", - packages=find_packages(include=[ + packages=find_packages( + include=[ "grim", "grim.imputation", "grim.imputation.imputegl", "grim.imputation.graph_generation", "grim.validation", "grim.conf", - ]), + ] + ), test_suite="tests", tests_require=test_requirements, url="https://github.com/nmdp-bioinformatics/py-grim", zip_safe=False, - ext_modules=cythonize([Extension("grim.imputation.imputegl.cutils", ["grim/imputation/imputegl/cutils.pyx"])], language_level="3") + ext_modules=cythonize( + [ + Extension( + "grim.imputation.imputegl.cutils", + ["grim/imputation/imputegl/cutils.pyx"], + ) + ], + language_level="3", + ), )
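
For anyone applying this series locally, the build wiring above (the pinned cython==0.29.32 in pyproject.toml/requirements.txt plus the cythonize(...) entry in setup.py) can be smoke-tested with a short sketch. This snippet is illustrative and not part of the patch series: the build commands assume a checkout of py-graph-imputation at the repository root, the sample allele strings are made up, and deepcopy_list is one of the helpers the series compiles from grim/imputation/imputegl/cutils.pyx.

    # Hypothetical smoke test for the Cython extension introduced in this series.
    # Build it first, from the repository root:
    #   pip install "cython==0.29.32"
    #   python setup.py build_ext --inplace
    from grim.imputation.imputegl.cutils import deepcopy_list

    nested = [["A*01:01", "B*08:01"], ["A*02:01", "B*07:02"]]  # made-up alleles
    copied = deepcopy_list(nested)

    # A correct deep copy is equal in value but shares no inner list objects.
    assert copied == nested
    assert copied[0] is not nested[0]

If the compiled module imports and the assertions pass, the extension was built and wired into the package path (grim.imputation.imputegl.cutils) as declared in setup.py's ext_modules.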