Skip to content

Commit

Permalink
update to allow 0-node start/end bubble pop
Browse files Browse the repository at this point in the history
  • Loading branch information
skoren committed Aug 14, 2024
1 parent 13418e4 commit 7206b68
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions src/scripts/pop_bubbles_coverage_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ def pop_bubble(start, end, removed_nodes, removed_edges, edges, coverage, nodele
stack.append((edge, top, min(pathwidth, coverage.get(edge[1:], 0))))
assert end in predecessor
#sys.stderr.write("Processing bubble from %s to %s and conservative mode is %s\n"%(start, end, conservative))
# TODO: when our coverage is more trustworthy (e.g. low coverage isn't due to no unique nodes in a path, we can run this in conservative popping mode to remove noise, until then do nothing in these bubbles
if (conservative == True and len(bubble_nodes) > max_bubble_pop_size / 2) or len(bubble_nodes) > max_bubble_pop_size:
# TODO: when our coverage is more trustworthy (e.g. low coverage isn't due to no unique nodes in a path, we can run this in conservative popping mode to remove noise, until then do nothing in these bubbles, also do nothign for 3-node bubbles in high coverage
if (conservative == True and (len(bubble_nodes) == 3 or len(bubble_nodes) > max_bubble_pop_size / 2)) or len(bubble_nodes) > max_bubble_pop_size:
#sys.stderr.write("Error bubble beteween %s and %s is not poppable because it its size of %s is larger than max %s\n"%(start, end, len(bubble_nodes), max_bubble_pop_size))
return

Expand All @@ -142,6 +142,8 @@ def pop_bubble(start, end, removed_nodes, removed_edges, edges, coverage, nodele
comp_coverage = find_component_coverage(start, belongs_to_component, component_coverage_sum, component_length_sum)
if conservative == True:
max_poppable_coverage = 0.25 * comp_coverage
elif start[1:] not in coverage or end[1:] not in coverage:
max_poppable_coverage = 0.25 * comp_coverage
elif len(bubble_nodes) == 3:
max_poppable_coverage = 0.5*comp_coverage
elif start[1:] in coverage and end[1:] in coverage and nodelens[start[1:]] > max_poppable_node_size and nodelens[end[1:]] > max_poppable_node_size and coverage[start[1:]] <= 1.5*comp_coverage and coverage[start[1:]] >= 0.5 * comp_coverage and coverage[end[1:]] <= 1.5*comp_coverage and coverage[end[1:]] >= 0.5*comp_coverage:
Expand Down Expand Up @@ -176,6 +178,7 @@ def pop_bubble(start, end, removed_nodes, removed_edges, edges, coverage, nodele

def try_pop_tip(start, edges, coverage, removed_nodes, removed_edges, max_removable, nodelens):
if start not in edges: return
#sys.stderr.write("Processing pop tip from node %s with edges %s max removable is %s\n"%(start, edges[start], max_removable))
if len(edges[start]) < 2 or len(edges[start]) > 4: return
max_coverage = 0
max_len = 0
Expand Down Expand Up @@ -278,14 +281,17 @@ def try_pop_tip(start, edges, coverage, removed_nodes, removed_edges, max_remova
if node in coverage:
chain_length_sum[node] = nodelens[node]
chain_coverage_sum[node] = coverage[node] * nodelens[node]
else:
chain_length_sum[node] = nodelens[node]
chain_coverage_sum[node] = 0

possible_merges = []

for edge in edges:
bubble = find_bubble(edge, edges, nodelens, max_poppable_node_size * 5)
if not bubble: continue
if bubble[0][1:] not in coverage or bubble[1][1:] not in coverage: continue
possible_merges.append((bubble[0][1:], bubble[1][1:], max(coverage[bubble[0][1:]] / coverage[bubble[1][1:]], coverage[bubble[1][1:]] / coverage[bubble[0][1:]])))
cov = max(coverage[bubble[0][1:]] / coverage[bubble[1][1:]], coverage[bubble[1][1:]] / coverage[bubble[0][1:]]) if bubble[0][1:] in coverage and bubble[1][1:] in coverage else 0
possible_merges.append((bubble[0][1:], bubble[1][1:], cov))

possible_merges.sort(key=lambda x: x[2])
while True:
Expand Down Expand Up @@ -353,9 +359,11 @@ def try_pop_tip(start, edges, coverage, removed_nodes, removed_edges, max_remova
chain_coverage = chain_coverage_sum[key] / chain_length_sum[key]

if not bubble:
#sys.stderr.write("Pop tip called with chain cov %s and comp %s\n"%(chain_coverage, comp_coverage))
try_pop_tip(">" + node, edges, coverage, removed_nodes, removed_edges, comp_coverage * 0.75 if chain_coverage <= comp_coverage * 1.5 else comp_coverage * 0.25, nodelens)
bubble = find_bubble("<" + node, edges, nodelens, max_poppable_node_size)
if not bubble:
#sys.stderr.write("Pop tip reverse called with chain cov %s and comp%s\n"%(chain_coverage, comp_coverage))
try_pop_tip("<" + node, edges, coverage, removed_nodes, removed_edges, comp_coverage * 0.75 if chain_coverage <= comp_coverage * 1.5 else comp_coverage * 0.25, nodelens)

for node in removed_nodes:
Expand Down

0 comments on commit 7206b68

Please sign in to comment.