Skip to content

Commit

Permalink
Concurrent edits on trek/biowulf.
Browse files Browse the repository at this point in the history
Merge branch 'master' of https://github.com/nhansen/SVanalyzer
  • Loading branch information
nhansen committed Oct 25, 2020
2 parents 7697df6 + 6a18fa3 commit 3c49c4c
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 15 deletions.
22 changes: 13 additions & 9 deletions lib/NHGRI/MUMmer/AlignSet.pm
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,15 @@ sub _parse_delta_file {
last;
}
}
print STDERR "Parsing entry pair $ref_entry/$query_entry\n";
print STDERR "Parsing entry pair $ref_entry/$query_entry\n" if ($self->{verbose});
push @{$self->{entry_pairs}},
{ref_entry => $ref_entry, query_entry => $query_entry, ref_length => $ref_length, query_length => $query_length};
}
elsif (/^(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\d+)$/) {
my ($ref_start, $ref_end, $query_start, $query_end, $mismatches, $nonposmatches, $nonalphas) =
($1, $2, $3, $4, $5, $6, $7);
my $cigar_string = $self->_read_cigar_string($ref_start, $ref_end, $query_start, $query_end);
print STDERR "Read cigar string $cigar_string\n" if ($self->{verbose});
if ($self->{extend_exact}) {
my ($new_ref_start, $new_ref_end, $new_query_start, $new_query_end, $new_cigar) =
$self->_extend_exact($ref_entry, $ref_start, $ref_end, $query_entry, $query_start, $query_end, $cigar_string);
Expand Down Expand Up @@ -384,7 +385,7 @@ sub _extend_exact {
$query_next_base =~ tr/ATGC/TACG/ if ($comp);

my $right_extend_bases = ($new_cigar =~ s/(\d+)M$//) ? $1 : 0;
while ($ref_next_base eq $query_next_base) {
while ($ref_next_base ne 'N' && $ref_next_base eq $query_next_base) {
$new_ref_end = $ref_next_pos;
$new_query_end = $query_next_pos;
$right_extend_bases++;
Expand All @@ -399,9 +400,11 @@ sub _extend_exact {
$query_next_base = ($rh_queryseqs) ? uc(substr($rh_queryseqs->{$query_entry}, $query_next_pos - 1, 1)) :
uc($query_fasta_db->seq("$query_entry:$query_next_pos-$query_next_pos"));
$query_next_base =~ tr/ATGC/TACG/ if ($comp);

print STDERR "Comparing $ref_next_base to $query_next_base\n" if ($self->{verbose});
}
$new_cigar = $new_cigar.$right_extend_bases.'M' if ($right_extend_bases);
#print STDERR "Rightside REF $ref_next_base does not match QUERY $query_next_base\n";
print STDERR "Rightside REF $ref_next_base does not match QUERY $query_next_base\n" if ($self->{verbose});
}

# extend to left:
Expand All @@ -416,7 +419,7 @@ sub _extend_exact {
$query_next_base =~ tr/ATGC/TACG/ if ($comp);

my $left_extend_bases = ($new_cigar =~ s/^(\d+)M//) ? $1 : 0;
while ($ref_next_base eq $query_next_base) {
while ($ref_next_base ne 'N' && $ref_next_base eq $query_next_base) {
$new_ref_start = $ref_next_pos;
$new_query_start = $query_next_pos;
$left_extend_bases++;
Expand All @@ -431,17 +434,18 @@ sub _extend_exact {
$query_next_base = ($rh_queryseqs) ? uc(substr($rh_queryseqs->{$query_entry}, $query_next_pos - 1, 1)) :
uc($query_fasta_db->seq("$query_entry:$query_next_pos-$query_next_pos"));
$query_next_base =~ tr/ATGC/TACG/ if ($comp);
print STDERR "Comparing $ref_next_base to $query_next_base\n" if ($self->{verbose});
}
$new_cigar = $left_extend_bases.'M'.$new_cigar if ($left_extend_bases);
#print STDERR "Leftside REF $ref_next_base does not match QUERY $query_next_base\n";
print STDERR "Leftside REF $ref_next_base does not match QUERY $query_next_base\n" if ($self->{verbose});
}

if ($self->{verbose} && ($new_cigar ne $cigar || $new_ref_start != $ref_start || $new_ref_end != $ref_end)) {
#print STDERR "Ref $ref_entry:$ref_start-$ref_end extended to $new_ref_start-$new_ref_end\n";
#print STDERR "Query $query_entry:$query_start-$query_end extended to $new_query_start-$new_query_end\n";
print STDERR "Ref $ref_entry:$ref_start-$ref_end extended to $new_ref_start-$new_ref_end\n";
print STDERR "Query $query_entry:$query_start-$query_end extended to $new_query_start-$new_query_end\n";
}
else {
#print STDERR "No extension! Ref $ref_entry:$ref_start-$ref_end (length $ref_length) Query $query_entry:$query_start-$query_end $cigar (length $query_length)\n";
elsif ($self->{verbose}) {
print STDERR "No extension! Ref $ref_entry:$ref_start-$ref_end (length $ref_length) Query $query_entry:$query_start-$query_end $cigar (length $query_length)\n";
}

return ($new_ref_start, $new_ref_end, $new_query_start, $new_query_end, $new_cigar);
Expand Down
13 changes: 7 additions & 6 deletions scripts/misc/delta2sam.pl
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,12 @@ sub write_edgelets {
my $query_start = $rh_align->{query_start};
my $query_end = $rh_align->{query_end};
my $comp = ($query_start > $query_end) ? 1 : 0;
# query_remaining is always the number of query bases to the 3' of the alignment
my $query_remaining = ($comp) ? $query_length - $query_start : $query_length - $query_end;
push @ref_aligns, [$query_entry, $rh_align->{ref_start}, $rh_align->{cigar_string}, $query_start, $query_end, $query_remaining];

if ($Opt{'alignlengths'}) {
my $cigar = $rh_align->{'cigar_string'};
print "CIGAR $cigar\n";
my $longest_match = 0;
my $longest_del = 0;
my $longest_ins = 0;
Expand Down Expand Up @@ -161,17 +161,18 @@ sub write_edgelets {
}
}

my @sorted_ref_aligns = sort {$a->[1] <=> $b->[1]} @ref_aligns;
my @sorted_ref_aligns = sort {$a->[1] <=> $b->[1]} @ref_aligns; # sort by reference start
foreach my $ra_ref_align (@sorted_ref_aligns) {
my ($query_entry, $ref_start, $cigar, $query_start, $query_end, $query_remaining) = @{$ra_ref_align};
my $flag = ($query_start > $query_end) ? 16 : 0;
my $comp = ($query_start > $query_end) ? 1 : 0;
if ((!$comp && $query_start > 1) || ($comp && $query_end > 1)) { # hard clip start
my $query_hc = ($comp) ? $query_end - 1 : $query_start - 1;
if ((!$comp && $query_start > 1) || ($comp && $query_remaining > 0)) { # hard clip start
my $query_hc = ($comp) ? $query_remaining : $query_start - 1;
$cigar = $query_hc."H".$cigar;
}
if ($query_remaining > 0) { # hard clip end
$cigar = $cigar.$query_remaining."H";
if ((!$comp && $query_remaining > 0) || ($comp && $query_end > 1)) { # hard clip end
my $query_hc = ($comp) ? $query_end - 1 : $query_remaining;
$cigar = $cigar.$query_hc."H";
}
my $seq = $query_db->seq($query_entry, $query_start, $query_end);
print $sam_fh "$query_entry\t$flag\t$ref_entry\t$ref_start\t0\t$cigar\t*\t*\t*\t$seq\t*\n";
Expand Down

0 comments on commit 3c49c4c

Please sign in to comment.