Skip to content

Commit

Permalink
Add CRAM indexing tests
Browse files Browse the repository at this point in the history
Add M5: tags to test/index.sam so that the cram tests can find the
right references.  It's done this way rather then using the
reference fasta file so that no UR: tags are added, possibly
changing the length of the header.

Add a function to test.pl to make an MD5 based cache from
test/ce.fa.  It's done this way as checking in the reference
files would make the git repository quite a bit bigger.
  • Loading branch information
daviesrob committed Feb 13, 2019
1 parent 8dc6468 commit 24b121a
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 7 deletions.
Binary file modified test/index.bam.bai
Binary file not shown.
Binary file modified test/index.bam.csi
Binary file not shown.
Binary file added test/index.cram.crai
Binary file not shown.
14 changes: 7 additions & 7 deletions test/index.sam
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
@HD VN:1.6 SO:coordinate
@SQ SN:CHROMOSOME_I LN:1009800
@SQ SN:CHROMOSOME_II LN:5000
@SQ SN:CHROMOSOME_III LN:5000
@SQ SN:CHROMOSOME_IV LN:5000
@SQ SN:CHROMOSOME_V LN:5000
@SQ SN:CHROMOSOME_X LN:5000
@SQ SN:CHROMOSOME_MtDNA LN:5000
@SQ SN:CHROMOSOME_I LN:1009800 M5:8ede36131e0dbf3417807e48f77f3ebd
@SQ SN:CHROMOSOME_II LN:5000 M5:8e7993f7a93158587ee897d7287948ec
@SQ SN:CHROMOSOME_III LN:5000 M5:3adcb065e1cf74fafdbba1e8c352b323
@SQ SN:CHROMOSOME_IV LN:5000 M5:251af66a69ee589c9f3757340ec2de6f
@SQ SN:CHROMOSOME_V LN:5000 M5:cf200a65fb754836dcc56b24b3170ee8
@SQ SN:CHROMOSOME_X LN:5000 M5:6f9368fd2192c89c613718399d2d31fc
@SQ SN:CHROMOSOME_MtDNA LN:5000 M5:cd05857ece6411f40257a565ccfe15bb
@PG ID:bowtie2 PN:bowtie2 VN:2.0.0-beta5
SRR065390.17240207 16 CHROMOSOME_I 999901 42 100M * 0 0 ATGTTTACAGGACTTCAAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAG CACAC?CBBAA@?@?BADDBBDBBAB>DDDBBDDABBBCCADDDDDCBCBCCCDBDDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU
SRR065390.15493040 0 CHROMOSOME_I 999912 42 100M * 0 0 ACTTCAAGCAGAGGATTTTTCGATGATTGCCAAAAATTTTGGAACTTTTATAGGCTTAAGCTTATGGTTATGTTTAGGCGTAGGCTTAGGCTTAGGCGTA CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDBCCBDBCCBDDA@>DC?5@?@@??:><<>8>39<37 AS:i:0 XN:i:0 XM:i:0 XO:i:0 XG:i:0 NM:i:0 MD:Z:100 YT:Z:UU
Expand Down
Binary file modified test/index.sam.gz.bai
Binary file not shown.
Binary file modified test/index.sam.gz.csi
Binary file not shown.
55 changes: 55 additions & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
test_bgzip($opts, 0);
test_bgzip($opts, 4);

ce_fa_to_md5_cache($opts);
test_index($opts);

test_view($opts,0);
Expand Down Expand Up @@ -293,6 +294,54 @@ sub is_file_newer
return 0;
}

sub ce_fa_to_md5_cache {
my ($opts) = @_;

# These should really be worked out from the file contents, but
# pre-calculating them avoids a dependency on Digest::MD5
my %csums = (CHROMOSOME_I => '8ede36131e0dbf3417807e48f77f3ebd',
CHROMOSOME_II => '8e7993f7a93158587ee897d7287948ec',
CHROMOSOME_III => '3adcb065e1cf74fafdbba1e8c352b323',
CHROMOSOME_IV => '251af66a69ee589c9f3757340ec2de6f',
CHROMOSOME_V => 'cf200a65fb754836dcc56b24b3170ee8',
CHROMOSOME_X => '6f9368fd2192c89c613718399d2d31fc',
CHROMOSOME_MtDNA => 'cd05857ece6411f40257a565ccfe15bb');

my $m5_dir = "$$opts{tmp}/md5";
if (!-d $m5_dir) {
mkdir($m5_dir) || die "Couldn't make directory $m5_dir\n";
}
my $out;
open(my $fa, '<', "$$opts{path}/ce.fa")
|| die "Couldn't open $$opts{path}/ce.fa : $!\n";
my $name = '';
while (<$fa>) {
chomp;
if (/^>(\S+)/) {
if ($out) {
close($out) || die "Error closing $m5_dir/$csums{$name} : $!\n";
}
$name = $1;
if (!exists($csums{$name})) {
die "Unexpected fasta entry : $name\n";
}
open($out, '>', "$m5_dir/$csums{$name}")
} else {
if (!$out) {
die "$$opts{path}/ce.fa : Got data before fasta header\n";
}
$_ = uc($_);
s/\s+//g;
print $out $_;
}
}
if ($out) {
close($out) || die "Error closing $m5_dir/$csums{$name} : $!\n";
}
close($fa) || die "Error reading $$opts{path}/ce.fa : $!\n";
$$opts{m5_dir} = $m5_dir;
}


# The tests --------------------------

Expand Down Expand Up @@ -587,6 +636,12 @@ sub test_index
unlink("$$opts{tmp}/index.sam.gz.bai");
test_compare($opts,"$$opts{path}/test_index -b $$opts{tmp}/index.sam.gz", "$$opts{tmp}/index.sam.gz.bai", "$$opts{path}/index.sam.gz.bai");

# CRAM
local $ENV{REF_PATH} = $$opts{m5_dir};
test_compare($opts,"$$opts{path}/test_view -l 0 -C -x $$opts{tmp}/index.cram.crai $$opts{path}/index.sam > $$opts{tmp}/index.cram", "$$opts{tmp}/index.cram.crai", "$$opts{path}/index.cram.crai", gz=>1);
unlink("$$opts{tmp}/index.cram.crai");
test_compare($opts,"$$opts{path}/test_index $$opts{tmp}/index.cram", "$$opts{tmp}/index.cram.crai", "$$opts{path}/index.cram.crai", gz=>1);

# BCF
test_compare($opts,"$$opts{path}/test_view -l 0 -b -m 14 -x $$opts{tmp}/index.bcf.csi $$opts{path}/index.vcf > $$opts{tmp}/index.bcf", "$$opts{tmp}/index.bcf.csi", "$$opts{path}/index.bcf.csi", gz=>1);
unlink("$$opts{tmp}/index.bcf.csi");
Expand Down

0 comments on commit 24b121a

Please sign in to comment.