Skip to content

Commit

Permalink
Merge pull request #63 from karel-brinda/curesim_random
Browse files Browse the repository at this point in the history
Add support for curesim random reads
  • Loading branch information
Karel Břinda committed Jul 4, 2017
2 parents d5a2da6 + 89018af commit d9c51c9
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 12 deletions.
48 changes: 37 additions & 11 deletions rnftools/mishmash/CuReSim.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class CuReSim(Source):
number_of_read_tuples (int): Number of read tuples (if coverage specified, then it must be equal to zero). Corresponding CuReSim parameter: ``-n``.
read_length_1 (int): Length of the first read. Corresponding CuReSim parameter: ``-m``.
read_length_2 (int): Length of the second read. Fake parameter (unsupported by CuReSim).
random_reads (bool): Simulate random reads (see CuReSim documentation for more details).
rng_seed (int): Seed for simulator's random number generator. Fake parameter (unsupported by CuReSim).
other_params (str): Other parameters which are used on command-line.
Expand All @@ -33,6 +34,7 @@ def __init__(
number_of_read_tuples=0,
read_length_1=100,
read_length_2=0,
random_reads=False,
rng_seed=1,
other_params="",
):
Expand All @@ -55,6 +57,7 @@ def __init__(

self.read_length_1 = read_length_1
self.read_length_2 = read_length_2
self.random_reads = random_reads
self.other_params = other_params

coverage=float(coverage)
Expand Down Expand Up @@ -101,13 +104,20 @@ def create_fq(self):
self.number_of_read_tuples = int(
self.coverage * genome_size / (self.read_length_1 + self.read_length_2))

if self.random_reads:
no_normal_reads=1
no_random_reads=self.number_of_read_tuples
else:
no_normal_reads=self.number_of_read_tuples
no_random_reads=0

rnftools.utils.shell("""
(cd "{dir}" && \
"{curesim}" \
-f "{fa}" \
-n {nb} \
-n {no_normal} \
-r {no_random} \
-m {rlen1} \
-r 0 \
-sd 0 \
-y 0 \
{other_params} \
Expand All @@ -116,7 +126,8 @@ def create_fq(self):
dir=self.get_dir(),
curesim="curesim",
fa=self._fa_fn,
nb=self.number_of_read_tuples,
no_normal=no_normal_reads,
no_random=no_random_reads,
rlen1=self.read_length_1,
other_params=self.other_params,
rng_seed=self._rng_seed,
Expand All @@ -137,6 +148,7 @@ def create_fq(self):
fai_fo=fai_fo,
genome_id=self.genome_id,
number_of_read_tuples=10 ** 9,
recode_random=self.random_reads,
)

@staticmethod
Expand All @@ -146,6 +158,7 @@ def recode_curesim_reads(
fai_fo,
genome_id,
number_of_read_tuples=10 ** 9,
recode_random=False,
):
"""Recode CuReSim output FASTQ file to the RNF-compatible output FASTQ file.
Expand All @@ -155,6 +168,7 @@ def recode_curesim_reads(
fai_fo (file object): File object for FAI file of the reference genome.
genome_id (int): RNF genome ID to be used.
number_of_read_tuples (int): Expected number of read tuples (to estimate number of digits in RNF).
recode_random (bool): Recode random reads.
Raises:
ValueError
Expand Down Expand Up @@ -214,19 +228,30 @@ def recode_curesim_reads(
end_pos = start_pos - 1 - ins_nb + del_nb

chr_id = 0

random=contig_name[:4]=="rand"

# TODO: uncomment when the chromosome naming bug in curesim is corrected
# chr_id = self.dict_chr_ids[contig_name] if self.dict_chr_ids!={} else "0"

elif i % 4 == 1:
bases = line.strip()
end_pos += len(bases)

if recode_random:
left=0
right=0
else:
left=start_pos + 1
right=end_pos


segment = rnftools.rnfformat.Segment(
genome_id=genome_id,
chr_id=chr_id,
direction=direction,
left=start_pos + 1,
right=end_pos,
left=left,
right=right,
)

elif i % 4 == 2:
Expand All @@ -235,12 +260,13 @@ def recode_curesim_reads(
elif i % 4 == 3:
qualities = line.strip()

fq_creator.add_read(
read_tuple_id=read_tuple_id,
bases=bases,
qualities=qualities,
segments=[segment],
)
if random==recode_random:
fq_creator.add_read(
read_tuple_id=read_tuple_id,
bases=bases,
qualities=qualities,
segments=[segment],
)

read_tuple_id += 1

Expand Down
2 changes: 1 addition & 1 deletion rnftools/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
VERSION="0.3.1.1"
VERSION="0.3.1.2"
8 changes: 8 additions & 0 deletions tests/snakemake/01_SE_simulation/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ rnftools.mishmash.CuReSim(
read_length_2=0,
)

rnftools.mishmash.CuReSim(
fasta=fa,
number_of_read_tuples=10000,
read_length_1=100,
read_length_2=0,
random_reads=True,
)

rnftools.mishmash.DwgSim(
fasta=fa,
number_of_read_tuples=10000,
Expand Down

0 comments on commit d9c51c9

Please sign in to comment.