Skip to content

Commit

Permalink
better handling of too few reads to fit models
Browse files Browse the repository at this point in the history
  • Loading branch information
maxibor committed May 7, 2020
1 parent f129313 commit 509f3f6
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 7 deletions.
21 changes: 17 additions & 4 deletions pydamage/damage.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,22 @@ def avg_coverage(pysam_cov):
cov = np.mean(cov_all_bases)
return(cov)

def check_model_fit(model_dict, wlen):
# Check that no fitted parameters or stdev are infinite
if np.inf in model_dict.values() or -np.inf in model_dict.values():
return(False)

# Check that references have at least reads aligned on the first n wlen bases
if not all(i in model_dict.keys() for i in list(range(wlen))):
return(False)
return(model_dict)

def test_damage(ref, bam, mode, wlen, show_al, min_al, min_cov, process, verbose):
al_handle = pysam.AlignmentFile(bam, mode=mode, threads=process)
try:
cov = avg_coverage(al_handle.count_coverage(contig=ref))
nb_reads_aligned = al_handle.count(contig=ref)
reflen = al_handle.get_reference_length(ref)

if nb_reads_aligned >= min_al or cov >= min_cov:
al = al_to_damage(reference=ref, al_handle=al_handle)
Expand All @@ -84,9 +95,11 @@ def test_damage(ref, bam, mode, wlen, show_al, min_al, min_cov, process, verbose
test_res['reference'] = ref
test_res['nb_reads_aligned'] = nb_reads_aligned
test_res['coverage'] = cov
return(test_res)
return(check_model_fit(test_res, wlen))
else:
pass
except ValueError:
print(f"Could not fit a model for {ref} because of too few reads aligned ({nb_reads_aligned})")
pass
except ValueError as e:
print(f"Could not fit a model for {ref} because of too few reads aligned")
print(f"Model fitting error: {e}")
print(f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov} - reflen: {reflen}\n")
return(False)
2 changes: 1 addition & 1 deletion pydamage/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def analyze(bam, wlen=30, show_al=False, mini=2000, cov=0.5, process=1, outdir="
with multiprocessing.Pool(proc) as p:
res = p.map(test_damage_partial, refs)
filt_res = [i for i in res if i]
if plot:
if plot and len(filt_res) > 0:
print("\nGenerating pydamage plots")
for ref in tqdm(filt_res):
dam_plot = damageplot(damage_dict=ref, wlen=wlen, qlen = ref['qlen'], outdir=outdir)
Expand Down
4 changes: 2 additions & 2 deletions pydamage/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
class geom_mod():
def __init__(self):
self.kwds = ['geom_p', 'geom_pmin', 'geom_pmax']
self.bounds = ((0.01, 0., 0.01), (0.99, 0.2, 0.99))
self.bounds = ((0.001, 0.001, 0.001), (0.99, 0.2, 0.99))

def __repr__(self):
return(
Expand Down Expand Up @@ -57,7 +57,7 @@ def log_pmf(self, x, geom_p, geom_pmin, geom_pmax):
class unif_mod():
def __init__(self):
self.kwds = ('unif_pmin',)
self.bounds = ((0.,), (0.2,))
self.bounds = ((0.001,), (0.2,))

def __repr__(self):
return(
Expand Down
2 changes: 2 additions & 0 deletions pydamage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def makedir(dirpath, confirm=True, force=False):

def pandas_processing(res_dict, outdir):
df = pd.DataFrame(res_dict)
if len(res_dict) == 0:
return(df)
df['qvalue'] = multipletests(df['pvalue'], method='fdr_bh')[1]
df = df[['unif_pmin', 'unif_pmin_stdev',
'geom_p', 'geom_p_stdev',
Expand Down

0 comments on commit 509f3f6

Please sign in to comment.