Skip to content

Commit

Permalink
update plots for pacbio simulated data
Browse files Browse the repository at this point in the history
  • Loading branch information
mehrdadbakhtiari committed Jul 6, 2018
1 parent dcad3bf commit 66a952c
Showing 1 changed file with 75 additions and 44 deletions.
119 changes: 75 additions & 44 deletions advntr/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,7 @@ def get_correct_estimates_for_ru(files, ru_length=None, adVNTR=False):
def plot_pacbio_ru_length_result(results_dir='../pacbio_ru_data_for_all_vntrs/'):
from matplotlib import rc, rcParams
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure()
plt.style.use('ggplot')
plt.rcParams['axes.facecolor'] = '#FFFFFF'
Expand All @@ -685,57 +686,87 @@ def plot_pacbio_ru_length_result(results_dir='../pacbio_ru_data_for_all_vntrs/')
ax.text(-0.1, 1.1, r'\textbf{B}', transform=ax.transAxes,
fontsize=16, fontweight='bold', va='top', ha='right')
import glob
import os
ru_dirs = glob.glob(results_dir + '*')
points = []
naive_points = []

def get_lengths_and_discrepancies(file_name):
import ast
with open(file_name) as infile:
lines = infile.readlines()
if len(lines) < 2:
return [], []
length = ast.literal_eval(lines[0])
disc = ast.literal_eval(lines[-1])
return length, disc

lengths = []
naive_lengths = []
discrepancies = []
naive_discrepancies = []
for ru_dir in ru_dirs:
ru_length = int(ru_dir.split('/')[-1])
files = glob.glob(ru_dir + '/*.fasta.out')
naivefiles = glob.glob(ru_dir + '/*.fasta.out.naive')
corrects, error_bar = get_correct_estimates_for_ru(files, ru_length, True)
naive_corrects, naive_error_bar = get_correct_estimates_for_ru(naivefiles, ru_length)
points.append((ru_length, corrects, error_bar))
naive_points.append((ru_length, naive_corrects, naive_error_bar))
points = sorted(points)
ru_lengths = [x for x, y, z in points]
naive_points = sorted(naive_points)

ru_lengths = [int(x/10)*10 for x, y, z in points]
corrects = []
naive_corrects = []
for x in ru_lengths:
vntr_id = os.path.basename(ru_dir)
if not os.path.exists(ru_dir + '/advntr_result.txt') or not os.path.exists(ru_dir + '/naive_result.txt'):
continue
length, disc = get_lengths_and_discrepancies(ru_dir + '/advntr_result.txt')
naive_length, naive_disc = get_lengths_and_discrepancies(ru_dir + '/naive_result.txt')
lengths += length
naive_lengths += naive_length
discrepancies += disc
naive_discrepancies += naive_disc

print(len(discrepancies))

def plot_data(matplot_ax, discrepancies_list, lengths_list, naive=0):
data = {}
width = 200
offset = float(width) / 2
if naive:
offset *= -1

total = 0
num = 0
for xx, yy, zz in points:
if (xx/10)*10 == x:
total += yy
num += 1
corrects.append(float(total)/num)
naive_error_bars = []
from scipy import stats
for x in ru_lengths:
total = 0.0
num = 0.0
observed_values = []
for xx, yy, zz in naive_points:
if (xx/10)*10 == x:
observed_values.append(yy)
total += yy
num += 1
naive_corrects.append(float(total)/num)
naive_error_bars.append(stats.sem(observed_values))

print(naive_error_bars)
error_bars = [0 for x, y, z in points]
# naive_error_bars = [0 for x, y, z in naive_points]
# corrects = [y for x, y, z in points]
# naive_corrects = [y for x, y, z in naive_points]
plt.errorbar(ru_lengths, corrects, yerr=error_bars, label='adVNTR')
plt.errorbar(ru_lengths, naive_corrects, yerr=naive_error_bars, ls='--', label='Naive Method')
total_wrongs = 0
for i in range(len(discrepancies_list)):
key = 500 * (lengths_list[i] / 500)
if key > 3000:
continue
if key not in data.keys():
data[key] = []
data[key].append(discrepancies_list[i])
for key in data.keys():
wrongs = sum([1 for e in data[key] if e > 0])
total_wrongs = wrongs
total += len(data[key])
data[key] = 100 - 100 * wrongs / float(len(data[key]))
print total_wrongs, total
print(float(total_wrongs) / total * 100)
label = 'Naive Method' if naive else 'adVNTR'
matplot_ax.bar(np.array(data.keys()) + offset, data.values(), width=width, label=label)

plot_data(ax, discrepancies, lengths, 0)
plot_data(ax, naive_discrepancies, naive_lengths, 1)
ax.set_ylim((0, 100))
ax.set_xlabel(r'\emph{VNTR Length}', fontsize=13)
ax.set_ylabel(r'\emph{Correct Estimate Percentage}', fontsize=13)

# naive_error_bars = []
# from scipy import stats
# for x in ru_lengths:
# total = 0.0
# num = 0.0
# observed_values = []
# for xx, yy, zz in naive_points:
# if (xx/10)*10 == x:
# observed_values.append(yy)
# total += yy
# num += 1
# naive_corrects.append(float(total)/num)
# naive_error_bars.append(stats.sem(observed_values))

plt.tight_layout(pad=2, w_pad=0.5, h_pad=1)

plt.legend(loc=4, fontsize=16)
plt.legend(loc=3, fontsize=16)
plt.savefig('pacbio_ru_length_results.pdf')


Expand Down Expand Up @@ -1249,7 +1280,7 @@ def plot_vntr_length_distribution(max_len=1000):
# plot_read_recruitment_results()
# plot_inconsistency_difference()

# plot_pacbio_ru_length_result()
plot_pacbio_ru_length_result('../pacbio_simulations/')
# plot_pedigree_tree()
# plot_lr_pcr()

Expand Down

0 comments on commit 66a952c

Please sign in to comment.