Skip to content

Commit

Permalink
Added horizontal barplot for depicting percentage of RNA barcodes with at least one HTO count
Browse files Browse the repository at this point in the history
  • Loading branch information
Bo Li committed Sep 14, 2020
1 parent b9225ad commit 45e28e6
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 28 deletions.
2 changes: 1 addition & 1 deletion demuxEM/commands/DemuxEM.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class DemuxEM(Base):
output_name.ambient_hashtag.hist.pdf Optional output. A histogram plot depicting hashtag distributions of empty droplets and non-empty droplets.
output_name.background_probabilities.bar.pdf Optional output. A bar plot visualizing the estimated hashtag background probability distribution.
output_name.real_content.hist.pdf Optional output. A histogram plot depicting hashtag distributions of not-real-cells and real-cells as defined by total number of expressed genes in the RNA assay.
output_name.rna_demux.hist.pdf Optional output. A histogram plot depicting RNA UMI distribution for singlets, doublets and unknown cells.
output_name.rna_demux.hist.pdf Optional output. This figure consists of two plots. The first one is a horizontal bar plot depicting the percentage of RNA barcodes with at least one HTO count. The second plot is a histogram plot depicting RNA UMI distribution for singlets, doublets and unknown cells.
output_name.gene_name.violin.pdf Optional outputs. Violin plots depicting gender-specific gene expression across samples. We can have multiple plots if a gene list is provided in '--generate-gender-plot' option.
Examples:
Expand Down
2 changes: 1 addition & 1 deletion demuxEM/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def run_pipeline(input_rna_file, input_hto_file, output_name, **kwargs):
plot_hto_hist(
hashing_data, "rna_type", output_name + ".real_content.hist.pdf", alpha=0.5
)
plot_rna_hist(rna_data, output_name + ".rna_demux.hist.pdf")
plot_rna_hist(rna_data, hashing_data, output_name + ".rna_demux.hist.pdf")
logger.info("Diagnostic plots are generated.")

if len(kwargs["gen_gender_plot"]) > 0:
Expand Down
66 changes: 41 additions & 25 deletions demuxEM/tools/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,40 +28,56 @@ def plot_hto_hist(hashing_data: UnimodalData, attr: str, out_file: str, alpha: f


def plot_rna_hist(
    rna_data: UnimodalData, hashing_data: UnimodalData, out_file: str, plot_attr: str = "n_counts", cat_attr: str = "demux_type", dpi: int = 500, figsize: Tuple[float, float] = None
) -> None:
    """Save a two-panel diagnostic figure for RNA barcodes to ``out_file``.

    The top panel is a single stacked horizontal bar giving the percentage of
    barcodes in ``rna_data.obs_names`` that also appear in
    ``hashing_data.obs_names`` ('Has HTO') versus those that do not ('No HTO').
    The bottom panel overlays histograms of ``rna_data.obs[plot_attr]`` for the
    rows whose ``rna_data.obs[cat_attr]`` equals "singlet", "doublet" and
    "unknown", using 100 logarithmically spaced bins on a log-scaled x axis.

    Parameters
    ----------
    rna_data
        RNA data; its ``obs_names``, ``shape`` and ``obs`` table are read.
    hashing_data
        Hashtag data; only its ``obs_names`` are read.
    out_file
        Path of the figure file to write.
    plot_attr
        Column of ``rna_data.obs`` to histogram. Values are passed through
        ``log10`` to build the bins, so they are presumably strictly positive
        -- TODO confirm against callers.
    cat_attr
        Column of ``rna_data.obs`` holding the singlet/doublet/unknown labels.
    dpi
        Resolution the figure is created with.
    figsize
        Optional (width, height) in inches; ``None`` uses matplotlib's default.
    """
    fig, axes = plt.subplots(2, 1, gridspec_kw={'height_ratios': [1, 9], 'hspace': 0.6}, figsize = figsize, dpi = dpi)

    # pyplot keeps a reference to every figure it creates until the figure is
    # explicitly closed, so close it even if plotting or saving fails.
    try:
        # Percentage of RNA barcodes having HTO tags
        nhto = rna_data.obs_names.isin(hashing_data.obs_names).sum()
        total = rna_data.shape[0]

        ax = axes[0]
        p1 = nhto * 100.0 / total
        p2 = (total - nhto) * 100.0 / total
        labels = ['Has HTO', 'No HTO']
        ax.barh(y = 0, width = p1, color = 'red', height = 0.5, label = labels[0])
        # Stack the second segment to the right of the first one.
        ax.barh(y = 0, width = p2, left = p1, color = 'gray', height = 0.5, label = labels[1])
        ax.set_yticks([])
        ax.set_ylabel('RNA \nbarcodes', rotation = 0, ha = 'right', va = 'center')
        ax.set_xlim(left = 0, right = 100)
        ax.set_xlabel('Percentage')
        ax.legend(ncol=2, bbox_to_anchor=(1, 1), loc='lower right', fontsize = 'small')

        # RNA histogram
        bins = np.logspace(
            np.log10(min(rna_data.obs[plot_attr])), np.log10(max(rna_data.obs[plot_attr])), 101
        )
        cat_vec = rna_data.obs[cat_attr]
        ax = axes[1]
        # One semi-transparent histogram per category, all sharing the same bins.
        for category in ("singlet", "doublet", "unknown"):
            ax.hist(
                rna_data.obs.loc[np.isin(cat_vec, category), plot_attr],
                bins,
                alpha=0.5,
                label=category,
            )
        ax.legend(loc="upper right")
        ax.set_xscale("log")
        ax.set_xlabel("Number of RNA UMIs (log10 scale)")
        ax.set_ylabel("Number of cellular barcodes")

        fig.savefig(out_file)
    finally:
        plt.close(fig)


def plot_bar(heights: List[float], tick_labels: List[str], xlabel: str, ylabel: str, out_file: str, dpi: int = 500, figsize: Tuple[float, float] = None) -> None:
Expand Down
2 changes: 1 addition & 1 deletion docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ to see the usage information::
Optional output. A histogram plot depicting hashtag distributions of not-real-cells and real-cells as defined by total number of expressed genes in the RNA assay.

output_name.rna_demux.hist.pdf
Optional output. A histogram plot depicting RNA UMI distribution for singlets, doublets and unknown cells.
Optional output. This figure consists of two plots. The first one is a horizontal bar plot depicting the percentage of RNA barcodes with at least one HTO count. The second plot is a histogram plot depicting RNA UMI distribution for singlets, doublets and unknown cells.

output_name.gene_name.violin.pdf
Optional outputs. Violin plots depicting gender-specific gene expression across samples. We can have multiple plots if a gene list is provided in '--generate-gender-plot' option.
Expand Down

0 comments on commit 45e28e6

Please sign in to comment.