In [None]:
# import necessary modules
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob, os
import seaborn as sns
import matplotlib.pyplot as plt

# Keep text in exported SVG files as real text elements (not glyph outlines),
# so labels stay editable in vector-graphics editors.
plt.rcParams.update({'svg.fonttype': 'none'})

In [None]:
# --- Inputs handed over by Snakemake ------------------------------------
# Path to the QUAST report table and the sample names of this run.
input_statistics_quast = snakemake.params.input_quast_report
SAMPLES = list(snakemake.params.samples)

# --- Output figure paths (one PNG and one SVG per plot) -----------------
# Contig counts per length threshold (log-scaled y-axis)
output_log_number_contigs_png = snakemake.output.log_number_contigs_png
output_log_number_contigs_svg = snakemake.output.log_number_contigs_svg
# Total length per length threshold (log-scaled y-axis)
output_contig_length_bp_png = snakemake.output.contig_length_bp_png
output_contig_length_bp_svg = snakemake.output.contig_length_bp_svg
# Contig count (>= 1000 bp) per assembly
output_contig_number_total_png = snakemake.output.contig_number_total_png
output_contig_number_total_svg = snakemake.output.contig_number_total_svg
# Total assembly length per assembly
output_contig_length_total_png = snakemake.output.contig_length_total_png
output_contig_length_total_svg = snakemake.output.contig_length_total_svg

In [None]:
# Sort the sample names in place so later cells see them in a stable order.
SAMPLES.sort()

# Load the tab-separated QUAST report into a dataframe.
df = pd.read_csv(input_statistics_quast, sep="\t")

# Reduce each assembly label to the part in front of the assembler suffix:
# the label is cut at the first "_spades" and everything after it is dropped.
df["Assembly"] = df["Assembly"].str.split("_spades").str[0]

# Same truncation for the polished-scaffold suffix. pandas interprets a
# multi-character split pattern as a regular expression, so the dot in ".tot"
# is escaped to match a literal "." instead of any character.
df["Assembly"] = df["Assembly"].str.split(r"_corrected_scaffolds_pilon\.tot").str[0]

# Order the rows by assembly name so the bars appear in the same order in
# every plot.
df = df.sort_values(by="Assembly")
df

In [None]:
# Number of contigs per assembly for four minimum-length thresholds,
# drawn as grouped bars on a logarithmic y-axis and saved as PNG and SVG.

# Apply the seaborn theme before the figure is created so it takes effect.
sns.set(font_scale=2)
sns.set_style("whitegrid")

# Scale the figure width with the number of samples, with a lower bound of
# 4 inches so the plot stays readable (and valid) for very few samples.
fig_width = max(4, .8 * len(SAMPLES))

# Create the figure explicitly and hand its axes to pandas, so exactly one
# figure exists for this plot (no additional empty figure is opened).
fig, ax = plt.subplots(figsize=(fig_width, 5))
df.plot(
    x="Assembly",
    y=["# contigs (>= 1000 bp)", "# contigs (>= 10000 bp)", "# contigs (>= 25000 bp)", "# contigs (>= 50000 bp)"],
    kind="bar",
    width=0.8,
    ax=ax,
)

# Label every bar with its value, anchored slightly above the bar top in data
# coordinates. Bars without a positive height are skipped: such a value has
# no position on the logarithmic y-axis that is set below.
for p in ax.patches:
    height = p.get_height()
    if height > 0:
        ax.annotate(str(height), (p.get_x(), height * 1.15), rotation=90, fontsize=10)

# Set the y-scale to log
ax.set_yscale('log')

# Add headroom for the bar labels by raising only the upper limit to 1.5 times
# its current value. The lower limit is left alone because 0 is not a valid
# limit on a log axis.
ax.set_ylim(top=ax.get_ylim()[1] * 1.5)

# Set x and y labels and tick label sizes
ax.set_xlabel("Assembly", fontsize=20)
ax.set_ylabel("log10(# of contigs)", fontsize=20)
ax.tick_params(axis='both', which='major', labelsize=10)
ax.tick_params(axis='both', which='minor', labelsize=10)

# Put the legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=14)

# Save as png and svg; the tight bounding box keeps the legend (which sits
# outside the axes) and the rotated tick labels inside the saved image.
fig.savefig(output_log_number_contigs_png, format="png", bbox_inches="tight")
fig.savefig(output_log_number_contigs_svg, format="svg", bbox_inches="tight")

# Display the plot
plt.show()

In [None]:
# Total assembly length per assembly for four minimum-contig-length
# thresholds, drawn as grouped bars on a logarithmic y-axis and saved as PNG
# and SVG.

# Apply the seaborn theme before the figure is created so it takes effect.
sns.set(font_scale=2)
sns.set_style("whitegrid")

# Scale the figure width with the number of samples, with a lower bound of
# 4 inches so the plot stays readable (and valid) for very few samples.
fig_width = max(4, .8 * len(SAMPLES))

# Create the figure explicitly and hand its axes to pandas, so exactly one
# figure exists for this plot (no additional empty figure is opened).
fig, ax = plt.subplots(figsize=(fig_width, 6))
df.plot(
    x="Assembly",
    y=["Total length (>= 1000 bp)", "Total length (>= 10000 bp)", "Total length (>= 25000 bp)", "Total length (>= 50000 bp)"],
    kind="bar",
    width=0.8,
    ax=ax,
)

# Label every bar with its value, anchored slightly above the bar top in data
# coordinates. Bars without a positive height are skipped: such a value has
# no position on the logarithmic y-axis that is set below.
for p in ax.patches:
    height = p.get_height()
    if height > 0:
        ax.annotate(str(height), (p.get_x(), height * 1.15), rotation=90, fontsize=8)

ax.set_yscale('log')

# Add headroom for the (long) bar labels by raising only the upper limit to
# 5 times its current value. The lower limit is left alone because 0 is not a
# valid limit on a log axis.
ax.set_ylim(top=ax.get_ylim()[1] * 5)

ax.set_xlabel("Assembly", fontsize=20)
ax.set_ylabel("log10(# of basepairs)", fontsize=20)
ax.tick_params(axis='both', which='major', labelsize=10)
ax.tick_params(axis='both', which='minor', labelsize=10)

# Put the legend to the right of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=14)

# Save as png and svg; the tight bounding box keeps the legend (which sits
# outside the axes) and the rotated tick labels inside the saved image.
fig.savefig(output_contig_length_bp_png, format="png", bbox_inches="tight")
fig.savefig(output_contig_length_bp_svg, format="svg", bbox_inches="tight")
plt.show()

In [None]:
# Number of contigs (>= 1000 bp) per assembly as a single bar series on a
# logarithmic y-axis, saved as PNG and SVG.

# Apply the seaborn theme before the figure is created so it takes effect.
sns.set(font_scale=2)
sns.set_style("whitegrid")

# Scale the figure width with the number of samples, with a lower bound of
# 4 inches so the plot stays readable (and valid) for very few samples.
fig_width = max(4, .4 * len(SAMPLES))

# Create the figure explicitly and hand its axes to pandas, so exactly one
# figure exists for this plot (no additional empty figure is opened).
fig, ax = plt.subplots(figsize=(fig_width, 12))
df.plot(x="Assembly", y=["# contigs (>= 1000 bp)"], kind="bar", width=0.8, ax=ax)

# Label every bar with its value, anchored slightly above the bar top in data
# coordinates. Bars without a positive height are skipped: such a value has
# no position on the logarithmic y-axis that is set below.
for p in ax.patches:
    height = p.get_height()
    if height > 0:
        ax.annotate(str(height), (p.get_x(), height * 1.1), rotation=90, fontsize=12)

ax.set_yscale('log')

# Add headroom for the bar labels by raising only the upper limit to 1.5 times
# its current value. The lower limit is left alone because 0 is not a valid
# limit on a log axis.
ax.set_ylim(top=ax.get_ylim()[1] * 1.5)

ax.set_xlabel("Assembly", fontsize=20)
ax.set_ylabel("log10(# of contigs)", fontsize=20)
ax.tick_params(axis='both', which='major', labelsize=10)
ax.tick_params(axis='both', which='minor', labelsize=10)

# Only one series is plotted, so the legend carries no information.
ax.get_legend().remove()

# Save as png and svg; the tight bounding box keeps the rotated tick labels
# inside the saved image.
fig.savefig(output_contig_number_total_png, format="png", bbox_inches="tight")
fig.savefig(output_contig_number_total_svg, format="svg", bbox_inches="tight")

plt.show()

In [None]:
# Total assembly length per assembly in million basepairs on a linear y-axis,
# saved as PNG and SVG.

# Apply the seaborn theme before the figure is created so it takes effect.
sns.set(font_scale=2)
sns.set_style("whitegrid")

# Convert the total length from basepairs to million basepairs (3 decimals).
df["Total length million"] = (df["Total length"] / 1000000).round(3)

# Scale the figure width with the number of samples, with a lower bound of
# 4 inches so the plot stays readable (and valid) for very few samples.
fig_width = max(4, .4 * len(SAMPLES))

# Create the figure explicitly and hand its axes to pandas, so exactly one
# figure exists for this plot (no additional empty figure is opened).
fig, ax = plt.subplots(figsize=(fig_width, 12))
df.plot(x="Assembly", y=["Total length million"], kind="bar", width=0.8, ax=ax)

# Label every bar with its value. The label is anchored at the bar top and
# shifted upwards by a few points on the page instead of by a fixed amount in
# data units: a data-unit offset of 1 (= 1 Mbp) can move the anchor above the
# axes for small assemblies, in which case the label would not be drawn.
for p in ax.patches:
    height = p.get_height()
    ax.annotate(
        str(height),
        xy=(p.get_x(), height),
        xytext=(0, 3),
        textcoords="offset points",
        rotation=90,
        fontsize=12,
    )

# Add 10 % headroom above the tallest bar for the labels.
ax.set_ylim(0, ax.get_ylim()[1] * 1.1)

ax.set_xlabel("Assembly", fontsize=20)
ax.set_ylabel("Million basepairs (Mbp)", fontsize=20)
ax.tick_params(axis='both', which='major', labelsize=10)
ax.tick_params(axis='both', which='minor', labelsize=10)

# Only one series is plotted, so the legend carries no information.
ax.get_legend().remove()

# Save as png and svg; the tight bounding box keeps the rotated tick labels
# inside the saved image.
fig.savefig(output_contig_length_total_png, format="png", bbox_inches="tight")
fig.savefig(output_contig_length_total_svg, format="svg", bbox_inches="tight")

plt.show()