In [None]:
# import necessary modules
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob, os
import seaborn as sns
import matplotlib.pyplot as plt

# Seaborn "ticks" theme with the axes grid switched on.
sns.set_style("ticks", {"axes.grid": True})

# Matplotlib defaults shared by every figure in this notebook, applied in one pass.
plt.rcParams.update({
    # Axes frame and tick geometry
    "axes.linewidth": 1.5,
    "xtick.major.width": 1.5,
    "ytick.major.width": 1.5,
    "xtick.major.size": 8,
    "ytick.major.size": 8,
    "axes.titlepad": 20,
    # Font sizes
    "axes.titlesize": 30,
    "axes.labelsize": 23.5,
    "xtick.labelsize": 18,
    "ytick.labelsize": 18,
    "legend.fontsize": 18,
    # Font selection and text rendering
    "font.family": "sans-serif",
    "font.sans-serif": ["Liberation Sans"],
    "text.usetex": False,
    # 'none' keeps SVG text as text instead of converting it to paths
    "svg.fonttype": "none",
    # Resolution used by savefig
    "savefig.dpi": 300,
})

In [None]:
# `snakemake` is not defined in this notebook; it is presumably injected by
# Snakemake's notebook integration before the cells run.

# Inputs: the sample names and the QUAST report that is read with pandas below.
SAMPLES = list(snakemake.params.samples)
input_statistics_quast = snakemake.params.input_quast_report

# Outputs: one PNG/SVG pair of target paths per figure.
output_log_number_contigs_png, output_log_number_contigs_svg = (
    snakemake.output.log_number_contigs_png,
    snakemake.output.log_number_contigs_svg,
)
output_contig_length_bp_png, output_contig_length_bp_svg = (
    snakemake.output.contig_length_bp_png,
    snakemake.output.contig_length_bp_svg,
)
output_contig_number_total_png, output_contig_number_total_svg = (
    snakemake.output.contig_number_total_png,
    snakemake.output.contig_number_total_svg,
)
output_contig_length_total_png, output_contig_length_total_svg = (
    snakemake.output.contig_length_total_png,
    snakemake.output.contig_length_total_svg,
)

In [None]:
# Sort the sample names in place
SAMPLES.sort()

# Read the tab-separated QUAST statistics into a dataframe
df = pd.read_csv(input_statistics_quast, sep="\t")

# Keep only the part of each "Assembly" value that precedes the first
# "_spades" or "_corrected_scaffolds_pilon.tot" suffix.
# str.partition matches its separator literally; str.split would interpret a
# multi-character pattern as a regular expression, so the "." in
# "_corrected_scaffolds_pilon.tot" would match any character.
for suffix in ("_spades", "_corrected_scaffolds_pilon.tot"):
    df["Assembly"] = df["Assembly"].str.partition(suffix)[0]

# Order the rows by assembly name and display the table
df = df.sort_values(by="Assembly")
df

# Number of assembled contigs per contig length groups

In [None]:
# Scale the figure width with the number of samples
fig_width = .8 * len(SAMPLES)

# Build a log10-transformed copy of the statistics (every column after the first).
# The columns are replaced by label instead of being written through ``iloc``:
# storing float results in integer columns via ``iloc`` is deprecated in recent pandas.
value_columns = df.columns[1:]
df_log = df.copy()
df_log[value_columns] = np.log10(df.iloc[:, 1:])

# Grouped bar chart: one group per assembly, one bar per contig length threshold
contig_count_columns = [
    "# contigs (>= 1000 bp)",
    "# contigs (>= 10000 bp)",
    "# contigs (>= 25000 bp)",
    "# contigs (>= 50000 bp)",
]
ax = df_log.plot(x="Assembly", y=contig_count_columns, kind="bar", figsize=(fig_width, 12), width=0.8)

# Label each bar with its back-transformed (original) count
for p in ax.patches:
    height = p.get_height()
    # log10(0) is -inf; anchor such labels at the baseline so the "0" is still drawn
    label_y = height if np.isfinite(height) else 0.0
    ax.annotate(str(round(10**height)), (p.get_x(), label_y + 0.1), rotation=90, fontsize=14)

# Extend the y-limit to give room to the labels
ax.set_ylim(0, ax.get_ylim()[1] + 0.2)

# Place ticks on whole exponents only and label them as powers of 10.
# Truncating automatically chosen ticks with int() would label fractional
# positions (e.g. 0.5 and 1.5) as 10^0 and 10^1.
exponents = list(range(int(ax.get_ylim()[1]) + 1))
ax.set_yticks(exponents)
ax.set_yticklabels([r'$10^{{{}}}$'.format(e) for e in exponents])

# Set x and y labels
ax.set_xlabel("Assembly")
ax.set_ylabel("log10(# of contigs)")

# Place the legend outside the axes, to the right
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Save the figure as png and svg
ax.figure.savefig(output_log_number_contigs_png, format="png", bbox_inches="tight", transparent=True)
ax.figure.savefig(output_log_number_contigs_svg, format="svg", bbox_inches="tight", transparent=True)

# Display the plot
plt.show()


# Length of assembled bases per contig length groups

In [None]:
# Scale the figure width with the number of samples
fig_width = 0.8 * len(SAMPLES)

# Grouped bar chart of the log10-transformed total lengths (df_log is built in
# the previous plotting cell): one group per assembly, one bar per threshold
total_length_columns = [
    "Total length (>= 1000 bp)",
    "Total length (>= 10000 bp)",
    "Total length (>= 25000 bp)",
    "Total length (>= 50000 bp)",
]
ax = df_log.plot(x="Assembly", y=total_length_columns, kind="bar", figsize=(fig_width, 12), width=0.8)

# Label each bar with its back-transformed (original) number of base pairs
for p in ax.patches:
    height = p.get_height()
    # log10(0) is -inf; anchor such labels at the baseline so the "0" is still drawn
    label_y = height if np.isfinite(height) else 0.0
    ax.annotate(str(round(10**height)), (p.get_x(), label_y + 0.2), rotation=90, fontsize=8)

# Extend the y-limit to give room to the labels
ax.set_ylim(0, ax.get_ylim()[1] + 0.3)

# Place ticks on whole exponents only and label them as powers of 10.
# Truncating automatically chosen ticks with int() would label fractional
# positions (e.g. 0.5 and 1.5) as 10^0 and 10^1.
exponents = list(range(int(ax.get_ylim()[1]) + 1))
ax.set_yticks(exponents)
ax.set_yticklabels([r'$10^{{{}}}$'.format(e) for e in exponents])

# Set x and y-axis labels
ax.set_xlabel("Assembly")
ax.set_ylabel("log10(# of basepairs)")

# Place the legend outside the axes and save the figure as png and svg
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.figure.savefig(output_contig_length_bp_png, format="png", bbox_inches="tight", transparent=True)
ax.figure.savefig(output_contig_length_bp_svg, format="svg", bbox_inches="tight", transparent=True)

# Display the plot
plt.show()


# Total number of assembled contigs 

In [None]:
# Scale the figure width with the number of samples
fig_width = 0.4 * len(SAMPLES)

# Bar chart of the log10-transformed contig count per assembly (df_log is
# built in the first plotting cell)
ax = df_log.plot(x="Assembly", y=["# contigs (>= 1000 bp)"], kind="bar", figsize=(fig_width, 12), width=0.8)

# Label each bar with its back-transformed (original) count
for p in ax.patches:
    height = p.get_height()
    # log10(0) is -inf; anchor such labels at the baseline so the "0" is still drawn
    label_y = height if np.isfinite(height) else 0.0
    ax.annotate(str(round(10**height)), (p.get_x(), label_y + 0.1), rotation=90, fontsize=17)

# Extend the y-limit to give room to the labels
ax.set_ylim(0, ax.get_ylim()[1] + .25)

# Place ticks on whole exponents only and label them as powers of 10.
# Truncating automatically chosen ticks with int() would label fractional
# positions (e.g. 0.5 and 1.5) as 10^0 and 10^1.
exponents = list(range(int(ax.get_ylim()[1]) + 1))
ax.set_yticks(exponents)
ax.set_yticklabels([r'$10^{{{}}}$'.format(e) for e in exponents])

# Set x and y-axis labels
ax.set_xlabel("Assembly")
ax.set_ylabel("log10(# of contigs)")

# Remove the legend (single series) and save the figure as png and svg
ax.get_legend().remove()
ax.figure.savefig(output_contig_number_total_png, format="png", bbox_inches="tight", transparent=True)
ax.figure.savefig(output_contig_number_total_svg, format="svg", bbox_inches="tight", transparent=True)

# Display the plot
plt.show()


# Total length of assembled bases 

In [None]:
# Store the total assembly length in millions of base pairs (rounded to 3
# decimals) as a new column on df, and derive the figure width from the sample count
df["Total length million"] = df["Total length"].div(1000000).round(3)
fig_width = .4 * len(SAMPLES)

# One bar per assembly
ax = df.plot.bar(x="Assembly", y=["Total length million"], figsize=(fig_width, 12), width=0.8)

# Write each bar's value (1 decimal) just above it
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(str(round(bar_height, 1)), (bar.get_x(), bar_height + 2), rotation=90, fontsize=17)

# Add headroom for the rotated labels, then label the axes
_, y_top = ax.get_ylim()
ax.set_ylim(0, y_top + 5)
ax.set_xlabel("Assembly")
ax.set_ylabel("Million basepairs (Mbp)")
for tick_kind in ("major", "minor"):
    ax.tick_params(axis='both', which=tick_kind)

# A single series needs no legend
ax.get_legend().remove()

# Write the figure in both formats, then display it
for out_path, out_format in (
    (output_contig_length_total_png, "png"),
    (output_contig_length_total_svg, "svg"),
):
    ax.figure.savefig(out_path, format=out_format, bbox_inches="tight", transparent=True)
plt.show()