# MinION basecalling stats

In [46]:
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objects as go
import plotly.express as px

In [47]:
import os
# Show the working directory; the relative input path used further down is resolved against it.
os.getcwd()

'/run/user/1000/gvfs/sftp:host=minion.local,user=kilpert/vol/minion/minion_basecalling/workflow/notebooks'

In [48]:
import socket
# The machine name is checked further down to choose between the hard-coded
# local paths and the values provided through the `snakemake` object.
hostname = socket.gethostname()
hostname

'kilpert-pc'

In [49]:
# Resolve the input table, the run name and the output file.
# Hosts whose name ends in "-pc" are treated as a local interactive session and
# use a hard-coded example run; on any other host the values are read from the
# `snakemake` object (expected to be provided by Snakemake when it executes
# this notebook).
if hostname.endswith("-pc"):
    ##stats_tsv = "../../results/001_20220412_FAME3/dna_r9.4.1_450bps_fast/guppy_basecaller_fastq_alias/001_20220412_FAME3.flowcell.fastq_stats.tsv"
    stats_tsv = "../../results/001_20220412_FAME3/dna_r9.4.1_450bps_sup/guppy_basecaller_fastq_alias/001_20220412_FAME3.flowcell.fastq_stats.tsv"
    # The run name is the file name up to its first dot.
    run_name = os.path.basename(stats_tsv).split(".")[0]
    outfile = f"{run_name}.fastq_stats.html"
else:
    stats_tsv = snakemake.input.tsv
    run_name = snakemake.params.run
    # `snakemake.output` is a list-like container of output files, not a path.
    # Take its first entry so that `outfile` is a plain string in both branches.
    outfile = str(snakemake.output[0])

In [50]:
# Show which statistics table will be read.
stats_tsv

'../../results/001_20220412_FAME3/dna_r9.4.1_450bps_sup/guppy_basecaller_fastq_alias/001_20220412_FAME3.flowcell.fastq_stats.tsv'

In [51]:
# Show the run name that is used in all figure titles.
run_name

'001_20220412_FAME3'

In [52]:
# Show the path the combined HTML report will be written to.
outfile

'001_20220412_FAME3.fastq_stats.html'

## Data

In [53]:
# Load the tab-separated statistics table (one row per sample and QC status)
# and display it.
df = pd.read_csv(filepath_or_buffer=stats_tsv, delimiter="\t")
df

Unnamed: 0,sample,qc,reads,bases
0,PAY002,fail,4371,5606222.0
1,PAY004,fail,8498,26436426.0
2,PAY005,fail,11408,45417195.0
3,PAY004_F1-R1,fail,30030,24660597.0
4,PAY011,fail,9502,19315887.0
...,...,...,...,...
189,barcode93,pass,2,2593.0
190,barcode94,pass,3,10084.0
191,barcode95,pass,3,8841.0
192,barcode96,pass,4,9131.0


## Fill NA in bases

In [54]:
# Number of rows without a value in the "bases" column.
len(df[df["bases"].isna()])

3

In [55]:
# Replace a missing base count with zero so the totals computed later are not NaN.
bases_missing = df["bases"].isna()
df.loc[bases_missing, "bases"] = 0

In [56]:
# Confirm that no row is left without a value in the "bases" column.
len(df[df["bases"].isna()])

0

## Reads

In [57]:
# Bar chart of the read count per sample, one trace per QC status.
# x and y of each trace are taken from the same filtered rows: plotly pairs the
# two arrays by position, so using the unfiltered sample column as x would only
# label the bars correctly if pass and fail rows happened to share one order.
df_pass = df[df["qc"] == "pass"]
df_fail = df[df["qc"] == "fail"]

trace1 = go.Bar(
    x=df_pass["sample"],
    y=df_pass["reads"],
    name="pass",
    marker={"color": "blue"})

trace2 = go.Bar(
    x=df_fail["sample"],
    y=df_fail["reads"],
    name="fail",
    marker={"color": "red"})

data = [trace1, trace2]
layout = go.Layout(title=f"{run_name} - Reads (n)", yaxis_title="Reads (n)")
fig1 = go.Figure(data=data, layout=layout)
fig1.show(config={'displaylogo':False}) # hide plotly logo
##fig1.write_html(f"{run_name} - MinION Reads.html", config={'displaylogo':False})

## Bases

In [58]:
# Bar chart of the base count per sample, one trace per QC status.
# x and y of each trace are taken from the same filtered rows: plotly pairs the
# two arrays by position, so using the unfiltered sample column as x would only
# label the bars correctly if pass and fail rows happened to share one order.
df_pass = df[df["qc"] == "pass"]
df_fail = df[df["qc"] == "fail"]

trace1 = go.Bar(
    x=df_pass["sample"],
    y=df_pass["bases"],
    name="pass",
    marker={"color": "blue"})

trace2 = go.Bar(
    x=df_fail["sample"],
    y=df_fail["bases"],
    name="fail",
    marker={"color": "red"})

data = [trace1, trace2]
layout = go.Layout(title=f"{run_name} - Bases (bp)", yaxis_title="Bases (bp)")
fig2 = go.Figure(data=data, layout=layout)
fig2.show(config={'displaylogo':False}) # hide plotly logo
##fig2.write_html(f"{run_name} - MinION Bases.html", config={'displaylogo':False})

## Percent

In [59]:
# Total number of reads over all rows (pass and fail together); used below as
# the denominator of the read percentages.
total_reads = sum(df["reads"])
total_reads

4595125

In [60]:
# Share of each row in the total read count, expressed in percent.
reads_fraction = df["reads"] / total_reads
df["reads_perc"] = reads_fraction * 100

In [61]:
# Total number of bases over all rows (pass and fail together); used below as
# the denominator of the base percentages. "bases" is a float column, so the
# total is a float as well.
total_bases = sum(df["bases"])
total_bases

6456111076.0

In [62]:
# Share of each row in the total base count, expressed in percent.
bases_fraction = df["bases"] / total_bases
df["bases_perc"] = bases_fraction * 100

In [63]:
# Inspect the table with the two percentage columns added.
df

Unnamed: 0,sample,qc,reads,bases,reads_perc,bases_perc
0,PAY002,fail,4371,5606222.0,0.095123,0.086836
1,PAY004,fail,8498,26436426.0,0.184935,0.409479
2,PAY005,fail,11408,45417195.0,0.248263,0.703476
3,PAY004_F1-R1,fail,30030,24660597.0,0.653519,0.381973
4,PAY011,fail,9502,19315887.0,0.206784,0.299188
...,...,...,...,...,...,...
189,barcode93,pass,2,2593.0,0.000044,0.000040
190,barcode94,pass,3,10084.0,0.000065,0.000156
191,barcode95,pass,3,8841.0,0.000065,0.000137
192,barcode96,pass,4,9131.0,0.000087,0.000141


In [64]:
# Sanity check: the read percentages of all rows should add up to 100.
sum( df["reads_perc"] )

100.0

In [65]:
# Sanity check: the base percentages of all rows should add up to 100.
sum( df["bases_perc"] )

100.0

In [66]:
# Bar chart of the read share (%) per sample, one trace per QC status.
# x and y of each trace are taken from the same filtered rows: plotly pairs the
# two arrays by position, so using the unfiltered sample column as x would only
# label the bars correctly if pass and fail rows happened to share one order.
df_pass = df[df["qc"] == "pass"]
df_fail = df[df["qc"] == "fail"]

trace1 = go.Bar(
    x=df_pass["sample"],
    y=df_pass["reads_perc"],
    name="pass",
    marker={"color": "blue"})

trace2 = go.Bar(
    x=df_fail["sample"],
    y=df_fail["reads_perc"],
    name="fail",
    marker={"color": "red"})

data = [trace1, trace2]
layout = go.Layout(title=f"{run_name} - Reads (%)", yaxis_title="Reads (%)")
fig3 = go.Figure(data=data, layout=layout)
fig3.show(config={'displaylogo':False}) # hide plotly logo
##fig3.write_html(f"{run_name} - MinION Reads perc.html", config={'displaylogo':False})

In [67]:
# Bar chart of the base share (%) per sample, one trace per QC status.
# x and y of each trace are taken from the same filtered rows: plotly pairs the
# two arrays by position, so using the unfiltered sample column as x would only
# label the bars correctly if pass and fail rows happened to share one order.
df_pass = df[df["qc"] == "pass"]
df_fail = df[df["qc"] == "fail"]

trace1 = go.Bar(
    x=df_pass["sample"],
    y=df_pass["bases_perc"],
    name="pass",
    marker={"color": "blue"})

trace2 = go.Bar(
    x=df_fail["sample"],
    y=df_fail["bases_perc"],
    name="fail",
    marker={"color": "red"})

data = [trace1, trace2]
layout = go.Layout(title=f"{run_name} - Bases (%)", yaxis_title="Bases (%)")
fig4 = go.Figure(data=data, layout=layout)
fig4.show(config={'displaylogo':False}) # hide plotly logo
##fig4.write_html(f"{run_name} - MinION Bases perc.html", config={'displaylogo':False})

## Pie charts

In [68]:
# Show the table again before the pie-chart classes are derived from it.
df

Unnamed: 0,sample,qc,reads,bases,reads_perc,bases_perc
0,PAY002,fail,4371,5606222.0,0.095123,0.086836
1,PAY004,fail,8498,26436426.0,0.184935,0.409479
2,PAY005,fail,11408,45417195.0,0.248263,0.703476
3,PAY004_F1-R1,fail,30030,24660597.0,0.653519,0.381973
4,PAY011,fail,9502,19315887.0,0.206784,0.299188
...,...,...,...,...,...,...
189,barcode93,pass,2,2593.0,0.000044,0.000040
190,barcode94,pass,3,10084.0,0.000065,0.000156
191,barcode95,pass,3,8841.0,0.000065,0.000137
192,barcode96,pass,4,9131.0,0.000087,0.000141


In [69]:
## classes for pie chart
# Every row is put into one class. Precedence, highest first:
#   1. sample name starts with "unclassified" -> "unclassified"
#   2. sample name starts with "barcode"      -> "other barcode"
#   3. any other sample name, split by QC     -> "Sample (fail)" / "Sample (pass)"
sample_names = df["sample"]
is_unclassified = sample_names.str.startswith("unclassified")
is_barcode = sample_names.str.startswith("barcode")
is_named_sample = ~is_unclassified & ~is_barcode

df["class"] = None
df.loc[is_unclassified, "class"] = "unclassified"
df.loc[~is_unclassified & is_barcode, "class"] = "other barcode"
df.loc[is_named_sample & (df["qc"] == "fail"), "class"] = "Sample (fail)"
df.loc[is_named_sample & (df["qc"] == "pass"), "class"] = "Sample (pass)"

# Order the rows by class name. The pie-chart cells turn off plotly's own slice
# sorting, presumably so the slices appear in this order.
df = df.sort_values(by="class", ascending=True)

df

Unnamed: 0,sample,qc,reads,bases,reads_perc,bases_perc,class
0,PAY002,fail,4371,5606222.0,0.095123,0.086836,Sample (fail)
28,VA33_235,fail,1546,1665962.0,0.033644,0.025804,Sample (fail)
27,VA33_228,fail,3826,6469334.0,0.083262,0.100205,Sample (fail)
26,VA33_213,fail,1376,1279553.0,0.029945,0.019819,Sample (fail)
25,VA33_211_F1-R1,fail,30933,14580282.0,0.673170,0.225837,Sample (fail)
...,...,...,...,...,...,...,...
77,barcode78,fail,76,250642.0,0.001654,0.003882,other barcode
71,barcode72,fail,49,67607.0,0.001066,0.001047,other barcode
191,barcode95,pass,3,8841.0,0.000065,0.000137,other barcode
96,unclassified,fail,435865,848134490.0,9.485379,13.136925,unclassified


In [70]:
# List the distinct classes that were assigned.
df["class"].unique()

array(['Sample (fail)', 'Sample (pass)', 'other barcode', 'unclassified'],
      dtype=object)

In [71]:
# Pie chart of the read counts per class, with a fixed colour for every class.
class_colors = {
    'unclassified': 'black',
    'other barcode': 'grey',
    'Sample (pass)': 'green',
    'Sample (fail)': 'orange',
}
fig5 = px.pie(
    data_frame=df,
    names='class',
    values='reads',
    color='class',
    color_discrete_map=class_colors,
    title=f"{run_name} - Reads",
)
# Turn off plotly's reordering of the slices.
fig5.update_traces(sort=False)
fig5.show(config={'displaylogo': False})

In [72]:
# Pie chart of the base counts per class, with a fixed colour for every class.
class_colors = {
    'unclassified': 'black',
    'other barcode': 'grey',
    'Sample (pass)': 'green',
    'Sample (fail)': 'orange',
}
fig6 = px.pie(
    data_frame=df,
    names='class',
    values='bases',
    color='class',
    color_discrete_map=class_colors,
    title=f"{run_name} - Bases",
)
# Turn off plotly's reordering of the slices.
fig6.update_traces(sort=False)
fig6.show(config={'displaylogo': False})

## Save to one HTML file

In [75]:
# Write all six figures into one HTML file, in this order: reads pie, reads
# bars (n), reads bars (%), bases pie, bases bars (bp), bases bars (%).
# Each figure is emitted as an HTML fragment (full_html=False). Only the first
# fragment carries the plotly.js <script> tag (loaded from the CDN); the later
# fragments reuse that library instead of loading it again.
# The file is opened as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
figures = [fig5, fig1, fig3, fig6, fig2, fig4]
with open(str(outfile), 'w', encoding="utf-8") as f:
    for position, fig in enumerate(figures):
        f.write(fig.to_html(
            full_html=False,
            include_plotlyjs='cdn' if position == 0 else False,
            config={'displaylogo':False}))

In [74]:
# Show the path of the HTML report.
outfile

'001_20220412_FAME3.fastq_stats.html'