In [6]:
# std imports
import math
import os
import zipfile
from io import BytesIO
from pathlib import Path
from typing import Optional

# 3rd party imports
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sqlalchemy import bindparam, create_engine, text


#############
# parameters

### use some ISA runs for testing
raw_files = ["FLI18416std", "FLI18414std", "EXII01692std", "QEXI39066std"]

### folder to save the plots as json files
output_path = "graphics/"

### database connection URL
### Set the MPCQC_DB_URL environment variable to supply the URL (and with it
### the credentials) from outside the notebook.
### NOTE(review): the fallback below still embeds a user name and password;
### drop it once the environment variable is configured everywhere.
db_url = os.environ.get(
    "MPCQC_DB_URL", "mysql+mariadbconnector://mpcqc:quality@mpc-qc/mpcqc"
)


##########

def _fetch_where_in(connection, sql, param_name, values, echo=False):
    """Run a SELECT with one expanding ``IN`` parameter and return a DataFrame.

    Args:
        connection: open SQLAlchemy connection used to execute the statement.
        sql: statement text containing the placeholder ``:<param_name>``.
        param_name: name of the placeholder that receives ``values``.
        values: iterable of values for the ``IN`` clause; bound as a tuple.
        echo: if True, print the statement with the values rendered inline
            before executing it.

    Returns:
        pd.DataFrame with all fetched rows; the column names are taken from
        the result keys.
    """
    statement = text(sql).bindparams(
        bindparam(param_name, tuple(values), expanding=True)
    )
    if echo:
        print(statement.compile(compile_kwargs={"literal_binds": True}))
    result = connection.execute(statement)
    return pd.DataFrame(result.fetchall(), columns=result.keys())


engine = create_engine(db_url)

# Reset both frames first so that a failed query cannot leave results from an
# earlier run of this cell behind in the kernel.
file_df: Optional[pd.DataFrame] = None
ISA_df: Optional[pd.DataFrame] = None

# Both look-ups share one connection: first the rows of the requested raw
# files, then the ISA rows that belong to the ids of those files.
with engine.connect() as conn:
    file_df = _fetch_where_in(
        conn,
        "SELECT * FROM files WHERE filename IN :raw_files",
        "raw_files",
        raw_files,
    )

    ids = file_df["id"]

    ISA_df = _fetch_where_in(
        conn,
        "SELECT * FROM isa_data WHERE fileId IN :ids",
        "ids",
        ids,
        echo=True,
    )

print(ISA_df)

SELECT * FROM isa_data WHERE fileId IN (11966, 11967, 11968, 11969)
   fileId      run_name       creation_date  nrProteins  nrProteingroups  \
0   11966  QEXI39066std 2023-02-20 17:33:05        3918             1608   
1   11967  EXII01692std 2023-02-21 06:19:12        5957             3002   
2   11968   FLI18414std 2023-02-20 12:28:01        4872             2183   
3   11969   FLI18416std 2023-02-20 15:24:01        4955             2268   

   nrProteingroups_unfiltered  nrPeptides  nrPSMs  nrMS1  nrMS2  \
0                        2247        5305    7533   9515  26525   
1                        3390       16244   16811   3594  71054   
2                        2568        8323    9785   9976  34534   
3                        2624        8876   10427  10002  34282   

   total_ion_current  base_peak_intensity  tic_up_to_105mins  \
0       9.860618e+08         3.971713e+08       9.514153e+08   
1       3.950855e+09         1.318303e+09       3.950855e+09   
2       1.697500e+09   

In [7]:
# Figure 1: Number of proteins, protein groups and unfiltered protein groups

# Reshape the three count columns into long format (one row per run and count
# type) so that they can be drawn as grouped bars coloured by count type.
df_pl1 = ISA_df[["fileId", "run_name", "nrProteins", "nrProteingroups", "nrProteingroups_unfiltered"]]
df_pl1_long = df_pl1.melt(id_vars=["fileId", "run_name"])

fig1 = px.bar(
    df_pl1_long,
    x="run_name",
    y="value",
    color="variable",
    barmode="group",
    title="Number of proteins, protein groups and unfiltered protein groups",
)
# Disable exponent formatting of the y-axis tick labels.
fig1.update_yaxes(exponentformat="none")
fig1.show()

# Save the figure as JSON. The output folder is created on demand so that the
# cell also works on a fresh checkout, and the path is joined with pathlib
# instead of string concatenation (which produced "graphics//...").
fig1_file = Path(output_path) / "isafig1_barplot_proteins_proteingroups.json"
fig1_file.parent.mkdir(parents=True, exist_ok=True)
fig1_file.write_text(plotly.io.to_json(fig1), encoding="utf-8")

In [9]:
# Figure 2: Number of peptides

# Only the identifying columns and the peptide count are needed for this plot.
df_pl2 = ISA_df[["fileId", "run_name", "nrPeptides"]]

fig2 = px.bar(
    df_pl2,
    x="run_name",
    y="nrPeptides",
    title="Number of peptides",
)
# Disable exponent formatting of the y-axis tick labels.
fig2.update_yaxes(exponentformat="none")
fig2.show()

# Save the figure as JSON. The output folder is created on demand so that the
# cell also works on a fresh checkout, and the path is joined with pathlib
# instead of string concatenation (which produced "graphics//...").
fig2_file = Path(output_path) / "isafig2_barplot_peptides.json"
fig2_file.parent.mkdir(parents=True, exist_ok=True)
fig2_file.write_text(plotly.io.to_json(fig2), encoding="utf-8")

In [10]:
# Figure 3: Number of PSMs

# Only the identifying columns and the PSM count are needed for this plot.
df_pl3 = ISA_df[["fileId", "run_name", "nrPSMs"]]

fig3 = px.bar(
    df_pl3,
    x="run_name",
    y="nrPSMs",
    title="Number of PSMs",
)
# Disable exponent formatting of the y-axis tick labels.
fig3.update_yaxes(exponentformat="none")
fig3.show()

# Save the figure as JSON. The output folder is created on demand so that the
# cell also works on a fresh checkout, and the path is joined with pathlib
# instead of string concatenation (which produced "graphics//...").
fig3_file = Path(output_path) / "isafig3_barplot_PSMs.json"
fig3_file.parent.mkdir(parents=True, exist_ok=True)
fig3_file.write_text(plotly.io.to_json(fig3), encoding="utf-8")