In [1]:
%load_ext autoreload
%autoreload 2
from matchms.importing import load_from_mgf
from matchms.exporting import save_as_mgf
import pandas as pd
import os
import msfeast.pipeline
#from msfeast.pipeline import Msfeast
import os
import pandas as pd

# Input locations for the example dataset: one MGF spectra file plus the
# quantification and treatment tables as CSV files.
print("Define Filepaths...")
test_data_directory = os.path.join("data", "mushroom_type_comparison")
filepath_test_spectra = os.path.join(test_data_directory, "spectra.mgf")
filepath_test_quant_table = os.path.join(test_data_directory, "quant_table.csv")
filepath_test_treat_table = os.path.join(test_data_directory, "treat_table.csv")

# Outputs are written into the same directory as the inputs.
# (A single-argument os.path.join() returns its argument unchanged, so the
# directory and file name are assigned directly.)
output_directory = test_data_directory
r_output_filename = "r_output.json"
r_filepath = os.path.join(output_directory, r_output_filename)
dashboard_output_filepath = os.path.join(output_directory, "dashboard_data.json")

# Only the two tables are read here; the spectra file is handed to the
# pipeline by path in a later cell.
print("Loading Input Data...")
treat_table = pd.read_csv(filepath_test_treat_table)
quant_table = pd.read_csv(filepath_test_quant_table)

Define Filepaths...
Loading Input Data...


In [2]:
# Create a fresh pipeline object; every later cell calls methods on this instance.
print("Initializing pipeline...")
pipeline = msfeast.pipeline.Msfeast()

# Hand the three inputs to the pipeline: the spectra as a file path, the
# quantification and treatment tables as the DataFrames read with pd.read_csv.
print("Attaching data...")
# NOTE(review): identifier_key="scans" presumably names the spectrum metadata
# field used as the feature identifier - confirm against the msfeast API.
pipeline.attach_spectra_from_file(filepath_test_spectra, identifier_key="scans")
pipeline.attach_quantification_table(quant_table)
pipeline.attach_treatment_table(treat_table)


Initializing pipeline...
Attaching data...


In [3]:
# Compute spectral similarities on the attached spectra and store them on the pipeline.
# NOTE(review): the measure is selected by name; "ModifiedCosine" presumably refers
# to the matchms similarity of the same name - confirm in msfeast.pipeline.
print("Running spectral similarity computations...")
pipeline.run_and_attach_spectral_similarity_computations("ModifiedCosine")

Running spectral similarity computations...


In [4]:
# Candidate values of k for the k-medoid grid. The call prints one row per
# value (columns: iloc, k, silhouette_score, random_seed_used); the iloc
# column is what the settings-selection step refers to.
kmedoid_k_values = [50, 100, 150, 200, 250, 300, 400, 500, 600, 800, 1000]

print("Run kmedoid grid...")
pipeline.run_and_attach_kmedoid_grid(kmedoid_k_values)

Run kmedoid grid...
Kmedoid grid results. Use to inform kmedoid classification selection ilocs.
    iloc     k  silhouette_score  random_seed_used
0      0    50          0.186953                 0
1      1   100          0.189126                 0
2      2   150          0.215336                 0
3      3   200          0.189144                 0
4      4   250          0.202870                 0
5      5   300          0.222685                 0
6      6   400          0.277351                 0
7      7   500          0.195643                 0
8      8   600          0.214177                 0
9      9   800          0.239880                 0
10    10  1000          0.284767                 0


In [5]:
# Row 6 of the printed k-medoid grid is k=400 (silhouette_score 0.277351).
# NOTE(review): row 10 (k=1000) scored marginally higher (0.284767); k=400 was
# presumably preferred to keep the number of clusters manageable - confirm.
# The index is positional, so re-check it whenever the grid's k values change.
selected_kmedoid_iloc = 6
pipeline.select_kmedoid_settings(iloc=selected_kmedoid_iloc)

In [6]:
# Candidate perplexity values for the t-SNE grid. The call prints one row per
# value (columns: iloc, perplexity, pearson_score, spearman_score,
# random_seed_used); the iloc column is what the selection step refers to.
tsne_perplexity_values = [20, 30, 40, 50, 100, 200]

print("Run t-sne grid...")
pipeline.run_and_attach_tsne_grid(tsne_perplexity_values)

Run t-sne grid...
T-sne grid results. Use to inform t-sne embedding selection.
   iloc  perplexity  pearson_score  spearman_score  random_seed_used
0     0          20       0.402873        0.371266                 0
1     1          30       0.419150        0.388948                 0
2     2          40       0.426179        0.401479                 0
3     3          50       0.419256        0.401680                 0
4     4         100       0.495364        0.478248                 0
5     5         200       0.544686        0.525507                 0


In [7]:
# Row 4 of the printed t-SNE grid is perplexity=100
# (pearson_score 0.495364, spearman_score 0.478248).
# NOTE(review): row 5 (perplexity=200) scored higher on both measures; the
# reason for choosing 100 is not recorded here - confirm.
# The index is positional, so re-check it whenever the perplexity grid changes.
selected_tsne_iloc = 4
pipeline.select_tsne_settings(iloc=selected_tsne_iloc)

In [8]:
# --- Statistical comparisons (R routine) ---
print("Initializing R runtime...")
# Delete an R output file left over from an earlier run before starting the
# comparison step (presumably so a stale result cannot be mistaken for a
# fresh one - confirm).
stale_r_output_present = os.path.isfile(r_filepath)
if stale_r_output_present:
    os.remove(r_filepath)
pipeline.run_and_attach_statistical_comparisons(output_directory, r_output_filename)

# --- Combine results into the dashboard data structure ---
# NOTE(review): top_k_max and alpha are passed through unchanged; alpha is
# presumably a significance threshold - confirm against the msfeast API.
print("Integrating pipeline results...")
pipeline.integrate_and_attach_dashboard_data(top_k_max=50, alpha=0.01)

# --- Write the dashboard JSON next to the input data ---
print("Exporting json file...")
pipeline.export_dashboard_json(filepath=dashboard_output_filepath)

print("Reached end of trial run.")

Initializing R runtime...
[1] "Starting Routine log at "   "2024-04-10 16:49:00.179679"
[1] "R Routine: run integration test..."
[1] "R Routine: Validating input file paths..."
[1] "R Routine: Loading required packages..."
[1] "R Routine: Reading input files..."
[1] "R Routine: running global test and fold change computations..."
[1] "R Routine: exporting globaltest and log fold change computations..."
[1] "R Routine: complete, file saved, exiting R session."
Integrating pipeline results...
Exporting json file...
Reached end of trial run.
