In [9]:
%load_ext autoreload
%autoreload 2
from matchms.importing import load_from_mgf
from matchms.exporting import save_as_mgf
import pandas as pd
import os
import msfeast.pipeline
#from msfeast.pipeline import Msfeast
import os
import pandas as pd

print("Define Filepaths...")
# Input files of the test data set. All paths are relative, so they resolve
# against the current working directory of the kernel.
test_data_directory = os.path.join("data", "omsw_pleurotus_ms2deepscore")
filepath_test_spectra = os.path.join(test_data_directory, "spectra.mgf")
filepath_test_quant_table = os.path.join(test_data_directory, "quant_table.csv")
filepath_test_treat_table = os.path.join(test_data_directory, "treat_table.csv")

# NOTE(review): unlike the data paths, the model is looked up one level above
# the working directory -- confirm this matches the intended project layout.
model_path = os.path.join("..", "models", "ms2deepscore_model.pt")

# Outputs are written into the same directory that holds the inputs. The
# directory and the bare file name are kept as separate variables because the
# statistics step takes them as two separate arguments.
output_directory = test_data_directory
r_output_filename = "r_output.json"
r_filepath = os.path.join(output_directory, r_output_filename)
dashboard_output_filepath = os.path.join(output_directory, "dashboard_data.json")

print("Loading Input Data...")
# Read the treatment table and the quantification table from their CSV files
# into pandas DataFrames (default parsing options).
treat_table = pd.read_csv(filepath_or_buffer=filepath_test_treat_table)
quant_table = pd.read_csv(filepath_or_buffer=filepath_test_quant_table)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Define Filepaths...
Loading Input Data...


In [10]:
# Build the pipeline object with its default constructor arguments, then
# attach the three inputs (spectra file, quantification table, treatment table).
print("Initializing pipeline...")
pipeline = msfeast.pipeline.Msfeast()

print("Attaching data...")
# Spectra are handed over as a file path; identifier_key="scans" presumably
# names the spectrum metadata field used as feature identifier -- TODO confirm.
pipeline.attach_spectra_from_file(filepath_test_spectra, identifier_key="scans")
# Both tables are passed as the already-loaded pandas DataFrames, not as paths.
pipeline.attach_quantification_table(quant_table)
pipeline.attach_treatment_table(treat_table)


Initializing pipeline...
Attaching data...


In [11]:
print("Running spectral similarity computations...")
# Compute pairwise spectral similarities with the "ms2deepscore" method and
# store the result on the pipeline object.
# NOTE(review): model_path names a .pt file although the keyword is called
# model_directory -- confirm the method expects a file path here.
pipeline.run_and_attach_spectral_similarity_computations(
    "ms2deepscore",
    model_directory=model_path,
)

Running spectral similarity computations...
The model version (0.5.0) does not match the version of MS2Deepscore (2.0.0), consider downloading a new model or changing the MS2Deepscore version


1975it [00:09, 204.21it/s]


In [12]:
print("Run kmedoid grid...")
# Evaluate k-medoid clustering for each candidate number of clusters k. In the
# recorded output the call prints one row (iloc) per k with its silhouette
# score; that table informs the selection made in the following cell.
pipeline.run_and_attach_kmedoid_grid(
    [50, 100, 150, 200, 250, 300, 400, 500, 600, 800, 1000]
)

Run kmedoid grid...
Kmedoid grid results. Use to inform kmedoid classification selection ilocs.
    iloc     k  silhouette_score  random_seed_used
0      0    50          0.198892                 0
1      1   100          0.203835                 0
2      2   150          0.206368                 0
3      3   200          0.220599                 0
4      4   250          0.231379                 0
5      5   300          0.239876                 0
6      6   400          0.250223                 0
7      7   500          0.259888                 0
8      8   600          0.264054                 0
9      9   800          0.257956                 0
10    10  1000          0.245360                 0


In [13]:
# Choose one row of the k-medoid grid by its position (iloc). In the recorded
# grid output, iloc 4 corresponds to k=250 (silhouette score 0.231).
# NOTE(review): the highest recorded silhouette score is at iloc 8 (k=600,
# 0.264) -- confirm that iloc 4 is the intended trade-off.
pipeline.select_kmedoid_settings(iloc=4)

In [14]:
print("Run t-sne grid...")
# Evaluate a t-SNE embedding for each listed value. In the recorded output the
# call prints one row (iloc) per value in a "perplexity" column together with
# Pearson and Spearman scores; that table informs the selection in the
# following cell.
pipeline.run_and_attach_tsne_grid([20, 30, 40, 50, 100, 200])

Run t-sne grid...
T-sne grid results. Use to inform t-sne embedding selection.
   iloc  perplexity  pearson_score  spearman_score  random_seed_used
0     0          20       0.588174        0.563051                 0
1     1          30       0.589654        0.563378                 0
2     2          40       0.598774        0.574423                 0
3     3          50       0.594160        0.565570                 0
4     4         100       0.587254        0.584420                 0
5     5         200       0.612820        0.612633                 0


In [15]:
# Choose one row of the t-SNE grid by its position (iloc). In the recorded
# grid output, iloc 3 corresponds to perplexity 50 (Pearson 0.594, Spearman 0.566).
# NOTE(review): both recorded scores peak at iloc 5 (perplexity 200) --
# confirm that iloc 3 is the intended choice.
pipeline.select_tsne_settings(iloc=3)

In [16]:
print("Initializing R runtime...")
# Delete an R output file left over from a previous run before starting the
# statistics step, so an old file cannot be mistaken for a fresh result.
if os.path.isfile(r_filepath):
    os.remove(r_filepath)
# The step receives the output directory and the bare file name separately;
# joined together they give r_filepath.
pipeline.run_and_attach_statistical_comparisons(
    output_directory,
    r_output_filename,
)

print("Integrating pipeline results...")
# NOTE(review): top_k_max=50 and alpha=0.01 are hard-coded here; their exact
# meaning is defined by the pipeline (alpha is presumably a significance
# threshold, top_k_max presumably an upper bound on neighbours per feature)
# -- TODO confirm and consider naming them as constants.
pipeline.integrate_and_attach_dashboard_data(top_k_max=50, alpha=0.01)

print("Exporting json file...")
# Write the integrated dashboard data to the JSON path defined in the first cell.
pipeline.export_dashboard_json(filepath=dashboard_output_filepath)

print("Processing complete.")

Initializing R runtime...
[1] "Starting Routine log at "   "2024-04-15 16:20:52.958224"
[1] "R Routine: run integration test..."
[1] "R Routine: Validating input file paths..."
[1] "R Routine: Loading required packages..."
[1] "R Routine: Reading input files..."
[1] "R Routine: running global test and fold change computations..."
[1] "R Routine: exporting globaltest and log fold change computations..."
[1] "R Routine: complete, file saved, exiting R session."
Integrating pipeline results...
Exporting json file...
Processing complete.
