# Illustrative example: processing the wheat data

In [None]:
import specxplore.importing
from specxplore.session_data import load_specxplore_object_from_pickle
from specxplore.dashboard import SpecxploreDashboard
import os # builds file paths that work across operating systems; users may instead write OS-specific file paths directly
import pandas as pd

In [None]:
# Input locations, assembled with os.path.join so the separators fit the host OS.
data_file_path = os.path.join("data", "data_wheat_output", "wheat_plus_phophe.mgf")
model_file_path = os.path.join("models", "pos")
metadata_csv_filepath = os.path.join(
    "data",
    "data_phophe_output",
    "metadata_phophe_standards_pos_processed.csv",
)

# Output locations; these paths are handed to the ms2query and session export steps.
specxplore_output_filepath_ms2query = os.path.join(
    "output", "ms2query_output_wheat.csv"
)
specxplore_output_filepath_session = os.path.join(
    "output", "specxplore_session_data_wheat.pickle"
)

# Read the standards metadata table and cast its feature_id column to the
# pandas "string" dtype.
standards_metadata = pd.read_csv(metadata_csv_filepath).astype(
    {"feature_id": "string"}
)

In [None]:
# Create the importing pipeline and attach the spectra stored at data_file_path.
pipeline_instance = specxplore.importing.specxploreImportingPipeline()
pipeline_instance.attach_spectra_from_file(data_file_path)

# Spectral processing with explicit peak-count and m/z bounds.
pipeline_instance.run_spectral_processing(
    min_mz=0,
    max_mz=1000,
    minimum_number_of_peaks=3,
    maximum_number_of_peaks=200,
)

# Similarity computations receive the model location (model_file_path).
pipeline_instance.run_spectral_similarity_computations(model_file_path)

In [None]:
# Slow step (~11 minutes). If the ms2query results are already available from an
# earlier run, skip this cell and use the following code chunk instead.
pipeline_instance.run_ms2query(
    model_file_path,
    results_filepath=specxplore_output_filepath_ms2query,
)

In [None]:
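# Alternative to running ms2query again: uncomment the line below to attach results that were already written to file.
# pipeline_instance.attach_ms2query_results(specxplore_output_filepath_ms2query)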

In [None]:
# One perplexity value for the t-SNE grid and three k values for the k-medoid grid.
# Entries are picked by position later on, so list order matters.
pipeline_instance.run_and_attach_tsne_grid(perplexity_values=[600])
pipeline_instance.run_and_attach_kmedoid_grid(k_values=[8, 160, 200])

In [None]:
# Positional selections; presumably these index into the t-SNE and k-medoid
# grids computed earlier (one perplexity value, three k values) - confirm
# against the specxplore documentation.
selected_tsne_iloc = 0
selected_kmedoid_ilocs = [0, 1, 2]

# Every feature_id listed in the standards metadata is highlighted.
features_to_highlight = standards_metadata["feature_id"].to_list()

In [None]:
# Attach the metadata table and the highlight list to the pipeline.
pipeline_instance.attach_metadata_from_data_frame(standards_metadata)
pipeline_instance.attach_feature_highlights(features_to_highlight)

# Apply the t-SNE and k-medoid selections chosen above.
pipeline_instance.select_tsne_settings(selected_tsne_iloc)
pipeline_instance.select_kmedoid_settings(selected_kmedoid_ilocs)

# Write the session data to disk.
# NOTE(review): force=True presumably overwrites an existing file - confirm.
pipeline_instance.export_specxplore_session_data(
    filepath=specxplore_output_filepath_session,
    force=True,
)

In [None]:
# Reload the exported session object from disk.
# NOTE: the session file is a pickle, so only load files you created or trust.
data = load_specxplore_object_from_pickle(
    filepath=specxplore_output_filepath_session,
)

# Build the dashboard around the session data and start the app.
dashboard = SpecxploreDashboard(data)
dashboard.run_app(jupyter_mode="external")