# Virtual Metabolomics Mass Spectrometer (ViMMS) pipeline

In [None]:
# Make the local checkout of the simulator code importable.
# NOTE(review): this is a machine-specific absolute path; adjust it per user.
import sys
sys.path.append('C:\\Users\\joewa\\Work\\git\\clms\\Simulator\\codes')

# IPython magics: load the autoreload extension in mode 2 (reload modules
# before executing code) and render matplotlib figures inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import numpy as np
import pandas as pd
import pylab as plt
import matplotlib.patches as mpatches

In [None]:
from VMSfunctions.Chemicals import *
from VMSfunctions.Chromatograms import *
from VMSfunctions.MassSpec import *
from VMSfunctions.Controller import *
from VMSfunctions.Common import *
from VMSfunctions.DataGenerator import *
from VMSfunctions.TopNExperiment import *
from VMSfunctions.Roi import *
from VMSfunctions.PlotsForPaper import *

In [None]:
# Helper pulled in by the VMSfunctions star imports; presumably sets the
# logging verbosity to INFO -- confirm against its definition.
set_log_level_info()

## Parameters

In [None]:
# `os` is imported explicitly here; previously it was only in scope if one of
# the VMSfunctions star imports happened to re-export it.
import os

# Root of the shared project folder (machine-specific).
base_dir = 'C:\\Users\\joewa\\University of Glasgow\\Vinny Davies - CLDS Metabolomics Project\\'
# Folder of fragmentation mzML files; handed to get_params further down.
mzml_path = os.path.join(base_dir, 'Data\\Fusion_1578_Ronan_Daly_CLP_pHILIC_22May19\\Positive\\fragmentation\\mzML')
# fragfile = 'QCB_N10_DEW015.mzML'
# Full-scan mzML file from which the ROIs are extracted.
fullscan_file = os.path.join(base_dir, 'Data\\Fusion_1578_Ronan_Daly_CLP_pHILIC_22May19\\Positive\\fullscan\\QCB_22May19_1.mzML')

experiment_name = 'beerqcb'
# The output folder is an absolute path outside base_dir. It used to be wrapped
# in os.path.join(base_dir, ...), which on Windows discards base_dir anyway
# (an absolute component resets the join), so it is assigned directly.
experiment_out_dir = 'C:\\Users\\joewa\\Work\\data\\evaluation\\beerqcb\\mzML'

In [None]:
# Retention-time window used for ROI extraction and passed to get_params.
# NOTE(review): units are presumably seconds (cf. the commented 3*60 / 21*60
# alternative below) -- confirm.
min_rt, max_rt = 0, 1600

# min_rt = 3*60
# max_rt = 21*60

In [None]:
# kde_min_ms1_intensity = 0 # min intensity to be selected for kdes
# kde_min_ms2_intensity = 0

ROI extraction parameters

In [None]:
# ROI extraction settings.
roi_mz_tol = 30  # handed to make_roi together with mz_units='ppm'
roi_min_length, roi_min_intensity = 1, 0
# The ROI extraction window follows the global retention-time window.
roi_start_rt, roi_stop_rt = min_rt, max_rt

In [None]:
# roi_mz_tol = 10
# roi_min_length = 2
# roi_min_intensity = 10000
# roi_start_rt = min_rt
# roi_stop_rt = max_rt

Top-N parameters

In [None]:
# Settings shared by every Top-N run (all are passed to get_params).
ionisation_mode = POSITIVE
isolation_window = 1   # the isolation window in Dalton around a selected precursor ion
min_ms1_intensity = 0  # minimum ms1 intensity to fragment
mz_tol = 10
# Single-run defaults. The experiment grid further down passes the lists
# Ns / rt_tols to get_params rather than these two scalars.
N, rt_tol = 10, 15

## Train densities

In [None]:
# ds = DataSource()
# ds.load_data(mzml_path, file_name=fragfile)
# print('MS1')
# ds.plot_data(fragfile, ms_level=1, max_data=100000, min_rt=min_rt, max_rt=max_rt)
# print('MS2')
# ds.plot_data(fragfile, ms_level=2, max_data=100000, min_rt=min_rt, max_rt=max_rt)

#### Fit KDEs for (mz, intensity), rt and number of peaks for ms_level=2 only

In [None]:
# densities = PeakDensityEstimator(kde_min_ms1_intensity, kde_min_ms2_intensity, min_rt, max_rt, plot=True)
# densities.kde(ds, fragfile, 2, bandwidth_mz_intensity_rt=1.0, bandwidth_n_peaks=1.0)
# ps = PeakSampler(densities)

## Prepare dataset

Extract all ROIs

In [None]:
# Extract regions of interest from the full-scan file. make_roi returns two
# collections; only the first ("good") one is kept, the second is unused here.
mzml_file = fullscan_file
# Use the configured roi_min_intensity instead of a hard-coded 0 so that
# changing the parameter cell actually affects the extraction.
good_roi, junk = make_roi(mzml_file, mz_tol=roi_mz_tol, mz_units='ppm', min_length=roi_min_length,
                          min_intensity=roi_min_intensity, start_rt=roi_start_rt, stop_rt=roi_stop_rt)
all_roi = good_roi

In [None]:
# Display the total number of extracted ROIs.
len(all_roi)

How many singleton ROIs?

In [None]:
# Display the number of ROIs whose `n` attribute equals 1.
sum(1 for candidate in all_roi if candidate.n == 1)

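Keep only ROIs that can possibly be fragmented, i.e. those with at least one intensity above `roi_min_intensity`. This filter is currently disabled (commented out below), so all ROIs are kept.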

In [None]:
# keep = []
# for roi in all_roi:
#     if np.count_nonzero(np.array(roi.intensity_list) > roi_min_intensity) > 0:
#         keep.append(roi)

# print(len(keep))

In [None]:
# No intensity filtering is applied: every extracted ROI is kept.
keep = all_roi

Turn ROIs into chromatograms/chemicals

In [None]:
# Switch to the debug log level helper before the conversion.
set_log_level_debug()
# Build chemicals from the kept ROIs.
# NOTE(review): the first argument is None -- presumably the (disabled) peak
# sampler / density; confirm against RoiToChemicalCreator's signature.
rtcc = RoiToChemicalCreator(None, keep)

In [None]:
# Chemicals produced from the ROIs; also passed to get_params later on.
data = rtcc.chemicals
# Persist them as dataset.p inside experiment_out_dir via the save_obj helper.
# NOTE(review): assumes experiment_out_dir exists or that save_obj creates
# it -- confirm.
save_obj(data, os.path.join(experiment_out_dir, 'dataset.p'))

## Run Top-N Experiments

In [None]:
# density = ps.density_estimator

In [None]:
# Switch to the warning log level helper for the experiment runs.
set_log_level_warning()
# No density estimator is used (the KDE cells above are disabled), and the
# pbar flag handed to get_params is off.
density, pbar = None, False

In [None]:
# Full parameter grid, kept as a commented alternative like the other
# alternatives in this notebook. It used to be assigned and then immediately
# overwritten by the reduced grid below, i.e. it was dead code.
# Ns = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100]
# rt_tols = [1, 5, 10, 15, 20, 25, 30, 45, 60, 90, 120]

# Reduced grid actually used for the experiments.
Ns = [1, 2, 3, 4, 5, 10, 15, 20, 35, 50]
rt_tols = [15, 30, 60, 120]

In [None]:
# Map every (N, rt_tol) combination of the grid to its fragmentation file name.
# A comprehension keeps the iteration variables local, so the notebook-level
# Top-N parameters `N` and `rt_tol` are no longer overwritten with the last
# grid values as a side effect of building this dictionary.
fragfiles = {
    (top_n, dew): 'QCB_N%02d_DEW%03d.mzML' % (top_n, dew)
    for top_n in Ns
    for dew in rt_tols
}

In [None]:
# Build the list of experiment parameters from the grid (Ns x rt_tols) and the
# shared settings; the arguments are positional, so their order must match
# get_params' signature. The result is consumed by the run_*_experiment calls.
params = get_params(experiment_name, Ns, rt_tols, mz_tol, isolation_window, ionisation_mode, data, density, 
                    min_ms1_intensity, min_rt, max_rt, experiment_out_dir, pbar, mzml_path, fragfiles)

In [None]:
# for i in range(len(params)):
#     param = params[i]
#     run_serial_experiment(param, i, len(params))

In [None]:
# Run all experiments through the parallel runner; the %time magic reports how
# long the call takes.
%time run_parallel_experiment(params)