# Experiment: Varying N in top-N DDA fragmentation

We demonstrate that the simulator can be used for scan-level closed-loop DDA experiments. 
- Take an existing data. Find out which MS1 peaks are linked to which MS2 peaks.
- Run all MS1 peaks through the simulator’s Top-N protocol. 
- For the top-100 most intense MS1 peaks, how many got fragmented in the simulator as we change N?
- If N is greater than the real data, do we see the same MS1 peaks from (1) being fragmented again, plus additional fragment peaks?
- Verification on actual machine.
- Talk to stefan about machine time.

In [1]:
%matplotlib inline

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import pandas as pd
import sys
import os
import scipy.stats
import pylab as plt
from IPython import display
import pylab as plt
from random import random, shuffle
from joblib import Parallel, delayed
import multiprocessing

In [4]:
sys.path.append('../codes')

In [5]:
from VMSfunctions.Chemicals import *
from VMSfunctions.Chromatograms import *
from VMSfunctions.MassSpec import *
from VMSfunctions.Controller import *
from VMSfunctions.Common import *
from VMSfunctions.DataGenerator import *
from VMSfunctions.Noise import *

In [6]:
set_log_level_debug()

### Load densities trained on beer1pos data (see [loader_kde](loader_kde.ipynb)).

In [7]:
base_dir = 'C:\\Users\\joewa\\University of Glasgow\\Vinny Davies - CLDS Metabolomics Project\\'

In [8]:
ps = load_obj(base_dir + 'Trained Models\\peak_sampler_beer1pos_fragmentation.p')

### Load prepared data (BEER1POS)

In [9]:
base_dir = 'C:\\Users\\joewa\\Work\\git\\clms\\Simulator'

In [10]:
dataset = load_obj(os.path.join(base_dir, 'models\\dda_results\\dataset.p'))
noisy_dataset = load_obj(os.path.join(base_dir, 'models\\dda_results\\noisy_dataset.p'))

### Experiment by varying N and rt_tol

In [11]:
def run_experiment(param):
    analysis_name = param['analysis_name']
    mzml_out = param['mzml_out']
    pickle_out = param['pickle_out']
    if os.path.isfile(mzml_out) and os.path.isfile(pickle_out): 
        print('Skipping %s' % (analysis_name))
    else:    
        print('Processing %s' % (analysis_name))
        mass_spec = IndependentMassSpectrometer(param['ionisation_mode'], param['data'], density=param['density'])
        controller = TopNController(mass_spec, param['N'], param['isolation_window'], 
                                    param['rt_tol'], param['min_ms1_intensity'])
        controller.run(param['min_rt'], param['max_rt'], progress_bar=param['pbar'])
        controller.write_mzML(analysis_name, mzml_out)  
        save_obj(controller, pickle_out)
        return analysis_name

In [12]:
def run_parallel_experiment(params):
    import ipyparallel as ipp
    rc = ipp.Client()
    dview = rc[:] # use all engines

    with dview.sync_imports():
        from VMSfunctions.MassSpec import IndependentMassSpectrometer
        from VMSfunctions.Controller import TopNController
        from VMSfunctions.Common import save_obj
        import os
        
    analysis_names = dview.map_sync(run_experiment, params)
    for analysis_name in analysis_names:
        print(analysis_name)

In [13]:
def run_serial_experiment(param, i, total):
    print('Processing \t%d/%d\t%s' % (i+1, total, param['analysis_name']))
    run_experiment(param)

#### Set parameters

In [14]:
def get_params(Ns, rt_tols, isolation_window, ionisation_mode, data, density, 
               min_ms1_intensity, min_rt, max_rt, 
               base_dir, pbar):
    print('N =', Ns)
    print('rt_tol =', rt_tols)    
    params = []
    for N in Ns:
        for rt_tol in rt_tols:
            analysis_name = 'experiment_N_%d_rttol_%d' % (N, rt_tol)    
            mzml_out = os.path.join(base_dir, 'models\\dda_results\\%s.mzML' % analysis_name)    
            pickle_out = os.path.join(base_dir, 'models\\dda_results\\%s.p' % analysis_name)            
            params.append({
                'N': N, 
                'rt_tol': rt_tol,
                'min_ms1_intensity': min_ms1_intensity,
                'isolation_window': isolation_window,
                'ionisation_mode': ionisation_mode,
                'data': data,
                'density': density,
                'min_rt': min_rt,
                'max_rt': max_rt,
                'analysis_name': analysis_name,
                'mzml_out': mzml_out,
                'pickle_out': pickle_out,
                'pbar': pbar
            })
    print('len(params) =', len(params))
    return params

In [15]:
isolation_window = 0.5   # the isolation window in Dalton around a selected precursor ion
ionisation_mode = POSITIVE
data = noisy_dataset
density = ps.density_estimator
min_ms1_intensity = 2.5E5 # minimum ms1 intensity to fragment
min_rt = 3*60
max_rt = 21*60

Varying Ns

In [16]:
Ns = list(range(2, 101, 2)) # top-N DDA fragmentation
rt_tols = [15] # the rt window around a selected precursor ion to prevent it from fragmented multiple times

In [17]:
# set_log_level_warning()
# pbar = True
# params = get_params(Ns, rt_tols, isolation_window, ionisation_mode, data, density, 
#                     min_ms1_intensity, min_rt, max_rt, base_dir, pbar)

# for i in range(len(params)):
#     param = params[i]
#     run_serial_experiment(param, i, len(params))

In [21]:
set_log_level_warning()
pbar = False
params = get_params(Ns, rt_tols, isolation_window, ionisation_mode, data, density, 
                    min_ms1_intensity, min_rt, max_rt, base_dir, pbar)
run_parallel_experiment(params)

N = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100]
rt_tol = [15]
len(params) = 50
importing IndependentMassSpectrometer from VMSfunctions.MassSpec on engine(s)
importing TopNController from VMSfunctions.Controller on engine(s)
importing save_obj from VMSfunctions.Common on engine(s)
importing os on engine(s)
experiment_N_2_rttol_15
experiment_N_4_rttol_15
experiment_N_6_rttol_15
experiment_N_8_rttol_15
experiment_N_10_rttol_15
experiment_N_12_rttol_15
experiment_N_14_rttol_15
experiment_N_16_rttol_15
experiment_N_18_rttol_15
experiment_N_20_rttol_15
experiment_N_22_rttol_15
experiment_N_24_rttol_15
experiment_N_26_rttol_15
experiment_N_28_rttol_15
experiment_N_30_rttol_15
experiment_N_32_rttol_15
experiment_N_34_rttol_15
experiment_N_36_rttol_15
experiment_N_38_rttol_15
experiment_N_40_rttol_15
experiment_N_42_rttol_15
experiment_N_44_rt

Vary rt_tols

In [22]:
Ns = [10] # top-N DDA fragmentation
rt_tols = list(range(5, 31, 5)) # the rt window around a selected precursor ion to prevent it from fragmented multiple times

In [23]:
set_log_level_warning()
pbar = False
params = get_params(Ns, rt_tols, isolation_window, ionisation_mode, data, density, 
                    min_ms1_intensity, min_rt, max_rt, base_dir, pbar)
run_parallel_experiment(params)

N = [10]
rt_tol = [5, 10, 15, 20, 25, 30]
len(params) = 6
importing IndependentMassSpectrometer from VMSfunctions.MassSpec on engine(s)
importing TopNController from VMSfunctions.Controller on engine(s)
importing save_obj from VMSfunctions.Common on engine(s)
importing os on engine(s)
experiment_N_10_rttol_5
experiment_N_10_rttol_10
None
experiment_N_10_rttol_20
experiment_N_10_rttol_25
experiment_N_10_rttol_30


Vary both

In [24]:
Ns = list(range(2, 51, 2)) # top-N DDA fragmentation
rt_tols = list(range(5, 31, 5)) # the rt window around a selected precursor ion to prevent it from fragmented multiple times

In [25]:
set_log_level_warning()
pbar = False
params = get_params(Ns, rt_tols, isolation_window, ionisation_mode, data, density, 
                    min_ms1_intensity, min_rt, max_rt, base_dir, pbar)
run_parallel_experiment(params)

N = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50]
rt_tol = [5, 10, 15, 20, 25, 30]
len(params) = 150
importing IndependentMassSpectrometer from VMSfunctions.MassSpec on engine(s)
importing TopNController from VMSfunctions.Controller on engine(s)
importing save_obj from VMSfunctions.Common on engine(s)
importing os on engine(s)
experiment_N_2_rttol_5
experiment_N_2_rttol_10
None
experiment_N_2_rttol_20
experiment_N_2_rttol_25
experiment_N_2_rttol_30
experiment_N_4_rttol_5
experiment_N_4_rttol_10
None
experiment_N_4_rttol_20
experiment_N_4_rttol_25
experiment_N_4_rttol_30
experiment_N_6_rttol_5
experiment_N_6_rttol_10
None
experiment_N_6_rttol_20
experiment_N_6_rttol_25
experiment_N_6_rttol_30
experiment_N_8_rttol_5
experiment_N_8_rttol_10
None
experiment_N_8_rttol_20
experiment_N_8_rttol_25
experiment_N_8_rttol_30
None
None
None
None
None
None
experiment_N_12_rttol_5
experiment_N_12_rttol_10
None
experiment_N_12_rttol_20
experiment_N_12