# 2. Grid Search

In [3]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import sys
sys.path.append('../..')

In [5]:
from pathlib import Path
from pyDOE import *
from vimms.Environment import *

In [6]:
from vimms.Chemicals import ChemicalCreator, GET_MS2_BY_PEAKS, GET_MS2_BY_SPECTRA
from vimms.MassSpec import IndependentMassSpectrometer
from vimms.Controller import *
from vimms.Common import *
from vimms.PlotsForPaper import *
from vimms.Roi import make_roi, RoiToChemicalCreator, extract_roi
from vimms.SequenceManager import *

In [7]:
# Test fixtures sit under tests/integration/fixtures, two directory levels
# above the notebook's working directory.
data_dir = os.path.join(
    os.path.abspath(os.path.join(os.getcwd(), "..", "..")),
    'tests', 'integration', 'fixtures',
)
dataset_file = os.path.join(data_dir, 'QCB_22May19_1.p')
# NOTE(review): the .p files look like pickles - only load files you trust.
dataset = load_obj(dataset_file)
ps = load_obj(Path(data_dir) / 'peak_sampler_mz_rt_int_beerqcb_fragmentation.p')

In [8]:
# Remote archive holding the example data, and the local folder where the
# notebook expects to find it (a sibling "01. Data" directory).
url = 'http://researchdata.gla.ac.uk/870/2/example_data.zip'
base_dir = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, '01. Data', 'example_data')
)

In [9]:
# Download and unpack the example data only when base_dir is not already present.
if os.path.isdir(base_dir):
    print('Found %s' % base_dir)
else:
    print('Creating %s' % base_dir)
    # NOTE(review): the archive is saved under a relative name in the current
    # working directory - confirm extract_zip_file unpacks it into base_dir.
    out_file = 'example_data.zip'
    download_file(url, out_file)
    extract_zip_file(out_file, delete=True)

Found C:\Users\Vinny\work\vimms\demo\01. Data\example_data


In [10]:
# Full-scan beer mzML from the example data; passed to pick_peaks and to the
# mzML-seeded grid searches.
mzml_file = os.path.join(
    base_dir, 'beers', 'fullscan', 'mzML', 'Beer_multibeers_1_fullscan1.mzML'
)
# Alternating None / mzML entries.
mzml_file_list = [None, mzml_file] * 2

In [11]:
# Presumably sets the logger to INFO level (the cell outputs in this notebook
# show INFO records) - confirm against the helper's definition.
set_log_level_info()

### Set some default parameters

In [12]:
# Parent folder for results: each example's output_dir is a sub-folder of
# <current working directory>/results.
experiment_dir = os.path.join(os.getcwd(), 'results')

In [13]:
# Mass-spectrometer settings shared by every grid-search example.
mass_spec_params = dict(
    ionisation_mode=POSITIVE,
    peak_sampler=ps,
    add_noise=False,
    isolation_transition_window='rectangular',
    isolation_transition_window_params=None,
)

In [14]:
# Baseline Top-N controller settings; the grid search overrides individual
# entries (see topn_variable_params_dict).
controller_params = dict(
    ionisation_mode=POSITIVE,
    N=10,
    mz_tol=10,
    rt_tol=30,
    min_ms1_intensity=1.75E5,
    rt_range=[(200, 400)],
    isolation_width=1,
)

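Note: you will need MZmine 2.40.1 installed (the version referenced by `mzmine_command` below), with its `MZmine-2.40.1` folder placed in the directory that contains your ViMMS checkout. The command below points at the Windows launcher (`startMZmine_Windows.bat`).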

In [15]:
# Evaluation method names handed to VimmsSequenceManager.
evaluation_methods = ['mzmine_peak']
# MZmine 2.40.1 Windows launcher, expected three directory levels above the
# working directory.
mzmine_command = os.path.abspath(
    os.path.join(
        os.getcwd(), os.pardir, os.pardir, os.pardir,
        'MZmine-2.40.1', 'MZmine-2.40.1', 'startMZmine_Windows.bat',
    )
)

In [16]:
# Settings passed as MZML2CHEMS_DICT= to GridSearchExperiment in the
# mzML-seeded examples.
MZML2CHEMS_DICT = dict(
    min_ms1_intensity=1.75E5,
    mz_tol=5,
    mz_units='ppm',
    min_length=1,
    min_intensity=0,
    start_rt=0,
    stop_rt=1560,
)

### Get a picked peaks file

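You don't need to provide a picked-peaks file to run a grid search experiment (Example 3 passes `ms1_picked_peaks_file=None`). If you already have one, however, supplying it is more efficient, because MZmine peak picking on the seed file does not have to be run again.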

In [38]:
# Run MZmine peak picking on the full-scan file; output goes to the current
# working directory.
pick_peaks(
    [mzml_file],
    xml_template=QCB_XML_TEMPLATE_MS1,
    output_dir=os.getcwd(),
    mzmine_command=mzmine_command,
)

2020-08-07 17:01:11.626 | INFO     | vimms.PythonMzmine:pick_peaks:23 - Creating xml batch file for Beer_multibeers_1_fullscan1.mzML
2020-08-07 17:01:11.629 | INFO     | vimms.PythonMzmine:pick_peaks:53 - Running mzMine for Beer_multibeers_1_fullscan1.mzML


In [56]:
# Picked-peaks CSV path in the working directory: "<mzML stem>_pp.csv".
ms1_picked_peaks_file = os.path.join(
    os.getcwd(), '{}_pp.csv'.format(Path(mzml_file).stem)
)

### Some parameter settings to search over

In [39]:
# Top-N grid: N fixed at 10, rt_tol at 15 or 30.
topn_variable_params_dict = dict(N=[10], rt_tol=[15, 30])

### Example 1 - Top N - Seed with dataset and picked peaks

In [57]:
# Results for Example 1 go to <experiment_dir>/grid_search_example_1.
output_dir = os.path.join(experiment_dir, 'grid_search_example_1')

In [58]:
# Passed as parallel= to GridSearchExperiment; False here (the log reports
# "Running in serial mode").
parallel = False

In [60]:
# Example 1: Top-N grid search using the saved dataset file and the
# picked-peaks CSV.
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=ms1_picked_peaks_file,
)
gs = GridSearchExperiment(
    vsm,
    'TopNController',
    mass_spec_params,
    dataset_file,
    topn_variable_params_dict,
    controller_params,
    parallel=parallel,
)
# Display the results (one row per parameter combination in the output).
gs.results

2020-08-07 17:33:04.398 | INFO     | vimms.SequenceManager:run:246 - Running in serial mode
2020-08-07 17:33:04.399 | INFO     | vimms.SequenceManager:run_controller:202 - Begun experiment: sample0
2020-08-07 17:33:04.599 | INFO     | vimms.SequenceManager:run_experiment:183 - Experiment already completed. Skipping...
2020-08-07 17:33:04.600 | INFO     | vimms.SequenceManager:run_controller:206 - Completed experiment: sample0
2020-08-07 17:33:04.600 | INFO     | vimms.SequenceManager:run_evaluation:216 - Started Evaluation: sample0


Loaded 4383 scans


2020-08-07 17:33:09.129 | INFO     | vimms.SequenceManager:run_evaluation:218 - Completed Evaluation: sample0
2020-08-07 17:33:09.130 | INFO     | vimms.SequenceManager:run_serial:258 - Finished 0
2020-08-07 17:33:09.131 | INFO     | vimms.SequenceManager:run_controller:202 - Begun experiment: sample1
2020-08-07 17:33:09.333 | INFO     | vimms.SequenceManager:run_experiment:167 - {'Sample ID': {1: 'sample1'}, 'Controller Method': {1: 'TopNController'}, 'Controller Params': {1: {'ionisation_mode': 'Positive', 'N': 10, 'isolation_width': 1, 'mz_tol': 10, 'rt_tol': 30, 'min_ms1_intensity': 175000.0, 'ms1_shift': 0.0, 'ms1_agc_target': 200000.0, 'ms1_max_it': 250.0, 'ms1_collision_energy': 0.0, 'ms1_orbitrap_resolution': 120000.0, 'ms2_agc_target': 30000.0, 'ms2_max_it': 100.0, 'ms2_collision_energy': 25.0, 'ms2_orbitrap_resolution': 7500.0}}, 'MassSpec Params': {1: {'ionisation_mode': 'Positive', 'peak_sampler': <vimms.DataGenerator.PeakSampler object at 0x0000020295A5C2E8>, 'add_noise': 

Loaded 4147 scans


2020-08-07 17:33:58.254 | INFO     | vimms.SequenceManager:run_evaluation:218 - Completed Evaluation: sample1
2020-08-07 17:33:58.256 | INFO     | vimms.SequenceManager:run_serial:258 - Finished 1


Unnamed: 0,Sample ID,Controller Method,N,mz_tol,rt_tol,min_ms1_intensity,ms1_shift,ms1_agc_target,ms1_max_it,ms1_collision_energy,ms1_orbitrap_resolution,ms2_agc_target,ms2_max_it,ms2_collision_energy,ms2_orbitrap_resolution,mzmine_peak
0,sample0,TopNController,10,10,15,175000.0,0.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0,129.0
1,sample1,TopNController,10,10,30,175000.0,0.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0,101.0


### Example 2 - Top N - Seed with mzml and picked peaks

In [25]:
# Results for Example 2 go to <experiment_dir>/grid_search_example_2.
output_dir = os.path.join(experiment_dir, 'grid_search_example_2')

In [26]:
# Passed as parallel= to GridSearchExperiment; False here (the log reports
# "Running in serial mode").
parallel = False

In [27]:
# Example 2: Top-N grid search seeded from the mzML file, reusing the
# picked-peaks CSV.
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=ms1_picked_peaks_file,
)
gs = GridSearchExperiment(
    vsm,
    'TopNController',
    mass_spec_params,
    None,  # the slot that holds dataset_file in Example 1; the mzML is passed below instead
    topn_variable_params_dict,
    controller_params,
    mzml_file,
    MZML2CHEMS_DICT=MZML2CHEMS_DICT,
    ps=ps,
    parallel=parallel,
)
# Display the results (one row per parameter combination in the output).
gs.results

2020-07-15 12:24:44.656 | INFO     | vimms.Roi:__init__:406 - Found 797 ROIs above thresholds
2020-07-15 12:24:44.660 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\mzmine_files\QCB\fullscan_mzmls\QCB_22May19_1.p
2020-07-15 12:24:46.063 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\mzmine_files\SimpleExperiments\GridSearch\experiment_2_2_2\QCB_22May19_1.p
2020-07-15 12:24:47.090 | INFO     | vimms.SequenceManager:run:240 - Running in serial mode
2020-07-15 12:24:47.091 | INFO     | vimms.SequenceManager:run_controller:196 - Begun experiment: sample0
2020-07-15 12:24:47.293 | INFO     | vimms.SequenceManager:run_experiment:177 - Experiment already completed. Skipping...
2020-07-15 12:24:47.293 | INFO     | vimms.SequenceManager:run_controller:200 - Completed experiment: sample0
2020-07-15 12:24:47.294 | INFO     | vimms.SequenceManager:run_evaluation:210 - Started Evaluation: sample0


Loaded 3090 scans


2020-07-15 12:24:56.540 | INFO     | vimms.SequenceManager:run_evaluation:212 - Completed Evaluation: sample0
2020-07-15 12:24:56.544 | INFO     | vimms.SequenceManager:run_serial:252 - Finished 0
2020-07-15 12:24:56.547 | INFO     | vimms.SequenceManager:run_controller:196 - Begun experiment: sample1
2020-07-15 12:24:56.749 | INFO     | vimms.SequenceManager:run_experiment:177 - Experiment already completed. Skipping...
2020-07-15 12:24:56.750 | INFO     | vimms.SequenceManager:run_controller:200 - Completed experiment: sample1
2020-07-15 12:24:56.751 | INFO     | vimms.SequenceManager:run_evaluation:210 - Started Evaluation: sample1


Loaded 2463 scans


2020-07-15 12:25:02.765 | INFO     | vimms.SequenceManager:run_evaluation:212 - Completed Evaluation: sample1
2020-07-15 12:25:02.766 | INFO     | vimms.SequenceManager:run_serial:252 - Finished 1


Unnamed: 0,Sample ID,Controller Method,N,mz_tol,rt_tol,min_ms1_intensity,ms1_agc_target,ms1_max_it,ms1_collision_energy,ms1_orbitrap_resolution,ms2_agc_target,ms2_max_it,ms2_collision_energy,ms2_orbitrap_resolution,mzmine_peak
0,sample0,TopNController,10,10,15,175000.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0,496.0
1,sample1,TopNController,10,10,30,175000.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0,466.0


### Example 3 - Top N - Seed with mzml, non-parallel

In [33]:
# Results for Example 3 go to <experiment_dir>/grid_search_example_3.
output_dir = os.path.join(experiment_dir, 'grid_search_example_3')

In [34]:
# Passed as parallel= to GridSearchExperiment; False here (the log reports
# "Running in serial mode").
parallel = False

In [35]:
# Example 3: Top-N grid search seeded from the mzML file with no picked-peaks
# file; mzmine_command is supplied instead (the log shows MZmine being run).
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=None,
    mzmine_command=mzmine_command,
)
gs = GridSearchExperiment(
    vsm,
    'TopNController',
    mass_spec_params,
    None,  # the slot that holds dataset_file in Example 1; the mzML is passed below instead
    topn_variable_params_dict,
    controller_params,
    mzml_file,
    MZML2CHEMS_DICT=MZML2CHEMS_DICT,
    ps=ps,
    parallel=parallel,
)
# Display the results (one row per parameter combination in the output).
gs.results

2020-07-15 12:48:21.637 | INFO     | vimms.Roi:__init__:406 - Found 797 ROIs above thresholds
2020-07-15 12:48:21.637 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\mzmine_files\QCB\fullscan_mzmls\QCB_22May19_1.p
2020-07-15 12:48:23.052 | INFO     | vimms.Common:save_obj:61 - Saving <class 'list'> to C:\Users\Vinny\work\mzmine_files\SimpleExperiments\GridSearch\experiment_2_2_3\QCB_22May19_1.p
2020-07-15 12:48:24.005 | INFO     | vimms.PythonMzmine:pick_peaks:23 - Creating xml batch file for QCB_22May19_1.mzML
2020-07-15 12:48:24.007 | INFO     | vimms.PythonMzmine:pick_peaks:53 - Running mzMine for QCB_22May19_1.mzML


C:\Users\Vinny\work\mzmine_files\QCB\fullscan_mzmls\QCB_22May19_1.mzML
C:\Users\Vinny\work\vimms\batch_files\QCB_mzmine_batch_ms1.xml
C:\Users\Vinny\work\mzmine_files\SimpleExperiments\GridSearch\experiment_2_2_3
C:\Users\Vinny\work\MZmine-2.40.1\MZmine-2.40.1\startMZmine_Windows.bat


2020-07-15 12:54:04.612 | INFO     | vimms.SequenceManager:run:244 - Running in serial mode
2020-07-15 12:54:04.614 | INFO     | vimms.SequenceManager:run_controller:200 - Begun experiment: sample0
2020-07-15 12:54:04.814 | INFO     | vimms.SequenceManager:run_experiment:181 - Experiment already completed. Skipping...
2020-07-15 12:54:04.815 | INFO     | vimms.SequenceManager:run_controller:204 - Completed experiment: sample0
2020-07-15 12:54:04.815 | INFO     | vimms.SequenceManager:run_evaluation:214 - Started Evaluation: sample0


Loaded 3100 scans


2020-07-15 12:54:08.311 | INFO     | vimms.SequenceManager:run_evaluation:216 - Completed Evaluation: sample0
2020-07-15 12:54:08.312 | INFO     | vimms.SequenceManager:run_serial:256 - Finished 0
2020-07-15 12:54:08.313 | INFO     | vimms.SequenceManager:run_controller:200 - Begun experiment: sample1
(1440.967s) ms_level=1 N=10 DEW=30: 100%|████████████████████████████████████████████████████████████████████████████████████████████████▉| 1439.278592387769/1440 [00:21<00:00, 66.95it/s]
2020-07-15 12:54:39.384 | INFO     | vimms.SequenceManager:run_controller:204 - Completed experiment: sample1
2020-07-15 12:54:39.385 | INFO     | vimms.SequenceManager:run_evaluation:214 - Started Evaluation: sample1


Loaded 2426 scans


2020-07-15 12:54:42.788 | INFO     | vimms.SequenceManager:run_evaluation:216 - Completed Evaluation: sample1
2020-07-15 12:54:42.790 | INFO     | vimms.SequenceManager:run_serial:256 - Finished 1


Unnamed: 0,Sample ID,Controller Method,N,mz_tol,rt_tol,min_ms1_intensity,ms1_agc_target,ms1_max_it,ms1_collision_energy,ms1_orbitrap_resolution,ms2_agc_target,ms2_max_it,ms2_collision_energy,ms2_orbitrap_resolution,mzmine_peak
0,sample0,TopNController,10,10,15,175000.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0,488.0
1,sample1,TopNController,10,10,30,175000.0,200000.0,250.0,0.0,120000.0,30000.0,100.0,25.0,7500.0,466.0


### Example 4 - SmartROI - Seed with mzml, non-parallel

In [17]:
# Results for Example 4 go to <experiment_dir>/grid_search_example_4.
output_dir = os.path.join(experiment_dir, 'grid_search_example_4')

In [18]:
# Baseline settings for the SmartROI controller: the same Top-N values as
# controller_params plus the ROI-specific options.
smartROI_controller_params = dict(
    ionisation_mode=POSITIVE,
    N=10,
    mz_tol=10,
    rt_tol=30,
    min_ms1_intensity=1.75E5,
    rt_range=[(200, 400)],
    isolation_width=1,
    min_roi_intensity=1000,
    min_roi_length=1,
    min_roi_length_for_fragmentation=1,
    reset_length_seconds=100,
    intensity_increase_factor=2,
    length_units="scans",
)

In [19]:
# SmartROI grid: drop_perc given as percentages (0 % and 0.1 %) converted to
# fractions; intensity_increase_factor fixed at 2.
smartROI_variable_params_dict = {
    'drop_perc': [pct / 100 for pct in (0, 0.1)],
    'intensity_increase_factor': [2],
}

In [20]:
# Passed as parallel= to GridSearchExperiment; False here (the log reports
# "Running in serial mode").
parallel = False

In [21]:
# Example 4: SmartROI grid search seeded from the mzML file with no
# picked-peaks file; mzmine_command is supplied instead.
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=None,
    mzmine_command=mzmine_command,
)
gs = GridSearchExperiment(
    vsm,
    'TopN_SmartRoiController',
    mass_spec_params,
    None,  # the slot that holds dataset_file in Example 1; the mzML is passed below instead
    smartROI_variable_params_dict,
    smartROI_controller_params,
    mzml_file,
    MZML2CHEMS_DICT=MZML2CHEMS_DICT,
    ps=ps,
    parallel=parallel,
)
# Display the results (one row per parameter combination in the output).
gs.results

2020-08-07 15:08:58.984 | INFO     | vimms.Roi:__init__:412 - Found 11480 ROIs above thresholds
2020-08-07 15:08:58.985 | INFO     | vimms.Common:save_obj:65 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\01. Data\example_data\beers\fullscan\mzML\Beer_multibeers_1_fullscan1.p
2020-08-07 15:09:07.754 | INFO     | vimms.Common:save_obj:65 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\grid_search_example_4\Beer_multibeers_1_fullscan1.p
2020-08-07 15:09:15.027 | INFO     | vimms.PythonMzmine:pick_peaks:23 - Creating xml batch file for Beer_multibeers_1_fullscan1.mzML
2020-08-07 15:09:15.039 | INFO     | vimms.PythonMzmine:pick_peaks:53 - Running mzMine for Beer_multibeers_1_fullscan1.mzML
2020-08-07 15:17:38.177 | INFO     | vimms.SequenceManager:run:246 - Running in serial mode
2020-08-07 15:17:38.178 | INFO     | vimms.SequenceManager:run_controller:202 - Begun experiment: sample0
2020-08-07 15:17:38.382 | INFO     | vimms.SequenceMan

Loaded 5747 scans


2020-08-07 15:19:43.443 | INFO     | vimms.SequenceManager:run_evaluation:218 - Completed Evaluation: sample0
2020-08-07 15:19:43.444 | INFO     | vimms.SequenceManager:run_serial:258 - Finished 0
2020-08-07 15:19:43.445 | INFO     | vimms.SequenceManager:run_controller:202 - Begun experiment: sample1
2020-08-07 15:19:43.648 | INFO     | vimms.SequenceManager:run_experiment:167 - {'Sample ID': {1: 'sample1'}, 'Controller Method': {1: 'TopN_SmartRoiController'}, 'Controller Params': {1: {'ionisation_mode': 'Positive', 'isolation_width': 1, 'mz_tol': 10, 'min_ms1_intensity': 175000.0, 'min_roi_intensity': 1000, 'min_roi_length': 1, 'N': 10, 'rt_tol': 30, 'min_roi_length_for_fragmentation': 1, 'reset_length_seconds': 100, 'intensity_increase_factor': 2, 'length_units': 'scans', 'drop_perc': 0.001, 'ms1_shift': 0, 'ms1_agc_target': 200000, 'ms1_max_it': 250, 'ms1_collision_energy': 0, 'ms1_orbitrap_resolution': 120000, 'ms2_agc_target': 30000, 'ms2_max_it': 100, 'ms2_collision_energy': 25,

Loaded 5745 scans


2020-08-07 15:21:43.086 | INFO     | vimms.SequenceManager:run_evaluation:218 - Completed Evaluation: sample1
2020-08-07 15:21:43.087 | INFO     | vimms.SequenceManager:run_serial:258 - Finished 1


Unnamed: 0,Sample ID,Controller Method,mz_tol,min_ms1_intensity,min_roi_intensity,min_roi_length,N,rt_tol,min_roi_length_for_fragmentation,reset_length_seconds,...,ms1_shift,ms1_agc_target,ms1_max_it,ms1_collision_energy,ms1_orbitrap_resolution,ms2_agc_target,ms2_max_it,ms2_collision_energy,ms2_orbitrap_resolution,mzmine_peak
0,sample0,TopN_SmartRoiController,10,175000.0,1000,1,10,30,1,100,...,0,200000,250,0,120000,30000,100,25,7500,1318.0
1,sample1,TopN_SmartRoiController,10,175000.0,1000,1,10,30,1,100,...,0,200000,250,0,120000,30000,100,25,7500,1312.0


### Example 5 - WeightedDEW - Seed with mzml, non-parallel

In [27]:
# Results for Example 5 go to <experiment_dir>/grid_search_example_5.
output_dir = os.path.join(experiment_dir, 'grid_search_example_5')

In [28]:
# Baseline settings for the WeightedDEW controller: the Top-N values plus
# ms1_shift, exclusion_t_0 and log_intensity.
weightedDEW_controller_params = dict(
    ionisation_mode=POSITIVE,
    N=10,
    mz_tol=10,
    rt_tol=30,
    min_ms1_intensity=1.75E5,
    rt_range=[(200, 400)],
    isolation_width=1,
    ms1_shift=0,
    exclusion_t_0=15,
    log_intensity=False,
)

In [32]:
# WeightedDEW grid: rt_tol fixed at 30, exclusion_t_0 at 10 or 20.
weightedDEW_variable_params_dict = dict(rt_tol=[30], exclusion_t_0=[10, 20])

In [33]:
# Passed as parallel= to GridSearchExperiment; False here (the log reports
# "Running in serial mode").
parallel = False

In [34]:
# Example 5: WeightedDEW grid search seeded from the mzML file with no
# picked-peaks file; mzmine_command is supplied instead.
vsm = VimmsSequenceManager(
    None,
    evaluation_methods,
    output_dir,
    progress_bar=True,
    ms1_picked_peaks_file=None,
    mzmine_command=mzmine_command,
)
gs = GridSearchExperiment(
    vsm,
    'WeightedDewController',
    mass_spec_params,
    None,  # the slot that holds dataset_file in Example 1; the mzML is passed below instead
    weightedDEW_variable_params_dict,
    weightedDEW_controller_params,
    mzml_file,
    MZML2CHEMS_DICT=MZML2CHEMS_DICT,
    ps=ps,
    parallel=parallel,
)
# Display the results (one row per parameter combination in the output).
gs.results

2020-08-07 16:04:48.594 | INFO     | vimms.Roi:__init__:412 - Found 11480 ROIs above thresholds
2020-08-07 16:04:48.595 | INFO     | vimms.Common:save_obj:65 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\01. Data\example_data\beers\fullscan\mzML\Beer_multibeers_1_fullscan1.p
2020-08-07 16:04:52.908 | INFO     | vimms.Common:save_obj:65 - Saving <class 'list'> to C:\Users\Vinny\work\vimms\demo\03. MultiSampleMethods\results\grid_search_example_5\Beer_multibeers_1_fullscan1.p
2020-08-07 16:04:57.103 | INFO     | vimms.PythonMzmine:pick_peaks:23 - Creating xml batch file for Beer_multibeers_1_fullscan1.mzML
2020-08-07 16:04:57.105 | INFO     | vimms.PythonMzmine:pick_peaks:53 - Running mzMine for Beer_multibeers_1_fullscan1.mzML
2020-08-07 16:13:34.526 | INFO     | vimms.SequenceManager:run:246 - Running in serial mode
2020-08-07 16:13:34.527 | INFO     | vimms.SequenceManager:run_controller:202 - Begun experiment: sample0
2020-08-07 16:13:34.729 | INFO     | vimms.SequenceMan

Loaded 6214 scans


2020-08-07 16:15:46.972 | INFO     | vimms.SequenceManager:run_evaluation:218 - Completed Evaluation: sample0
2020-08-07 16:15:46.974 | INFO     | vimms.SequenceManager:run_serial:258 - Finished 0
2020-08-07 16:15:46.974 | INFO     | vimms.SequenceManager:run_controller:202 - Begun experiment: sample1
2020-08-07 16:15:47.177 | INFO     | vimms.SequenceManager:run_experiment:167 - {'Sample ID': {1: 'sample1'}, 'Controller Method': {1: 'WeightedDewController'}, 'Controller Params': {1: {'ionisation_mode': 'Positive', 'N': 10, 'isolation_width': 1, 'mz_tol': 10, 'rt_tol': 30, 'min_ms1_intensity': 175000.0, 'ms1_shift': 0, 'exclusion_t_0': 20, 'log_intensity': False, 'ms1_agc_target': 200000.0, 'ms1_max_it': 250.0, 'ms1_collision_energy': 0.0, 'ms1_orbitrap_resolution': 120000.0, 'ms2_agc_target': 30000.0, 'ms2_max_it': 100.0, 'ms2_collision_energy': 25.0, 'ms2_orbitrap_resolution': 7500.0}}, 'MassSpec Params': {1: {'ionisation_mode': 'Positive', 'peak_sampler': <vimms.DataGenerator.PeakSa

KeyboardInterrupt: 