### Controller Demo

In [None]:
%matplotlib inline

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import math
import os
import sys

import numpy as np
import pandas as pd
import pylab as plt
import scipy.stats
from IPython import display

In [None]:
sys.path.append('../codes')

In [None]:
from VMSfunctions.Chemicals import *
from VMSfunctions.Chromatograms import *
from VMSfunctions.MassSpec import *
from VMSfunctions.Controller import *
from VMSfunctions.Common import *
from VMSfunctions.DataGenerator import *

In [None]:
from VMSfunctions.Noise import *

In [None]:
set_log_level_info()

Load the densities trained on the 19-beer dataset (see [loader_kde](loader_kde.ipynb)).

In [None]:
# Project data root for one contributor's Windows account.
# NOTE: the following cell assigns base_dir again, so when the notebook is run
# top to bottom this value is overwritten.
base_dir = 'C:\\Users\\Vinny\\OneDrive - University of Glasgow\\CLDS Metabolomics Project\\'

In [None]:
# Root folder of the shared project data. Set the VMS_BASE_DIR environment
# variable to point at your own copy; when it is unset the original hard-coded
# location is used, so existing behaviour is unchanged.
# Keep the trailing path separator, as in the default value.
base_dir = os.environ.get(
    'VMS_BASE_DIR',
    'C:\\Users\\joewa\\University of Glasgow\\Vinny Davies - CLDS Metabolomics Project\\',
)

In [None]:
# Load the previously trained full-scan peak sampler from the project folder.
# NOTE(review): the '.p' extension suggests a pickle file; if so, only load
# files from a trusted source -- confirm how load_obj deserialises.
ps = load_obj(os.path.join(base_dir, 'Trained Models\\peak_sampler_19_beers_fullscan.p'))

Load chromatogram data exported from R

In [None]:
cc = ChromatogramCreator(os.path.join(base_dir, 'Trained Models\\chromatogram_19_beers.csv.gz'))

### Generate some chemicals

In [None]:
hmdb = load_obj(os.path.join(base_dir, 'Trained Models\\hmdb_compounds.p'))

In [None]:
# Sampling settings for the single-level (ms_levels = 1) dataset.
n_ms1_peaks = 5000
ms_levels = 1
min_ms1_intensity = 1.75e5
mz_range = [(0, 1050)]
rt_range = [(3 * 60, 21 * 60)]  # presumably minutes 3-21 expressed in seconds -- confirm units

# Alternative configuration ('Known' chemicals), kept for reference:
#   alpha = 0.1
#   compound_list = hmdb
#   chemical_type = 'Known'
chemical_type = 'Unknown'
compound_list = hmdb
alpha = math.inf

# Sample the chemicals from the trained peak sampler and the chromatogram creator.
chemicals = ChemicalCreator(ps)
dataset = chemicals.sample(
    cc,
    mz_range,
    rt_range,
    min_ms1_intensity,
    n_ms1_peaks,
    ms_levels=ms_levels,
    chemical_type=chemical_type,
    formula_list=None,
    compound_list=compound_list,
    alpha=alpha,
    fixed_mz=False,
)

In [None]:
def plot_chromatogram(chrom):
    """Plot one chromatogram as two stacked panels that share the x axis.

    The upper panel is a line plot of intensity against retention time; the
    lower panel is a scatter plot of m/z against retention time.

    Args:
        chrom: object exposing ``rts``, ``intensities`` and ``mzs`` sequences.
    """
    # The figure handle itself is not needed, only the two axes.
    _, (intensity_ax, mz_ax) = plt.subplots(2, sharex=True)

    intensity_ax.plot(chrom.rts, chrom.intensities)
    intensity_ax.set_title('Chromatogram')
    intensity_ax.set_ylabel('intensity')

    mz_ax.scatter(chrom.rts, chrom.mzs)
    mz_ax.set_ylabel('mz')
    mz_ax.set_xlabel('RT')

    plt.show()

# Inspect the first ten sampled chemicals (fewer if the dataset is shorter):
# print each one with its chromatogram's raw intensities, then plot it.
for chem in dataset[:10]:
    print(chem)
    chromatogram = chem.chromatogram
    print(chromatogram.raw_intensities)
    plot_chromatogram(chromatogram)

In [None]:
type(dataset[0])

### Set up a simple MS1 controller

In [None]:
set_log_level_warning() # We don't want to see too many messages as the controller is running

In [None]:
# Run the simple MS1 controller over the sampled dataset (no noise added yet).
mass_spec = IndependentMassSpectrometer(POSITIVE, dataset, density=ps.density_estimator)
controller = SimpleMs1Controller(mass_spec)
controller.make_plot = False  # plotting flag switched off for this run
controller.run(rt_range[0][0], rt_range[0][1])  # start and end of the single RT window

In [None]:
# Export the noise-free MS1 run to an mzML file under the manuscript folder.
controller.write_mzML('my_analysis', os.path.join(base_dir, 'Manuscript\\B. CentWave\\mzML\\no_noise_ms1_controller.mzML'))
# Manual follow-up outside this notebook: run the R script on the exported file.
# NOTE(review): the script is not named anywhere in this notebook -- record its path here.

In [None]:
save_obj(controller, os.path.join(base_dir, 'Manuscript\\B. CentWave\\no_noise_ms1_controller.p'))

### With Noise

In [None]:
set_log_level_debug()

In [None]:
# Locations of the full-scan beer mzML folder and of the pre-extracted ROIs.
# Built with os.path.join, like the other path-building cells, so the result no
# longer depends on base_dir ending with a path separator. The trailing
# separator of mzml_path is kept exactly as before.
mzml_path = os.path.join(base_dir, 'Data\\multibeers_urine_data\\beers\\fullscan\\')
# NOTE(review): xcms_roi_file is not referenced again in the visible notebook.
xcms_roi_file = os.path.join(mzml_path, 'rois.csv')
extracted_roi_file = os.path.join(base_dir, 'Trained Models\\rois_19_beers.p')

In [None]:
# Create a DataSource and feed it the mzML folder and the pre-extracted ROI file.
ds = DataSource()
ds.load_data(mzml_path)
ds.load_roi(extracted_roi_file)

In [None]:
rtcc = RoiToChemicalCreator(ps, ds)

In [None]:
len(rtcc.chemicals)

In [None]:
def plot_chems(chem_list, N=10):
    """Plot the raw intensity trace of the leading chemicals, one at a time.

    Args:
        chem_list: sliceable sequence of objects exposing ``.chromatogram``
            with ``raw_rts`` and ``raw_intensities`` attributes.
        N: number of leading entries of ``chem_list`` to plot (default 10).
    """
    for trace in (chem.chromatogram for chem in chem_list[:N]):
        plt.plot(trace.raw_rts, trace.raw_intensities)
        plt.show()  # called per chemical, right after its plot

In [None]:
# Order the ROI-derived chemicals by ascending number of scans in their ROI.
sorted_chems = list(rtcc.chemicals)
sorted_chems.sort(key=lambda chem: chem.chromatogram.roi.num_scans())

In [None]:
plot_chems(sorted_chems[0:10])

In [None]:
plot_chems(sorted_chems[-10:])

In [None]:
# Keep at most the last 100000 entries of the ascending sort, i.e. the chemicals
# whose ROIs have the most scans.
# NOTE(review): `biggest` is not used in the visible remainder of the notebook;
# the sampling cell below draws from sorted_chems instead -- confirm intent.
biggest = sorted_chems[-100000:]

In [None]:
# Draw 20000 distinct chemicals at random (without replacement) as a plain list.
# NOTE(review): no random seed is set in the visible notebook, so the selection
# differs between runs; and with replace=False numpy raises ValueError when
# fewer than 20000 chemicals are available.
selected = np.random.choice(sorted_chems, size=20000, replace=False).tolist()

#### Add non-peak regions of interest to the data

In [None]:
# Tag every chemical in place with its origin: the sampled chemicals become
# 'data', the randomly selected ROI-derived chemicals become 'noise'.
for chem in dataset:
    chem.type = 'data'
for noise in selected:
    noise.type = 'noise'

In [None]:
noisy_dataset = dataset + selected

In [None]:
# shuffle(noisy_dataset)

In [None]:
len(dataset)

In [None]:
len(noisy_dataset)

#### Set up a simple MS1 controller

In [None]:
set_log_level_warning() # We don't want to see too many messages as the controller is running

In [None]:
# Same MS1 controller run as before, this time over the dataset with noise added.
# Note that mass_spec and controller are rebound here, replacing the earlier run.
mass_spec = IndependentMassSpectrometer(POSITIVE, noisy_dataset, density=ps.density_estimator)
controller = SimpleMs1Controller(mass_spec)
controller.make_plot = False  # plotting flag switched off for this run
controller.run(rt_range[0][0], rt_range[0][1])  # start and end of the single RT window

In [None]:
# Flatten the per-chemical peak lists held by the mass spectrometer.
peaks_by_chem = controller.mass_spec.chemicals_to_peaks
all_peaks = []
for chem in peaks_by_chem:
    all_peaks += peaks_by_chem[chem]

# Column-wise views of the peak attributes.
mzs = np.array([peak.mz for peak in all_peaks])
rts = np.array([peak.rt for peak in all_peaks])
intensities = np.array([peak.intensity for peak in all_peaks])

# Scans stored under key 1 (presumably the MS level -- confirm) and the time
# elapsed between each consecutive pair of them.
scans = controller.scans[1]
scan_times = [scan.rt for scan in scans]
scan_durations = [later - earlier for earlier, later in zip(scan_times, scan_times[1:])]

In [None]:
# One box plot per quantity. The 'intensities' panel shows natural-log values,
# although its title is left as plain 'intensities'.
for panel_title, panel_values in (
    ('mzs', mzs),
    ('rts', rts),
    ('intensities', np.log(intensities)),
    ('scan_durations', scan_durations),
):
    _ = plt.boxplot(panel_values)
    plt.title(panel_title)
    plt.show()

In [None]:
np.median(rts)

In [None]:
np.min(np.log(intensities)), np.max(np.log(intensities))

Save to mzML file

In [None]:
# Export the noisy MS1 run to an mzML file under the manuscript folder.
controller.write_mzML('my_analysis', os.path.join(base_dir, 'Manuscript\\B. CentWave\\mzML\\noisy_ms1_controller.mzML'))
# Manual follow-up outside this notebook: run the R script on the exported file.
# NOTE(review): the script is not named anywhere in this notebook -- record its path here.

In [None]:
save_obj(controller, os.path.join(base_dir, 'Manuscript\\B. CentWave\\noisy_ms1_controller.p'))

### Set up a Top-N controller

We can run the dataset through a top-N DDA controller

In [None]:
# set_log_level_warning()
# set_log_level_info()
set_log_level_debug()

In [None]:
# Rebind ps to the fragmentation peak sampler; from here on ps no longer refers
# to the full-scan sampler loaded near the top of the notebook.
# NOTE(review): the '.p' extension suggests a pickle file; if so, only load
# files from a trusted source -- confirm how load_obj deserialises.
ps = load_obj(os.path.join(base_dir, 'Trained Models\\peak_sampler_19_beers_fragmentation.p'))

In [None]:
# Sampling settings for the two-level (ms_levels = 2) dataset used by the top-N run.
n_ms1_peaks = 1000
ms_levels = 2
min_ms1_intensity = 1.75e5
mz_range = [(0, 1050)]
rt_range = [(3 * 60, 21 * 60)]  # presumably minutes 3-21 expressed in seconds -- confirm units

# Alternative configuration ('Known' chemicals), kept for reference:
#   alpha = 0.1
#   compound_list = hmdb
#   chemical_type = 'Known'
chemical_type = 'Unknown'
compound_list = None
alpha = math.inf

# Rebinds `chemicals` and `dataset`, replacing the MS1-only dataset sampled earlier.
chemicals = ChemicalCreator(ps)
dataset = chemicals.sample(
    cc,
    mz_range,
    rt_range,
    min_ms1_intensity,
    n_ms1_peaks,
    ms_levels=ms_levels,
    chemical_type=chemical_type,
    formula_list=None,
    compound_list=compound_list,
    alpha=alpha,
    fixed_mz=False,
)

In [None]:
# Parameters for the top-N controller built in the next cell.
N = 10                          # the N of the top-N DDA fragmentation (10 here)
# NOTE(review): isolation_window is not passed to TopNController in the cell below.
isolation_window = 1            # the mz isolation window (Dalton) around a selected precursor ion
mz_tol = 10                     # the mz tolerance for dynamic exclusion
rt_tol = 15                     # the rt window for dynamic exclusion
min_ms1_intensity = 1.75E5      # the minimum ms1 peak intensity to be fragmented

In [None]:
# Build the mass spectrometer over the two-level dataset and attach a top-N controller.
mass_spec = IndependentMassSpectrometer(POSITIVE, dataset, density=ps.density_estimator)
controller = TopNController(mass_spec, N, mz_tol, rt_tol, min_ms1_intensity)
controller.make_plot = False

# Switch to warning-level logging before the run; the commented lines are the
# more verbose alternatives.
set_log_level_warning()
# set_log_level_info()
# set_log_level_debug()

# Run over the single RT window (start, end).
controller.run(rt_range[0][0], rt_range[0][1])

In [None]:
# Export the top-N run to an mzML file.
# NOTE(review): the two MS1 exports earlier in this notebook are written under
# the 'mzML' subfolder of 'B. CentWave', whereas this file goes directly into
# 'B. CentWave' -- confirm this is intended.
controller.write_mzML('my_analysis', os.path.join(base_dir, 'Manuscript\\B. CentWave\\no_noise_top_N_controller.mzML'))