In [5]:
%load_ext autoreload
%autoreload 2

import pickle
import typing
import specxplore.specxplore_data
from specxplore.specxplore_data import specxplore_data, Spectrum
import numpy as np
import copy
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only use this on
# result files that were produced locally / by a trusted pipeline.
with open("data_import_testing/results/phophe_specxplore.pickle", 'rb') as handle:
    data = pickle.load(handle).spectra

# Re-wrap every loaded spectrum as a specxplore Spectrum. The list position doubles as the
# identifier, and the largest fragment m/z is passed as the precursor m/z
# (presumably a stand-in for the real precursor value -- TODO confirm).
spectra = [
    Spectrum(
        loaded_spectrum.peaks.mz,
        max(loaded_spectrum.peaks.mz),
        position,
        loaded_spectrum.peaks.intensities,
    )
    for position, loaded_spectrum in enumerate(data)
]

# Keep the first spectrum around as a small example and display it.
spec = spectra[0]
spec

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


  data = pickle.load(handle).spectra


Spectrum(mass_to_charge_ratios=array([103.05424627, 131.04923381, 149.02349973, 149.08232625]), precursor_mass_to_charge_ratio=149.082326253255, identifier=0, intensities=array([0.64292227, 1.        , 0.44513617, 0.62423902]), mass_to_charge_ratio_aggregate_list=(), intensity_aggregate_list=())

In [6]:
# Toy peak list for experimenting with binning: unsorted m/z values, each with unit intensity.
test_mz = np.array([0.9, 1.4, 4, 3.6, 5.05, 9.9, 8])
test_int = np.array([1] * test_mz.size)


In [7]:
def bin_spectrum(spectrum : Spectrum, bin_map : np.ndarray) -> Spectrum:
    """ Applies binning to mz and intensity values and preserves aggregation information.

    Each mass to charge ratio is assigned to a bin via ``np.digitize(..., right=True)``: a value x falls into
    bin i when ``bin_map[i-1] < x <= bin_map[i]`` and is reported as ``bin_map[i]`` (the upper bin edge).
    Intensities of all peaks sharing a bin are summed, and the summed intensities are rescaled so the largest
    one equals 1. If all summed intensities are zero they are returned unscaled (zeros) instead of NaN.
    Values at or below ``bin_map[0]`` are all collected in the first bin.

    :param spectrum: A spectrum object with mass_to_charge_ratios confined to the range of the bin_map.
    :param bin_map: An array with bin edges for mass to charge ratios in increasing order, usually between 0
        and 1000 with step_size of 0.1.
    :returns: A new spectrum object with one entry per occupied bin (ordered by bin). The aggregate lists hold,
        per occupied bin, the original mass to charge ratios and intensities that were merged into it. An empty
        input spectrum yields an empty binned spectrum.
    :raises ValueError: If the number of mass to charge ratios and intensities differ.
    :raises IndexError: If a mass to charge ratio lies beyond the last edge of bin_map.
    """
    # Accept plain sequences as well as arrays; element access below then always yields numpy scalars.
    mz_values = np.asarray(spectrum.mass_to_charge_ratios)
    intensities = np.asarray(spectrum.intensities)
    bin_edges = np.asarray(bin_map)
    if mz_values.shape != intensities.shape:
        raise ValueError(
            "mass_to_charge_ratios and intensities must have the same shape, "
            f"got {mz_values.shape} and {intensities.shape}."
        )

    precursor_mass_to_charge_ratio = copy.deepcopy(spectrum.precursor_mass_to_charge_ratio)
    identifier = copy.deepcopy(spectrum.identifier)

    # Nothing to bin: return an empty spectrum rather than failing while normalizing.
    if mz_values.size == 0:
        return Spectrum(
            mass_to_charge_ratios=np.array([]),
            precursor_mass_to_charge_ratio=precursor_mass_to_charge_ratio,
            identifier=identifier,
            intensities=np.array([]),
            intensity_aggregate_list=[],
            mass_to_charge_ratio_aggregate_list=[],
        )

    # Index into bin_edges for every peak; an index equal to len(bin_edges) means "beyond the last edge".
    mz_value_bin_assignments = np.digitize(mz_values, bin_edges, right=True)
    beyond_last_edge = mz_value_bin_assignments >= bin_edges.size
    if np.any(beyond_last_edge):
        raise IndexError(
            "mass_to_charge_ratios fall outside the range covered by bin_map: "
            f"{mz_values[beyond_last_edge].tolist()}"
        )

    # unique_assignments: occupied bins in ascending order.
    # group_of_peak[k]: position of peak k's bin within unique_assignments.
    unique_assignments, group_of_peak = np.unique(mz_value_bin_assignments, return_inverse=True)
    group_of_peak = group_of_peak.reshape(-1)
    number_of_unique_assignments = unique_assignments.size

    # Sum intensities per occupied bin (peaks are accumulated in their original order).
    summed_intensities = np.bincount(
        group_of_peak,
        weights=intensities.astype(float),
        minlength=number_of_unique_assignments,
    )

    # Remember which raw peaks were merged into which bin, preserving input order within a bin.
    mass_to_charge_ratio_aggregate_list = [[] for _ in range(number_of_unique_assignments)]
    intensity_aggregate_list = [[] for _ in range(number_of_unique_assignments)]
    for group, mz_value, intensity in zip(group_of_peak, mz_values, intensities):
        mass_to_charge_ratio_aggregate_list[group].append(mz_value)
        intensity_aggregate_list[group].append(intensity)

    # Rescale so that the most intense bin is 1; skip when everything is zero to avoid 0/0 -> NaN.
    largest_intensity = summed_intensities.max()
    if largest_intensity != 0:
        summed_intensities = summed_intensities / largest_intensity

    output_spectrum = Spectrum(
        mass_to_charge_ratios=bin_edges[unique_assignments],
        precursor_mass_to_charge_ratio=precursor_mass_to_charge_ratio,
        identifier=identifier,
        intensities=summed_intensities,
        intensity_aggregate_list=intensity_aggregate_list,
        mass_to_charge_ratio_aggregate_list=mass_to_charge_ratio_aggregate_list,
    )
    return output_spectrum

# Bin the example spectrum on a 0.1-wide m/z grid from 0 up to (but excluding) 1000; the grid is
# rounded to two decimals to strip floating point noise from np.arange.
print(bin_spectrum(spectrum=spec, bin_map=np.arange(0, 1000, step=0.1).round(decimals=2)))

Spectrum(mass_to_charge_ratios=array([103.1, 131.1, 149.1]), precursor_mass_to_charge_ratio=149.082326253255, identifier=0, intensities=array([0.60121301, 0.93512549, 1.        ]), mass_to_charge_ratio_aggregate_list=[[103.054246266683], [131.049233809761], [149.023499731924, 149.082326253255]], intensity_aggregate_list=[[0.6429222736342746], [1.0], [0.4451361657851017, 0.6242390240442007]])
