In [1]:
%matplotlib notebook
import os
import os.path as path
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
from faker import Factory
from scipy import interpolate
from sklearn.preprocessing import MinMaxScaler
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import math 
import itertools 
from ipywidgets import interact, interactive, fixed, interact_manual, widgets
from ipywidgets import HBox, VBox
from ipywidgets import IntSlider, Output
from IPython.display import clear_output
import h5py
import re

# Local module and scripts
from pyccapt.calibration.calibration_tools import tools, data_tools, variables, calibration, data_loadcrop
from pyccapt.calibration.calibration_tools import widgets as wd
from pyccapt.calibration.calibration_tools import dataset_path_qt

In [2]:
# Disable showing logging in Jupyter notebook
import logging, sys
logging.disable(sys.maxsize)
save_fig=False
plot_fig=True 

In [3]:
button = widgets.Button(
    description='load dataset',
)

@button.on_click
def open_file_on_click(b):
    global dataset_path
    dataset_path = dataset_path_qt.gui_fname().decode('ASCII')
button

Button(description='load dataset', style=ButtonStyle())

In [4]:
# number_sample = 300
# save_fig=False
# plot_fig=True 

In [5]:
tdc, pulse_mode, flightPathLength_d, t0_d, max_tof = wd.dataset_tdc_selection()
display(tdc, pulse_mode)

Dropdown(description='TDC model:', options=('surface_concept', 'roentdec'), value='surface_concept')

Dropdown(description='Pulse mode:', options=('voltage', 'laser'), value='voltage')

$$\textbf{You can specify which dataset to use in below block}$$

In [6]:
tdc_model = tdc.value
pulse_mode_ini = pulse_mode.value


dataset_main_path = os.path.dirname(dataset_path)
dataset_name_with_extention = os.path.basename(dataset_path)
dataset_name = os.path.splitext(dataset_name_with_extention)[0]


variables.init()
# variables.path = os.path.join(p, 'tests//data')
variables.result_path = os.path.dirname(dataset_main_path) + '/ion_type_selection/'
if not os.path.isdir(variables.result_path):
    os.makedirs(variables.result_path, mode=0o777, exist_ok=True)
        
filename = dataset_path

head, tail = os.path.split(filename)
figname = os.path.splitext(tail)[0]

data = data_tools.read_hdf5_through_pandas(filename)

In [7]:
# tdc_model = tdc.value
# pulse_mode_ini = pulse_mode.value

# dataset_name = dataset.value

# flightPathLength = float(flightPathLength_d.value) # mm 
# # The initial value for t_0
# t0 = float(t0_d.value) # ns

# p = path.abspath(path.join("", "../../../.."))

# variables.init()

# path_main = os.path.join(p, 'tests//data')
# filename_main = path_main + '//' + dataset_name + '.h5'

# # variables.path = os.path.join(p, 'tests//results//mc_vol_bowl_calibratin')
# variables.path = os.path.join(p, 'tests//results//tof_calibration')
# variables.result_path = os.path.join(p, 'tests/results/ion_type_selection/' + dataset_name)
# if not os.path.isdir(variables.result_path):
#         os.makedirs(variables.result_path, mode=0o777, exist_ok=True)
        
# filename = variables.path + '//' + dataset_name + '//' + dataset_name + '.h5'



# head, tail = os.path.split(filename)
# figname = os.path.splitext(tail)[0]

# data = data_tools.read_hdf5_through_pandas(filename)


In [8]:
data

Unnamed: 0,high_voltage (V),pulse (V),start_counter,t (ns),mc (Da),x (mm),y (mm),pulse_pi,ion_pp),mc_c (Da),tof_c (ns)
0,5267.001953,5267.001953,29960.0,623.755674,27.055124,5.666939,-12.129796,0.0,1.0,26.658743,0.0
1,5267.001953,5267.001953,30485.0,640.132578,27.394052,6.335510,26.297143,525.0,1.0,26.597992,0.0
2,5267.001953,5267.001953,30713.0,620.539272,26.954815,2.897143,-8.850612,228.0,1.0,26.478062,0.0
3,5267.001953,5267.001953,30926.0,625.621050,27.320712,-10.569796,-5.253061,213.0,1.0,26.636381,0.0
4,5267.001953,5267.001953,31281.0,639.830826,27.450552,-26.010612,-3.947755,355.0,1.0,26.595731,0.0
...,...,...,...,...,...,...,...,...,...,...,...
1889157,5710.319336,5710.319336,46216.0,604.347534,27.487638,-3.470204,10.824490,68.0,1.0,26.653180,0.0
1889158,5710.319336,5710.319336,46992.0,603.572580,27.493286,2.451429,-9.296327,776.0,1.0,26.999085,0.0
1889159,5710.319336,5710.319336,47106.0,611.802180,27.594512,-15.154286,-13.530612,114.0,1.0,26.997074,0.0
1889160,5710.319336,5710.319336,48016.0,619.099092,27.111540,28.812245,11.811429,910.0,1.0,26.788568,0.0


In [9]:
variables.mc_calib = data['mc_c (Da)'].to_numpy()

In [222]:
def find_nearest(a, a0, num):
    "Element in nd array `a` closest to the scalar value `a0`"
    idx = []
    for i in range(num):
        idx.append(np.abs(a - a0).argmin())
        a[idx] = -200 #some dmmy negative value
    return idx

def find_close_element(target_elem, num_c, aboundance_threshold=1, charge=4):

    data_table = '../../../files/isotopeTable.h5'
    dataframe = data_tools.read_hdf5_through_pandas(data_table)

    elements = dataframe['element'].to_numpy()
    isotope_number = dataframe['isotope'].to_numpy()
    abundance = dataframe['abundance'].to_numpy()
    element_abundance = np.repeat(abundance, charge)

    element_wights = np.zeros((len(elements), charge))
    elements_w = dataframe['weight'].to_numpy()
    for i in range(charge):
        element_wights[:,i] = elements_w/(i+1)
    element_wights = element_wights.flatten()

    elem = np.core.defchararray.add(elements.astype('U'), isotope_number.astype('U'))
    element_list = np.zeros(len(elem)*charge).astype('U')
    for i in range(len(elem)):
        for j in range(charge):
            element_list[i+j+((charge-1)*i)] = elem[i] + '+'*(j+1)

    idxs = find_nearest(np.copy(element_wights), target_elem, num_c)

    element_c = element_list[idxs]
    element_wights_c = element_wights[idxs]
    abundance_c = element_abundance[idxs]

    index_sort = np.argsort(abundance_c)
    index_sort = np.flip(index_sort)

    element_c = element_c[index_sort]
    element_wights_c = element_wights_c[index_sort]
    abundance_c = abundance_c[index_sort] 
    # make the formula in latex format
    for i in range(len(element_c)):
        num_plus = element_c[i].count('+')
        cc = re.findall('(\d+|[A-Za-z]+)', element_c[i])
        if num_plus == 1:
            cc.append('+')
        else:
            cc.append('%s+' %num_plus)
        for j in range(len(cc)):
            if cc[j].isnumeric():
                cc[j] = int(cc[j])
        element_c[i] = '$${}^{%s}%s^{%s}$$' %(cc[1], cc[0], cc[2])
        
    if aboundance_threshold < 1.0:
        element_c = element_c[abundance_c < aboundance_threshold]
        element_wights_c = element_wights_c[abundance_c < aboundance_threshold]
        abundance_c = abundance_c[abundance_c < aboundance_threshold] 
    
    df = pd.DataFrame({'element': element_c, 'weight': element_wights_c, 'abundance': abundance_c})
    return df

In [292]:
def find_close_molecule(target_mole, num_c, aboundance_threshold=1, charge=4):
    
    data_table = '../../../files/molecule_table.h5'
    dataframe = data_tools.read_hdf5_through_pandas(data_table)
    
    elements = dataframe['molecule'].to_numpy()
    weight_number = dataframe['weight'].to_numpy()
    abundance = dataframe['abundance'].to_numpy()
    element_abundance = np.repeat(abundance, charge)

    element_wights = np.zeros((len(elements), charge))
    elements_w = dataframe['weight'].to_numpy()
    for i in range(charge):
        element_wights[:,i] = elements_w/(i+1)
    element_wights = element_wights.flatten()
    elem = elements
#     elem = np.core.defchararray.add(elements.astype('U'), weight_number.astype('U'))
    element_list = np.zeros(len(elem)*charge).astype('U')
    for i in range(len(elem)):
        for j in range(charge):
            element_list[i+j+((charge-1)*i)] = elem[i] + '+'*(j+1)

    idxs = find_nearest(np.copy(element_wights), target_mole, num_c)

    element_c = element_list[idxs]
    element_wights_c = element_wights[idxs]
    abundance_c = element_abundance[idxs]

    index_sort = np.argsort(abundance_c)
    index_sort = np.flip(index_sort)

    element_c = element_c[index_sort]
    element_wights_c = element_wights_c[index_sort]
    abundance_c = abundance_c[index_sort] 
    # make the formula in latex format
    for i in range(len(element_c)):
        ff = element_c[i]
        num_charge = ff.count('+')
        ff = ff.replace('+', '')
        element_c[i] = create_formula_latex(ff, num_charge)
        
    if aboundance_threshold < 1.0:
        element_c = element_c[abundance_c < aboundance_threshold]
        element_wights_c = element_wights_c[abundance_c < aboundance_threshold]
        abundance_c = abundance_c[abundance_c < aboundance_threshold] 
    df = pd.DataFrame({'molecule': element_c, 'weight': element_wights_c, 'abundance': abundance_c})
    return df

In [302]:
find_close_molecule(34, 6, aboundance_threshold=1, charge=4)


Unnamed: 0,molecule,weight,abundance
0,$$({}^{13}C_{2}{}^{19}F_{4})^{3+,34.0,1.51807e-08
1,$$({}^{27}Al_{ }{}^{7}Li_{ }{}^{,34.0,1.784094e-14
2,$${}^{12}C_{ }{}^{1}H_{2}{}^{37},34.0,2.545068e-24
3,$${}^{12}C_{ }{}^{2}H_{2}{}^{37},34.0,3.693951e-37
4,$${}^{12}C_{ }{}^{2}H_{2}{}^{37},34.0,3.693951e-37
5,$${}^{12}C_{ }{}^{2}H_{2}{}^{35},34.0,1.2607029999999998e-38


In [269]:
def create_formula_latex(aa, num_charge=0):
    aa = list(aa)
    # add one after parantezes if there is no higher number
    for i in range(len(aa)):
        if aa[i] == ')':
            if i+1 == len(aa):
                aa.insert(i+1, '1')
            else:
                if not aa[i+1].isnumeric():
                    aa.insert(i+1, '1')
    aa = ''.join(aa)
    aa = re.findall('(\d+|[A-Za-z]+)', aa)
    for i in range(int(len(aa)/3)):
        if aa[i*3+2].isnumeric():
            aa[i*3+2] = int(aa[i*3+2])
    # replace 1 with''
    for i in range(len(aa)):
        if aa[i]==1:
            aa[i] = ' '
    for i in range(int(len(aa)/3)):

        if i == 0:
            bb = '{}^{%s}%s_{%s}' %(aa[(i*3)+1], aa[(i*3)], aa[(i*3)+2])
        else:
            bb += '{}^{%s}%s_{%s}' %(aa[(i*3)+1], aa[(i*3)], aa[(i*3)+2])
    if num_charge == 0:
        bb = '$$' + bb + '$$'
    else:
        bb = '$$(' + bb + ')^{%s+}' %num_charge + '$$'
    return bb

In [300]:

display(Latex(create_formula_latex('Bi(209)2O(16)2C(12)O(16)3')))

<IPython.core.display.Latex object>

In [203]:
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
def fix_prantesis(c):
    index = []
    for i in range(len(c)):
        if c[i]== '(':
            index.append(i+1)
        if c[i]== ')':
            index.append(i)
            index.append(int(c[i+1]))
    index = list(chunks(index, 3))
    list_parantesis = []
    for i in range(len(index)):
        tmp = c[index[i][0]:index[i][1]]
        tmp = re.findall('[A-Z][^A-Z]*', tmp)
        for j in range(len(tmp)):
            if tmp[j].isalpha():
                tmp[j] = tmp[j] + str(index[i][-1])
            elif not tmp[j].isalpha():
                dd = int(re.findall(r'\d+', tmp[j])[0])*index[i][-1]
                tmp[j] = ''.join([p for p in tmp[j] if not p.isdigit()]) + str(dd)
        list_parantesis.append("".join(tmp))

    for i in range(len(list_parantesis)):
        gg = list_parantesis[i]
        c = list(c)
        c[index[i][0]-1-(2*i):index[i][1]+2] = list_parantesis[i]
    
    return ''.join(c)
def molecule_isotop_list(dataframe, target_element, latex=True):
    target_element = fix_prantesis(target_element)
        
    elements = dataframe['element'].to_numpy()
    isotope_number = dataframe['isotope'].to_numpy()
    abundance = dataframe['abundance'].to_numpy()
    weight = dataframe['weight'].to_numpy()

    molecule_formula = re.findall('(\d+|[A-Za-z]+)', target_element)
    molecule_formula = [re.split('(?<=.)(?=[A-Z])', item) for item in molecule_formula]
    molecule_formula = list(itertools.chain(*molecule_formula))
    
    elem_wights = []
    elem_aboundance = []
    elem_compo = []

    for i in range(len(molecule_formula)):
        if not molecule_formula[i].isnumeric():
            idx_element = np.where(elements == molecule_formula[i])
            elem_compo_temp = []
            elem_wights_tmp = []
            elem_aboundance_tmp = []
            for j in range(len(idx_element[0])):
                if i+1 < len(molecule_formula):
                    if molecule_formula[i+1].isnumeric():
                        number_of_elem = int(molecule_formula[i+1])
                        elem_compo_temp.append(elements[idx_element[0][j]] + '('+str(isotope_number[idx_element[0][j]])+')'+str(number_of_elem))
                    else:
                        number_of_elem = 1
                        elem_compo_temp.append(elements[idx_element[0][j]] + '('+str(isotope_number[idx_element[0][j]])+')')
                else:
                    number_of_elem = 1
                    elem_compo_temp.append(elements[idx_element[0][j]] + '('+str(isotope_number[idx_element[0][j]])+')')
                
                
                elem_wights_tmp.append(weight[idx_element[0][j]]*number_of_elem)
                aboundance_i = abundance[idx_element[0][j]]/100
                for k in range(number_of_elem):
                    aboundance_i = aboundance_i * aboundance_i
                elem_aboundance_tmp.append(aboundance_i)

            elem_compo.append(elem_compo_temp)
            elem_wights.append(elem_wights_tmp)
            elem_aboundance.append(elem_aboundance_tmp)

    list_elem_compo = list(itertools.product(*elem_compo))
    list_elem_wights = list(itertools.product(*elem_wights))
    list_elem_aboundance = list(itertools.product(*elem_aboundance))

    list_elem_compo = [''.join(item) for item in list_elem_compo]
    if latex:
        for i in range(len(list_elem_compo)):
            list_elem_compo[i] = create_formula_latex(list_elem_compo[i])
    list_elem_wights = [sum(item) for item in list_elem_wights]
    list_elem_aboundance = [math.prod(item) for item in list_elem_aboundance]

    df = pd.DataFrame({'molecule': list_elem_compo, 'weight': list_elem_wights, 'abundance': list_elem_aboundance})
    return df


In [298]:
isotopeTableFile = '../../../files/isotopeTable.h5'
dataframe = data_tools.read_hdf5_through_pandas(isotopeTableFile)

molecule_isotop_list(dataframe,'H2O')

Unnamed: 0,molecule,weight,abundance
0,$${}^{1}H_{2}{}^{16}O_{ }$$,18.015,0.9946487
1,$${}^{1}H_{2}{}^{17}O_{ }$$,19.02,1.443134e-07
2,$${}^{1}H_{2}{}^{18}O_{ }$$,20.02,3.997601e-06
3,$${}^{2}H_{2}{}^{16}O_{ }$$,20.015,5.038431e-16
4,$${}^{2}H_{2}{}^{17}O_{ }$$,21.02,7.310250000000001e-23
5,$${}^{2}H_{2}{}^{18}O_{ }$$,22.02,2.025e-21


In [193]:
bin_size=widgets.FloatText(value=0.1, description='bin size:')
prominence=widgets.IntText(value=60, description='peak prominance:')
distance=widgets.IntText(value=50, description='peak distance:')
lim_tof=widgets.IntText(value=150, description='lim tof/mc:')
percent=widgets.IntText(value=50, description='percent MRP:')

def hist_plot(figname, plot):
    peaks_sides_p = np.zeros(0)
    with out:
        clear_output(True)

        bin_size_p = bin_size.value
        prominence_p = prominence.value
        distance_p = distance.value
        lim_tof_p = lim_tof.value
        percent_p = percent.value
        peaks_ini, peaks_y_ini, peak_widths_p_ini = tools.hist_plot(variables.mc_calib[variables.mc_calib < lim_tof_p], bin_size_p, distance=distance_p, percent=percent_p, prominence=prominence_p, selector='peak', plot=plot, label='mc', fig_name=figname)
        index_max_ini = np.argmax(peaks_y_ini)
        variables.max_peak = peaks_ini[index_max_ini]
        variables.peak = peaks_ini
        mrp = (peaks_ini[index_max_ini] / (peak_widths_p_ini[index_max_ini][2] - peak_widths_p_ini[index_max_ini][1]))
        print('Mass resolving power for the highest peak (MRP --> m/m_2-m_1):', mrp)
        for i in range(len(peaks_ini)):
            print('Peaks ', i, 'is at location and height: ({:.2f}, {:.2f})'.format(peaks_ini[i], peaks_y_ini[i]), 'peak window sides (half-maximum) are: ({:.2f}, {:.2f})'.format(peak_widths_p_ini[i][1], peak_widths_p_ini[i][2]))

In [295]:
peak_val = widgets.FloatText(value=1.1, description='peak value:')
charge = widgets.Dropdown(
    options=[('1', 1), ('2', 2), ('3', 3), ('4', 4)],
    value=3,
    description='charge:'
)
aboundance_threshold = widgets.FloatText(value=1, description='aboundance threshold:')
mode = widgets.Dropdown(
    options=[('elements', 'elements'), ('molecules', 'molecules')],
    value='elements',
    description='mode:'
)

num_element = widgets.IntText(value=1, description='num element:')
# element_threshold = widgets.FloatText(value=1.0, description='Aboundance threshold:')

def element_finder():
    with out:
        peak_val_s = peak_val.value
        charge_s = charge.value
        num_element_s = num_element.value
#         num_molecule_s = num_molecule.value
        mode_s = mode.value
#         element_threshold_s = element_threshold.value
        aboundance_threshold_s = aboundance_threshold.value
        clear_output(True)
        if mode_s == 'elements':
            df = find_close_element(peak_val_s, num_element_s, aboundance_threshold_s, charge=charge_s)
        elif mode_s == 'molecules':
            df = find_close_molecule(peak_val_s, num_element_s, aboundance_threshold_s, charge=charge_s)
        display(df)

In [296]:
isotope_formula = widgets.Text(
    value='',
    placeholder='Type a formula',
    description='Isotope formula:',
    disabled=False
)

def manual_formula_calculator():
    isotopeTableFile = '../../../files/isotopeTable.h5'
    dataframe = data_tools.read_hdf5_through_pandas(isotopeTableFile)
    df = molecule_isotop_list(dataframe, isotope_formula.value)
    with out:
        df = molecule_isotop_list(dataframe, isotope_formula.value)
        clear_output(True)
        display(df)

In [297]:
plot_button = widgets.Button(
    description='plot hist',
)

find_button = widgets.Button(
    description='find element',
)

formula_find_button = widgets.Button(
    description='molecule calculate',
)

peak_lable = widgets.Text(
    value='',
    placeholder='Type peak element',
    description='peak elem:',
    disabled=False
)
    
@plot_button.on_click
def plot_on_click(b, figname=figname, plot=True):
    hist_plot(figname, plot)

    
@find_button.on_click
def vol_on_click(b,):
    element_finder()
    
@formula_find_button.on_click
def manual_formula(b,):
    manual_formula_calculator()
    
tab1 = VBox(children=[bin_size, prominence, distance, lim_tof, percent])
tab2 = HBox(children=[VBox(children=[peak_val, num_element, charge, aboundance_threshold, mode, element_threshold, 
                      find_button]),HBox(children=[isotope_formula, formula_find_button])])


tab = widgets.Tab(children=[tab1, tab2])
tab.set_title(0, 'mc plot')
tab.set_title(1, 'element finder')



display(VBox(children=[tab,HBox(children=[plot_button, peak_lable])]))
out = Output()
display(out)

VBox(children=(Tab(children=(VBox(children=(FloatText(value=0.1, description='bin size:'), IntText(value=60, d…

Output()

In [179]:
peak_lable.value

''

In [None]:
peaks_chos = []
for i in range(len(variables.peaks_idx)):
    peaks_chos.append(variables.peak[variables.peaks_idx[i]])

In [None]:
mc = variables.mc_calib

In [None]:
# GMM number of componnents
from sklearn import mixture
tt = np.copy(mc[mc<100])
tt = np.expand_dims(tt, axis=1)
tt = np.float32(tt)


n_components = np.arange(2, 6)
models = [mixture.GaussianMixture(n, covariance_type='full', random_state=0).fit(tt)
          for n in n_components]

fig1, ax1 = plt.subplots(figsize=(4, 3))
bic_test = [m.bic(tt) for m in models]
aic_test = [m.aic(tt) for m in models]
plt.plot(n_components, bic_test, label='BIC')
plt.plot(n_components, aic_test, label='AIC')
plt.legend(loc='best')
plt.xlabel('n_components')
plt.show()

In [None]:
n_components = min(np.argmin(bic_test), np.argmin(aic_test))
print(n_components)


In [None]:
# n_components = 4

In [None]:
tt = np.copy(mc[mc<100])
tt = np.expand_dims(tt, axis=1)
tt = np.float32(tt)


bins = 0.1
bins = np.linspace(np.min(tt), np.max(tt), round(np.max(tt) / bins))

gmm = mixture.GaussianMixture(n_components=n_components, covariance_type='full').fit(tt)
labels = gmm.predict(tt)


fig1, ax1 = plt.subplots(figsize=(6, 4))
for i in range(n_components):
    A = tt[labels==i]
    plt.hist(A, bins, log=True, histtype='step')
plt.show()

In [None]:
import sys
import numpy as np
import matplotlib.pyplot as plt


class line_drwaer:
    
    def on_button_press(self, event):
        """Callback for mouse button presses."""
        if not self.showverts:
            return
        if event.inaxes is None:
            return
        if event.button != 1:
            return
        self._ind = self.get_ind_under_point(event)

    def on_button_release(self, event):
        """Callback for mouse button releases."""
        if not self.showverts:
            return
        if event.button != 1:
            return
        self._ind = None
        
def on_press(event):
#     print('press', event.key)
#     sys.stdout.flush()
#     if event.key == 'x':
#         visible = xl.get_visible()
#         xl.set_visible(not visible)
#         fig.canvas.draw()
    global line
    line = plt.axvline(x = event.xdata, color = 'b', label = 'axvline - full height')

def onpick1(event):
    if isinstance(event.artist, Line2D):
        thisline = event.artist
        xdata = thisline.get_xdata()
        ydata = thisline.get_ydata()
        ind = event.ind
        global ff
        ff =  np.column_stack([xdata[ind], ydata[ind]])
        print('onpick1 line:', np.column_stack([xdata[ind], ydata[ind]]))
    elif isinstance(event.artist, Rectangle):
        patch = event.artist
        global ff
        ff = event.artist
        print('onpick1 patch:', patch.get_path())
    elif isinstance(event.artist, Text):
        text = event.artist
        global ff
        ff =  event.artist
        print('onpick1 text:', text.get_text())



# Fixing random state for reproducibility
np.random.seed(19680801)

fig, ax = plt.subplots()

fig.canvas.mpl_connect('button_press_event', on_press)
fig.canvas.mpl_connect('pick_event', onpick1)


ax.plot(np.random.rand(12), np.random.rand(12), 'go')
xl = ax.set_xlabel('easy come, easy go')
ax.set_title('Press a key')
plt.show()

In [None]:
props

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class MoveGraphLine(object):
    def __init__(self, ax, graf, obj, eubv):
        self.ax = ax
        self.figcanvas = self.ax.figure.canvas
        self.graf = graf
        self.obj = obj
        self.kayma = 0.0
        self.ebv, self.eub = eubv
        self.moved = None
        self.point = None
        self.pressed = False
        self.start = False

        self.figcanvas.mpl_connect('button_press_event', self.mouse_press)
        self.figcanvas.mpl_connect('button_release_event', self.mouse_release)
        self.figcanvas.mpl_connect('motion_notify_event', self.mouse_move)

    def mouse_release(self, event):
        if self.ax.get_navigate_mode()!= None: return
        if not event.inaxes: return
        if event.inaxes != self.ax: return
        if self.pressed: 
            self.pressed = False
            self.start = False
            self.point = None
            self.ebv -= self.kayma
            self.obj = self.moved
            return

    def mouse_press(self, event):
        if self.ax.get_navigate_mode()!= None: return
        if not event.inaxes: return
        if event.inaxes != self.ax: return
        if self.start: return
        self.point = event.xdata
        self.pressed = True

    def mouse_move(self, event):
        if self.ax.get_navigate_mode()!= None: return
        if not event.inaxes: return
        if event.inaxes != self.ax: return
        if not self.pressed: return
        self.start = True

        self.kayma = self.point - event.xdata
        keub = 0.72 * self.kayma + 0.025 * self.kayma**2
        self.moved = self.obj - [self.kayma, keub]
        mvdx, mvdy = self.moved[:, 0], self.moved[:, 1]

        self.graf.remove()
        self.graf, = self.ax.plot(mvdx, mvdy, linewidth=1.1, c="b")

        self.figcanvas.draw()


model = np.array([(0.310, -0.687), (0.407, -0.355), (0.455, -0.142), (0.504, 0.061), (0.552, 0.238), 
                  (0.601,  0.380), (0.698,  0.549), (0.746,  0.581), (0.795, 0.587), (0.859, 0.567), 
                  (0.956,  0.511), (1.053,  0.473), (1.150,  0.489), (1.199, 0.523), (1.296, 0.640), 
                  (1.393,  0.812), (1.490,  0.981), (1.587,  1.189), (1.684, 1.386), (1.781, 1.572), 
                  (1.878, 1.766)])

obser = np.array([(0.212, -0.114), (0.199, 0.017), (0.259, 0.020), (0.199, 0.076), (0.297, 0.082), 
                  (0.735, 0.085), (0.641, 0.104), (0.791, 0.104), (0.681, 0.109), (0.606, 0.132), 
                  (0.262, 0.135), (0.813, 0.137), (0.334, 0.157), (0.565, 0.165), (0.647, 0.170), 
                  (0.876, 0.174), (0.746, 0.186), (0.509, 0.197), (0.398, 0.203), (0.693, 0.207), 
                  (0.829, 0.215), (0.299, 0.226), (0.585, 0.228), (0.549, 0.242), (0.430, 0.242), 
                  (0.637, 0.253), (0.511, 0.257), (0.918, 0.268), (0.813, 0.269), (0.746, 0.271), 
                  (0.336, 0.288), (0.449, 0.297), (0.398, 0.299), (0.783, 0.306), (0.578, 0.312), 
                  (0.871, 0.330), (0.515, 0.345), (0.468, 0.353), (0.818, 0.380), (0.936, 0.391), 
                  (0.889, 0.416), (0.876, 0.503), (1.027, 0.522), (1.040, 0.601), (0.965, 0.656), 
                  (1.130, 0.796), (1.224, 0.845), (1.261, 0.964), (1.378, 1.149)])


obv, oub =obser[:, 0], obser[:, 1]

fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(5.31,6.79))
fig.subplots_adjust(left=0.14, bottom=0.08, right=0.95, top=0.97, wspace=0, hspace=0)

shift = (model.mean(axis=0) - obser.mean(axis=0))
sebv = shift[0]
seub = 0.72 * sebv + 0.025 * sebv**2

ax1.invert_yaxis()
ax1.set_xlabel("BmV")
ax1.set_ylabel("UmB")
ax1.set_ylim(2, -1)
ax1.set_xlim(-0.5, 2)
ax1.scatter(obv, oub, s=2, c="k")
gmdl, = ax1.plot(model[:, 0], model[:, 1], linewidth=1.1, c="b")

moveline = MoveGraphLine(ax1, gmdl, model, (sebv, seub))

plt.show()

In [None]:
print('onpick points:', ff)

In [None]:
from sklearn import cluster
from scipy.spatial import distance
import sklearn.datasets
from sklearn.preprocessing import StandardScaler
import numpy as np

def compute_bic(kmeans,X):
    """
    Computes the BIC metric for a given clusters

    Parameters:
    -----------------------------------------
    kmeans:  List of clustering object from scikit learn

    X     :  multidimension np array of data points

    Returns:
    -----------------------------------------
    BIC value
    """
    # assign centers and labels
    centers = [kmeans.cluster_centers_]
    labels  = kmeans.labels_
    #number of clusters
    m = kmeans.n_clusters
    # size of the clusters
    n = np.bincount(labels)
    #size of data set
    N, d = X.shape

    #compute variance for all clusters beforehand
    cl_var = (1.0 / (N - m) / d) * sum([sum(distance.cdist(X[np.where(labels == i)], [centers[0][i]], 
             'euclidean')**2) for i in range(m)])

    const_term = 0.5 * m * np.log(N) * (d+1)

    BIC = np.sum([n[i] * np.log(n[i]) -
               n[i] * np.log(N) -
             ((n[i] * d) / 2) * np.log(2*np.pi*cl_var) -
             ((n[i] - 1) * d/ 2) for i in range(m)]) - const_term

    return(BIC)





# IRIS DATA
iris = sklearn.datasets.load_iris()
X = iris.data[:, :4]  # extract only the features
#Xs = StandardScaler().fit_transform(X)
Y = iris.target

ks = range(1,10)

# run 9 times kmeans and save each result in the KMeans object
KMeans = [cluster.KMeans(n_clusters = i, init="k-means++").fit(X) for i in ks]

# now run for each cluster the BIC computation
BIC = [compute_bic(kmeansi,X) for kmeansi in KMeans]
plt.plot(ks, BIC, 'r-o')
plt.title("iris data  (cluster vs BIC)")
plt.xlabel("# clusters")
plt.ylabel("# BIC")
plt.show()

In [None]:
# K-means

tt = np.copy(dld_t[dld_t < 1500])
tt = np.expand_dims(tt, axis=1)
tt = np.float32(tt)
bins = 0.1
num_k = 4
bins = np.linspace(np.min(tt), np.max(tt), round(np.max(tt) / bins))
fig1, ax1 = plt.subplots(figsize=(8, 6))
plt.hist(tt, bins, log=True)
plt.show()


# Define criteria = ( type, max_iter = 10 , epsilon = 1.0 )
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
# Set flags (Just to avoid line break in the code)
flags = cv2.KMEANS_RANDOM_CENTERS
# Apply KMeans
compactness,labels,centers = cv2.kmeans(tt , num_k, None, criteria,10,flags)

fig1, ax1 = plt.subplots(figsize=(8, 6))
for i in range(num_k):
    A = tt[labels==i]
    plt.hist(A, bins, log=True)
plt.hist(centers,bins, color='black')
plt.show()
print(centers)

In [None]:
# isotopeTableFile = '../../../files/isotopeTable.h5'
# dataframe = data_tools.read_hdf5_through_pandas(isotopeTableFile)
# elementsList = dataframe['element']
# elementIsotopeList = dataframe['isotope']
# elementMassList =  dataframe['weight']
# abundanceList = dataframe['abundance']

# elements = list(zip(elementsList, elementIsotopeList, elementMassList, abundanceList))
# dropdownList = []
# for element in elements:
#     tupleElement = ("{} ({}) ({:.2f})".format(element[0],element[1],element[3]), "{}{}".format(element[0],element[1]))
#     dropdownList.append(tupleElement)

# chargeList = [(1,1,),(2,2,),(3,3,),(4,4,)]
# dropdown = wd.dropdownWidget(dropdownList, "Elements")
# dropdown.observe(wd.on_change_ions_selection)


# chargeDropdown = wd.dropdownWidget(chargeList, "Charge")
# chargeDropdown.observe(wd.on_change_charge_ions_selection)

# wd.compute_element_isotope_values_according_to_selected_charge(mode='ions_selection')

# buttonAdd = wd.buttonWidget("ADD")
# buttonDelete = wd.buttonWidget("DELETE")
# buttonReset = wd.buttonWidget("RESET")

# display(dropdown)
# display(chargeDropdown)
# display(buttonAdd)
# display(buttonDelete)
# display(buttonReset)
# listMaterial = buttonAdd.on_click(wd.onClickAdd)
# buttonDelete.on_click(wd.onClickDelete)
# buttonReset.on_click(wd.onClickReset)

In [None]:
molecule_dataframe

In [None]:
fake = Factory.create()
color = np.zeros(len(mc), dtype='object' )
element = np.zeros(len(mc), dtype='object')

for index, elemen in enumerate(variables.listMaterial):
    mask = np.logical_and((peaks_sides_p[index,2] < mc), (mc < peaks_sides_p[index,3])).squeeze()
    print(mask[mask==True].shape)

    index_true = np.where(mask==True)
    index_get_to_false = np.random.choice(index_true[0], size=int(index_true[0].shape[0] - index_true[0].shape[0]), replace=False)
    mask[index_get_to_false] = False
    print('ploted ions', mask[mask==True].shape)
    
    color[mask] = fake.hex_color()
    element[mask] = elemen



In [None]:
molecul_list_file = '../../../files/list_of_chemical.csv'
pd.read_csv(molecul_list_file, encoding= 'utf-8', header = 0) 

In [None]:
data['element'] = element
data['color'] = color

In [None]:
data

In [None]:
data.dtypes

In [None]:
# save the new data
name_save_file = variables.result_path + '//' + dataset_name + '.h5'
data_tools.store_df_to_hdf(name_save_file, data, 'df')

In [None]:
# save data in csv format
data_tools.store_df_to_csv(data, variables.result_path + dataset_name + '.csv')