In [None]:
import math
import os

import h5py
import lineid_plot
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import interpolate
from sklearn import preprocessing

from ramandecompy import dataimport
from ramandecompy import dataprep
from ramandecompy import datavis
from ramandecompy import peakidentify
from ramandecompy import spectrafit

In [None]:
# Inspect the calibration HDF5 file with ramandecompy's `dataprep.view_hdf5`
# helper (presumably lists the file's contents — confirm against the helper).
dataprep.view_hdf5('ML_quad_calibration.hdf5')

In [None]:
# Name of the HDF5 file that the following cell opens with h5py.
hdf5_filename = 'ML_quad_calibration.hdf5'

In [None]:
# Open the file read-only. The handle is never closed in the visible notebook
# and is reused later when its keys are counted and listed.
hdf5 = h5py.File(hdf5_filename, 'r')

In [None]:
# Number of top-level keys in the opened HDF5 file.
len(hdf5.keys())

In [None]:
# NOTE(review): `df` is not assigned anywhere above this cell in the visible
# notebook (its first assignment is inside the peak-assignment loop near the
# end), so on a fresh kernel this line raises NameError — TODO confirm which
# table this cell was meant to inspect.
# Shows the entry at position 156 of `df.keys()`.
df.keys()[156:][0]

In [None]:
def clean_spectra(df, compound):
    """
    Function that cleans the data of any consecutive duplicate x-values that
    will cause errors for interpolation.
    Args:
        df (mapping or pandas.DataFrame): container indexed by column name. It
                                must provide a 'Wavelength' entry (x-values)
                                and an entry named `compound` (y-values).
        compound (str): key/column in `df` holding the y-values of the
                        desired compound
    Returns:
        comp_data_clean (list): list of (x, y) tuples in their original order;
                                every point whose x-value equals the x-value
                                of the point immediately before it is dropped
    Raises:
        TypeError: if `compound` is not a string
    """
    # handling errors in inputs
    if not isinstance(compound, str):
        raise TypeError("Passed value of `compound` is not a string! Instead, it is: "
                        + str(type(compound)))
    # pair every x-value with its y-value
    comp_data = list(zip(df['Wavelength'], df[compound]))
    comp_data_clean = []
    for index, point in enumerate(comp_data):
        # The first point has no predecessor and is always kept; the loop also
        # runs through the final point, so neither end of the spectrum is lost.
        if index == 0 or point[0] != comp_data[index - 1][0]:
            comp_data_clean.append(point)
    return comp_data_clean


def interpolate_spectra(comp_data_clean):
    """
    Resample a cleaned spectrum onto an integer x-grid using linear
    interpolation.
    Args:
        comp_data_clean (list): list of (x, y) tuples containing all the
                                non-repeated x and y values
    Returns:
        comp_data_int (list): list of (x, y) tuples, one for every integer x
                              from int(min(x)) + 1 up to int(max(x)) - 1, with
                              y taken from the linear interpolant
    Raises:
        TypeError: if `comp_data_clean` is not a list, or if any of its
                   elements is not a tuple
    """
    # validate the container first, then each point it holds
    if not isinstance(comp_data_clean, list):
        raise TypeError('Passed value of `comp_data_clean` is not a list! Instead, it is: '
                        + str(type(comp_data_clean)))
    for point in comp_data_clean:
        if not isinstance(point, tuple):
            raise TypeError('Component of the passed value is not a tuple! Instead, it is: '
                            + str(type(point)))
    # split the points into parallel x and y sequences
    x_values, y_values = zip(*comp_data_clean)
    # build the interpolant; the original author noted that the 'cubic' and
    # 'quadratic' kinds did not work for this data, hence 'linear'
    interpolant = interpolate.interp1d(x_values, y_values, kind='linear')
    # integer grid lying strictly inside the sampled x-range, so the
    # interpolant is never evaluated out of bounds
    grid_start = int(min(x_values)) + 1
    grid_stop = int(max(x_values))
    integer_grid = np.arange(grid_start, grid_stop, 1)
    # evaluate on the grid and pair each grid point with its value
    interpolated_y = interpolant(integer_grid)
    return list(zip(integer_grid, interpolated_y))
def sum_spectra(comp1_data_int, comp2_data_int):
    """
    Function that adds the interpolated values for two spectra together.
    Points that share an x-value have their y-values summed; an x-value that
    appears in only one spectrum keeps that spectrum's y-value.
    Args:
        comp1_data_int (list): list of (x, y) tuples for compound 1
        comp2_data_int (list): list of (x, y) tuples for compound 2
    Returns:
        x_combined (numpy array): the distinct x-values of both spectra, in
                                  ascending order
        y_combined (numpy array): the summed y-value for each entry of
                                  x_combined
        Both arrays are empty when both inputs are empty.
    Raises:
        TypeError: if either argument is not a list
    """
    # handling errors in inputs (both arguments are held to the same contract)
    for arg_name, arg_value in (('comp1_data_int', comp1_data_int),
                                ('comp2_data_int', comp2_data_int)):
        if not isinstance(arg_value, list):
            raise TypeError('Passed value of `' + arg_name
                            + '` is not a list! Instead, it is: '
                            + str(type(arg_value)))
    # merge the two spectra; sorting makes the dict below ordered by x
    combined = sorted(comp1_data_int + comp2_data_int)
    if not combined:
        # nothing to sum; unpacking an empty zip would otherwise raise
        return np.asarray([]), np.asarray([])
    # accumulate y-values that share the same x
    same_x = {}
    for x_value, y_value in combined:
        same_x[x_value] = same_x.get(x_value, 0) + y_value
    # set as arrays
    x_combined = np.asarray(list(same_x.keys()))
    y_combined = np.asarray(list(same_x.values()))
    return x_combined, y_combined
def combine_spectra(df, compound_1, compound_2, plot=False):
    """
    Wrapping function that sums the spectra of two compounds stored in `df`.
    Each compound is cleaned with `clean_spectra`, resampled onto an integer
    x-grid with `interpolate_spectra`, and the two results are added with
    `sum_spectra`. The name of the combined column is printed as a side
    effect. There is an optional plotting function embedded.
    Args:
         df (mapping or pandas.DataFrame): container with a 'Wavelength' entry
                         (x-values) and one entry per compound (y-values)
         compound_1 (str): key/column in `df` for the first compound
         compound_2 (str): key/column in `df` for the second compound
         plot (boolean): (Optional) This argument is used to dictate whether or not you
                         would like to output a plot which shows the combined spectra,
                         as well as the two original spectra, overlaid on the same plot.
                         Defaults to False.
     Returns:
         y_combined (pandas.DataFrame): single-column frame holding the summed
                                        y-values of the two spectra. The
                                        matching x-values are only used for
                                        the optional plot and are not returned.
         column (str): label of the combination, '<compound_1>+<compound_2>'
     Raises:
         TypeError: if a compound key is not a string (raised by `clean_spectra`)
    """
    data1 = clean_spectra(df, compound_1)
    data2 = clean_spectra(df, compound_2)
    comp1_data_int = interpolate_spectra(data1)
    comp2_data_int = interpolate_spectra(data2)
    x_combined, y_combined = sum_spectra(comp1_data_int, comp2_data_int)
    y_combined = pd.DataFrame(y_combined)
    column = str(compound_1)+'+'+str(compound_2)
    print(column)
    if plot:
        # plot original data and combined plot
        plt.figure(figsize=(15, 5))
        # the legend entries are the compound names, not the data columns
        plt.plot([point[0] for point in data1], [point[1] for point in data1],
                 'b--', label=compound_1)
        plt.plot([point[0] for point in data2], [point[1] for point in data2],
                 'g--', label=compound_2)
        plt.plot(x_combined, y_combined, 'r', label='Combination', linewidth=2, alpha=0.7)
        plt.legend()
        plt.xlabel('cm$^{-1}$', fontsize=14)
        plt.ylabel('Absorption', fontsize=14)
    return y_combined, column

In [None]:
# NOTE(review): `df` has no assignment above this cell in the visible notebook
# (the only visible read of a table into `df` is commented out further down),
# so this cell depends on leftover kernel state — TODO confirm the intended
# source of `df`.
# Number of entries in the 'Wavelength' column.
len(df['Wavelength'])

In [None]:
# key_list = ['Hydrogen','Sapphire','CarbonDioxide','CarbonMonoxide','Formic Acid','Water','Acetaldehyde','Ethane','Methane','Propane']
# frames = [ combine_spectra(df, key_x, key_y, plot=False)[0] for key_x in key_list for key_y in key_list ]
# columns = [ combine_spectra(df, key_x, key_y, plot=False)[1] for key_x in key_list for key_y in key_list ]

In [None]:
# frames

In [None]:
# columns

In [None]:
# Collect the top-level keys of the opened HDF5 file.
key_list = list(hdf5.keys())
# Encode every key as an integer label. `preprocessing` (sklearn) is imported
# with the rest of the dependencies in the notebook's import cell.
le = preprocessing.LabelEncoder()
encoded_value = le.fit_transform(key_list)
print(encoded_value)
print(key_list)

In [None]:
# result = pd.concat(frames,axis=1, join='outer', join_axes=None, ignore_index=False,
#           keys=columns, levels=None, names=None, verify_integrity=False,
#           copy=True,sort=True)

In [None]:
# result = result.join(df['Wavelength'])

In [None]:
# result

In [None]:
# result.to_excel("double.xlsx",sheet_name='Sheet_name_1')

In [None]:
# df = pd.read_excel('../examples/double.xlsx')

In [None]:
# df

In [None]:
# combine_spectra(df, 'Hydrogen+Hydrogen','Hydrogen+Formic Acid', plot=True)

In [None]:
# key_list = columns
# frames = [ combine_spectra(df, key_x, key_y, plot=False)[0] for key_x in key_list for key_y in key_list ]
# columns = [ combine_spectra(df, key_x, key_y, plot=False)[1] for key_x in key_list for key_y in key_list ]


In [None]:
# result

In [None]:
# columns

In [None]:
# Calibration and experiment data are currently read from the same HDF5 file.
# NOTE(review): `key_list` was built from 'ML_quad_calibration.hdf5', while
# these names point at the '-Copy2' file — confirm both hold the same keys.
hdf5_calfilename = 'ML_quad_calibration-Copy2.hdf5'
hdf5_expfilename = 'ML_quad_calibration-Copy2.hdf5'
# Run peak assignment once per key and collect every result.
# Iterating the keys directly avoids binding `_`, which IPython uses for the
# last cell output. `df` is left holding the result for the last key because
# a later cell displays it.
# NOTE(review): the positional 50 is an undocumented tuning value passed to
# `peak_assignment` — TODO confirm its meaning and name it.
frames = []
for key in key_list:
    df = peakidentify.peak_assignment(hdf5_expfilename, key, hdf5_calfilename, 50, plot=False)
    frames.append(df)

In [None]:
# Display every collected peak-assignment result (presumably one DataFrame
# per key — confirm against `peak_assignment`); this output can be very long.
frames

In [None]:
# Transposed view of `df`, which at this point holds the result for the last
# key processed by the peak-assignment loop.
df.T

In [None]:
# Stack the per-key peak-assignment results row-wise into a single table.
# `join_axes` is no longer accepted by pandas.concat (removed in pandas 1.0),
# and the other dropped keywords were all at their default values, so only
# the options that matter are spelled out.
result = pd.concat(frames, axis=0, join='outer', sort=True)

In [None]:
# Display the concatenated table built from `frames`.
result

In [None]:
# result.to_excel("triple.xlsx",sheet_name='Sheet_name_1')