In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pickle
from ramannoodles import spectrafit
from ramannoodles import shoyu
import pandas as pd
import math
from ramannoodles import peakidentify
from ramannoodles import test_peakidentify

ImportError: cannot import name 'test_peakidentify'

In [None]:
# Download the standard spectra library. Re-running this is harmless: the
# method reports that the files are already downloaded.
shoyu.initialize_standard_library()
# Open the pickled spectra library. The context manager closes the file handle
# as soon as loading finishes instead of leaving it open for the kernel's lifetime.
# NOTE(review): pickle.load can execute arbitrary code -- only load a library
# file that comes from a trusted source.
with open('../raman_spectra/shoyu_data_dict.p', 'rb') as library_file:
    shoyu_data_dict = pickle.load(library_file)
# List the keys of the library (the names used below to look up compounds).
sorted(shoyu_data_dict.keys())

In [None]:
# Look up two library entries and combine them with shoyu.combine_spectra;
# plot = True asks the helper to draw the result as well.
compound_1 = shoyu_data_dict['WATER']
compound_2 = shoyu_data_dict['CARBON MONOXIDE']
spectra_x, spectra_y = shoyu.combine_spectra(compound_1, compound_2, plot = True)

In [None]:
def peak_1D_score(rowA,rowB,scoremax):
    """
    Scores every pair of peaks (one from rowA, one from rowB) by the
    reciprocal of their 1D Euclidean distance.

    The raw score of a pair is 1 / (|a - b| + 1): identical positions score 1
    and the score decays towards 0 as the peaks move apart. Pairs whose raw
    score is not strictly above 0.02 (i.e. peaks 49 or more units apart) are
    dropped. Each kept raw score is divided by scoremax before being stored.

    Parameters:
        rowA (list): peak positions of the first spectrum
        rowB (list): peak positions of the second spectrum
        scoremax (float): normalization divisor applied to every raw score;
            pass 1 to get the raw scores back

    Returns:
        scores (list): normalized reciprocal-distance scores, ordered by
            rowA position first and rowB position second
        peaks (list): (rowA peak, rowB peak) tuples aligned with scores
    """
    # Raw scores at or below this value are discarded (1/(49 + 1) == 0.02).
    min_raw_score = .02
    scores = []
    peaks = []

    for peak_a in rowA:
        for peak_b in rowB:
            # In 1D the Euclidean distance sqrt((a - b)^2) is simply |a - b|.
            # Using the absolute value treats both directions symmetrically;
            # the previous signed "a - b > 50" guard only masked one direction
            # and was redundant with the score threshold below.
            distance = abs(peak_a - peak_b)
            # Kept as a numpy float so the returned values have the same type
            # (and division semantics) as before.
            raw_score = np.float64(1)/(distance + 1)
            if raw_score > min_raw_score:
                scores.append(raw_score/scoremax)
                peaks.append((peak_a, peak_b))
    return scores,peaks

def score_max(list_input, row,k):
    """
    Returns the peak-pair scores of peak_1D_score normalized by the k-th
    highest distinct raw score.

    With k=1 every score is divided by the best score, so the best match
    becomes 1. If the k-th highest distinct score cannot be selected (fewer
    than k distinct scores, no matched peaks at all, or a non-integer k), the
    raw, un-normalized scores are returned instead.

    Parameters:
        list_input (list): peak positions of the first spectrum
        row (list): peak positions of the second spectrum
        k (int): rank (1 = highest) of the distinct raw score used as the
            normalization divisor

    Returns:
        maxscores (list): reciprocal-distance scores divided by the k-th
            highest distinct raw score (or the raw scores on fallback)
        maxpeaks (list): peak pairs associated with maxscores
    """
    # Raw scores: a divisor of 1 leaves them un-normalized.
    rawscores,rawpeaks = peak_1D_score(list_input,row,1)
    try:
        scoremax = sorted(set(rawscores))[-k]
    except (IndexError, TypeError):
        # IndexError: fewer than k distinct scores; TypeError: k is not an
        # integer index. Both fall back to the raw scores, as before, but any
        # other (unexpected) error is no longer silently swallowed.
        return rawscores,rawpeaks

    maxscores,maxpeaks = peak_1D_score(list_input,row,scoremax)
    return maxscores,maxpeaks
def score_sort(list_input, row,k):
    """
    Wraps the result of score_max in a one-element list.

    Despite the name, no sorting is performed here: the scores keep the
    order produced by score_max.

    Parameters:
        list_input (list):  input list
        row (list): input list
        k (int): kth-highest-score rank forwarded to score_max

    Returns:
        sortedscores (list): single-element list holding the
            (scores, peaks) pair returned by score_max
    """
    return [score_max(list_input, row, k)]

In [None]:
def test_peak_1D_score():
    """Evaluates the functionality of the peak_1D_score function"""
    # Initialize the test arguments
    row_i = [0, 1]
    row_j = [2, 1]
    ArrayA = np.array([[0, 1], [2, 1], [0, 3]])
    # Run Function for lists (one call yields both outputs)
    testscore, testpeaks = peak_1D_score(row_i, row_j, 1)
    # Run Function for arrays
    Arrayscore, Arraypeaks = peak_1D_score(ArrayA[0], ArrayA[2], 1)
    # make assertions
    # All peaks here are close together, so every (A, B) pair is scored:
    # the output length is len(A) * len(B), not len(A) + len(B).
    assert len(testscore) == len(row_i) * len(row_j), 'Unexpected number of scores for list input'
    assert len(Arrayscore) == ArrayA[0].size * ArrayA[2].size, 'Unexpected number of scores for array input'
    assert len(testpeaks) == len(testscore), 'Scores and peaks are not aligned'
    assert len(Arraypeaks) == len(Arrayscore), 'Scores and peaks are not aligned'
    assert testpeaks == [(0, 2), (0, 1), (1, 2), (1, 1)], 'Peak pairs are not in the expected order'
    np.testing.assert_allclose(testscore, [1/3, 1/2, 1/2, 1])
    np.testing.assert_allclose(Arrayscore, [1, 1/4, 1/2, 1/3])
    for score in list(testscore) + list(Arrayscore):
        assert 0 < score <= 1, 'Output value outside acceptable range'
    # Peaks that are far apart must not be scored at all, in either direction.
    assert peak_1D_score([0], [1000], 1) == ([], []), 'Distant peaks should be dropped'
    assert peak_1D_score([1000], [0], 1) == ([], []), 'Distant peaks should be dropped'

def test_score_max():
    """Evaluates the functionality of the score_max function"""
    # Initialize the test arguments
    row_i = [0, 1]
    row_j = [2, 1]
    ArrayA = np.array([[0, 1], [2, 1], [0, 3]])
    k = 2
    # Run Function
    maxscores, maxpeaks = score_max(row_i, row_j, k)
    Arrmaxscores, Arrmaxpeaks = score_max(ArrayA[0], ArrayA[1], k)
    # make assertions
    # Raw scores are [1/3, 1/2, 1/2, 1]; the 2nd highest distinct score is 1/2,
    # so every score is divided by 1/2.
    expected = [2/3, 1, 1, 2]
    np.testing.assert_allclose(maxscores, expected)
    np.testing.assert_allclose(Arrmaxscores, expected)
    assert maxpeaks == [(0, 2), (0, 1), (1, 2), (1, 1)], 'Peak pairs are not in the expected order'
    assert len(Arrmaxpeaks) == len(Arrmaxscores), 'Scores and peaks are not aligned'
    # Asking for a rank beyond the number of distinct scores falls back to the
    # raw, un-normalized scores.
    fallbackscores, fallbackpeaks = score_max(row_i, row_j, 10)
    np.testing.assert_allclose(fallbackscores, [1/3, 1/2, 1/2, 1])
    assert fallbackpeaks == maxpeaks, 'Fallback changed the matched peaks'
        
def test_score_sort():
    """Evaluates the functionality of the score_sort function"""
    # Initialize the test arguments
    row_i = [0, 1]
    row_j = [2, 1]
    ArrayA = np.array([[0, 1], [2, 1], [0, 3]])
    k = 2
    # Reference result from score_max for the same inputs
    maxscores, maxpeaks = score_max(row_i, row_j, k)
    # Run Function. k must be the integer rank: passing max(maxscores) (a float)
    # only "worked" because score_max silently fell back to raw scores.
    result = score_sort(row_i, row_j, k)
    Arrsortedscores = score_sort(ArrayA[0], ArrayA[1], k)[0][0]
    # make assertions
    assert isinstance(result, list) and len(result) == 1, 'score_sort should return a one-element list'
    sortedscores, sortedpeaks = result[0]
    assert list(sortedscores) == list(maxscores), 'Scores differ from score_max output'
    assert list(sortedpeaks) == list(maxpeaks), 'Peaks differ from score_max output'
    np.testing.assert_allclose(Arrsortedscores, [2/3, 1, 1, 2])

In [None]:
# Run the peak_1D_score checks defined in this notebook. The previous name,
# peakidentifytest_peak_1D_score, does not exist and raised a NameError.
test_peak_1D_score()
# I feel like most of my thing could be a pairwise correlation function

In [None]:
# Run the score_max checks; a failing check raises AssertionError.
test_score_max()

In [None]:
# Run the score_sort checks; a failing check raises AssertionError.
test_score_sort()

In [None]:
# Pull the x/y data of each reference compound out of the library.
compound_1 = shoyu_data_dict['WATER']
x_water = compound_1['x']
y_water = compound_1['y']
compound_2 = shoyu_data_dict['CARBON MONOXIDE']
# Read from compound_2 (previously copy-pasted from compound_1, i.e. water).
x_co = compound_2['x']
y_co = compound_2['y']
compound_3 = shoyu_data_dict['CARBON DIOXIDE']
# Read from compound_3 (previously copy-pasted from compound_1, i.e. water).
x_co2 = compound_3['x']
y_co2 = compound_3['y']
# Element 0 of each compound report is kept as that compound's peak centers.
peaks_centers1 = spectrafit.compound_report(compound_1)[0]
print(peaks_centers1)
peaks_centers2 = spectrafit.compound_report(compound_2)[0]
peaks_centers3 = spectrafit.compound_report(compound_3)[0]
# Same order as the compounds above: water, carbon monoxide, carbon dioxide.
centerlist = [peaks_centers1, peaks_centers2, peaks_centers3]
print(centerlist)

In [None]:
# Combine compound_1 and compound_2 into one spectrum (plotted by the helper).
combined_x12,combined_y12 = shoyu.combine_spectra(compound_1,compound_2, plot = True)

In [None]:
# Element 0 of the data report for the combined 1+2 spectrum.
data_peaks_combine12 = spectrafit.data_report(combined_x12, combined_y12)[0]
# Raw scores (divisor 1) of the compound_1 centers against the mixture peaks,
# once via the named variable and once via centerlist[0] (the same list).
peak_1D_score(peaks_centers1, data_peaks_combine12, 1)
peak_1D_score(centerlist[0], data_peaks_combine12, 1)
# Same comparison, normalized by the highest raw score; only this last
# expression is displayed by the cell.
peak_1D_score(
    peaks_centers1,
    data_peaks_combine12,
    max(peak_1D_score(peaks_centers1, data_peaks_combine12, 1)[0]),
)

In [None]:
# Display the raw (divisor 1) scores and matched peak pairs.
peak_1D_score(peaks_centers1,data_peaks_combine12,1)

In [None]:
# Same call using centerlist[0], which was built from peaks_centers1.
peak_1D_score(centerlist[0],data_peaks_combine12,1)

In [None]:
# Scores normalized by the highest raw score, so the best match becomes 1.
peak_1D_score(
    peaks_centers1,
    data_peaks_combine12,
    max(peak_1D_score(peaks_centers1, data_peaks_combine12, 1)[0]),
)

In [None]:
# Scores normalized by the second-highest distinct raw score
# (the inline equivalent of score_max(..., k=2) without its fallback).
peak_1D_score(
    peaks_centers1,
    data_peaks_combine12,
    sorted(set(peak_1D_score(peaks_centers1, data_peaks_combine12, 1)[0]))[-2],
)

In [None]:
# Combine compound_1 and compound_3 into one spectrum (plotted by the helper).
combined_x13,combined_y13  = shoyu.combine_spectra(compound_1, compound_3, plot = True)

In [None]:
# Combine compound_2 and compound_3 into one spectrum (plotted by the helper).
combined_x23,combined_y23  = shoyu.combine_spectra(compound_2, compound_3, plot = True)

In [None]:
# NOTE: this rebinds data_peaks_combine12 to the FULL data_report result; an
# earlier cell stored only element 0 under the same name. Cells below index [0].
data_peaks_combine12 = spectrafit.data_report(combined_x12,combined_y12)


In [None]:
# Full data_report result for the combined 1+3 spectrum.
data_peaks_combine13 = spectrafit.data_report(combined_x13,combined_y13)


In [None]:
# Full data_report result for the combined 2+3 spectrum.
data_peaks_combine23 = spectrafit.data_report(combined_x23,combined_y23)


In [None]:
# One full data_report result per mixture, in the order 1+2, 1+3, 2+3.
combinedlist=[data_peaks_combine12,data_peaks_combine13,data_peaks_combine23]

In [None]:
# Scores of the 1+2 mixture peaks (report element 0) against the compound_1
# centers, normalized by the highest score (k=1).
print(score_sort(data_peaks_combine12[0],peaks_centers1,1))

In [None]:
# Inspect the inputs: compound_1 centers, the whole 1+2 report, and its element 0.
print(peaks_centers1)
print(data_peaks_combine12)
print(data_peaks_combine12[0])

In [None]:
# Inspect more inputs. combinedlist[0][:][1] is element 1 of the 1+2 report
# (the [:] only takes a copy first); centerlist[0] is the compound_1 centers.
print(peaks_centers2)
print(list(combinedlist[0][:][1]))
print(centerlist[0])

In [None]:
# Matched peak pairs (second item of the score_sort entry) for the 1+3 mixture
# against the compound_1 centers, using the 2nd-highest score as divisor.
data = score_sort(data_peaks_combine13[0], peaks_centers1, 2)[0][1]
print(data)

In [None]:

# Score the 1+2 mixture peaks against the compound_1 (water) centers once and
# reuse both outputs instead of recomputing them for each column.
water_match_scores, water_match_peaks = score_sort(data_peaks_combine12[0],peaks_centers1,2)[0]
compdf = pd.DataFrame(data=water_match_scores,columns=['WATER_vs_CO_WATER_Scores'])
compdf = compdf.assign(WATER_vs_CO_WATER_Peaks=water_match_peaks)
# Same for the compound_2 (carbon monoxide) centers.
co_match_scores, co_match_peaks = score_sort(data_peaks_combine12[0],peaks_centers2,2)[0]
compdf2 = pd.DataFrame(data=co_match_scores,columns=['CO_vs_CO_WATER_Scores'])
# This column holds the matched peak pairs; it used to be mislabelled
# 'CO_vs_CO_WATER_scores', which differed from the score column only by case.
compdf2 = compdf2.assign(CO_vs_CO_WATER_Peaks=co_match_peaks)
data=score_sort(data_peaks_combine13[0],peaks_centers1,2)
print(data)


In [None]:
# Show both comparison tables built in the previous cell.
print(compdf)
print(compdf2)

In [None]:
# NOTE(review): len(data_peaks_combine12) is the length of the whole
# data_report result, presumably not the number of peaks in its element 0 --
# confirm which one k is meant to span.
k_range = range(1,len(data_peaks_combine12))
for k in k_range:
    # Score once per k and reuse both outputs (scores and matched peak pairs).
    k_scores, k_peaks = score_sort(data_peaks_combine12[0],peaks_centers1,k)[0]
    compdf = pd.DataFrame(data=k_scores,columns=['Score for max peak #'+str(k)])
    compdf = compdf.assign(peaks=k_peaks)
    print(compdf)

In [None]:
# NOTE(review): as above, this is the length of the data_report result rather
# than (presumably) the number of peaks -- confirm the intended range of k.
k_range = range(1,len(data_peaks_combine23))
for i in range(len(combinedlist)):
    # Element 0 of the i-th mixture report. The previous combinedlist[0][:][i]
    # always read the FIRST report and walked through its elements instead of
    # selecting the i-th mixture.
    mixture_centers = combinedlist[i][0]
    for j in range(len(centerlist)):
        for k in k_range:
            # Score once per (mixture, compound, k) and reuse both outputs.
            k_scores, k_peaks = score_sort(centerlist[j],mixture_centers,k)[0]
            compdf = pd.DataFrame(data=k_scores,columns=['Score for max peak k#'+str(k)])
            compdf = compdf.assign(peaks=k_peaks)
            print(compdf)

In [None]:
# (scores, peaks) entry for the compound_1 centers against element 0 of the
# first mixture report, normalized by the highest score (k=1).
data = score_sort(centerlist[0], combinedlist[0][0], 1)[0]
print(data)

In [None]:
# Score the compound_1 centers against element 0 of the first mixture report
# (k=1) and split the single score_sort entry into its two parts.
scores, peaks = score_sort(centerlist[0], combinedlist[0][0], 1)[0]
print(peaks)
compdf = pd.DataFrame(data=scores, columns=['WATER_comp_CO_Scores'])
compdf = compdf.assign(WATER__comp_CO_Peaks=peaks)
# The same peak pairs are printed again and stored under a second column name.
print(peaks)
compdf = compdf.assign(Peaks=peaks)
print(compdf)

In [None]:
# Rebuild compdf with only the score column (drops the peak columns added above).
compdf = pd.DataFrame(data=scores,columns=['WATER_comp_CO_Scores'])

In [None]:
# you will need to download the file yourself from the team google drive and edit location
# Load the experimental spectrum and name its two columns 'x' and 'y'.
# NOTE(review): with pandas' default header handling the first sheet row is
# treated as a header row; pass header=None if the file has none -- confirm
# against the actual file.
df = pd.read_excel('../examples/FormicAcid_3percentconc_400C_5s_00000.xlsx', names=('x', 'y'))

In [None]:
# Extract the experimental x/y columns as arrays. Nothing is plotted in this
# cell, so the stray plt.figure(...) call that only produced an empty figure
# has been dropped; the plotting cell below creates its own figure.
x_data = df['x'].values
y_data = df['y'].values

In [None]:
# Exp_peaks = spectrafit.data_report(x_data, y_data)

In [None]:
# Re-select the reference compounds for the experimental comparison.
# NOTE: compound_3 is rebound here to ETHYL ALCOHOL (it was CARBON DIOXIDE in
# an earlier cell); carbon dioxide is now compound_5.
compound_1 = shoyu_data_dict['WATER']
compound_2 = shoyu_data_dict['CARBON MONOXIDE']
compound_3 = shoyu_data_dict['ETHYL ALCOHOL']
compound_4 = shoyu_data_dict['FORMIC ACID']
compound_5 = shoyu_data_dict['CARBON DIOXIDE']
# Combined water + CO and water + CO2 spectra (each plotted by the helper).
H2O_CO_x, H2O_CO_y = shoyu.combine_spectra(compound_1, compound_2, plot = True)
H2O_CO2_x, H2O_CO2_y = shoyu.combine_spectra(compound_1, compound_5, plot = True)

In [None]:
# Convert the combined spectra to numpy arrays before passing them to data_report.
H2O_CO_x = np.asarray(H2O_CO_x)
H2O_CO_y = np.asarray(H2O_CO_y)
H2O_CO2_x = np.asarray(H2O_CO2_x)
H2O_CO2_y = np.asarray(H2O_CO2_y)

In [None]:
# Element 0 of each compound report (used in this notebook as the peak centers).
water_peaks = spectrafit.compound_report(compound_1)[0]
co_peaks = spectrafit.compound_report(compound_2)[0]
co2_peaks = spectrafit.compound_report(compound_5)[0]
# Full data_report results (not just element 0) for the two combined spectra.
H2O_CO_peaks = spectrafit.data_report(H2O_CO_x, H2O_CO_y)
print(H2O_CO_peaks)
H2O_CO2_peaks = spectrafit.data_report(H2O_CO2_x, H2O_CO2_y)


In [None]:
# Show the full data_report result for the water + CO2 spectrum.
print(H2O_CO2_peaks)

In [None]:
# Plot the experimental spectrum on a fresh high-resolution figure.
fig = plt.figure(figsize=(6,4), dpi = 300)
plt.plot(x_data, y_data, color = 'blue',label ='Experimental')
#plt.plot(H2O_CO_x, H2O_CO_y, color = 'red', label = 'Sample Spectra')
plt.xlabel('cm$^{-1}$', fontsize=14)
# Axis label spelling fixed (was 'Absoprtion').
plt.ylabel('Absorption', fontsize=14)