## Pipeline Flavor Profile

In [880]:
import pandas as pd
import numpy as np
from fuzzywuzzy import fuzz
import csv
import numpy as np


In [881]:
'''
does convert a pandas dataframe into a matrix where 
keys = row name
values = col name
data = either 0 or 1 
'''
def dict_to_matrix(dictionary):
    unique_values = set(val for sublist in dictionary.values() for val in sublist)
    data_dict = {key: {val: 1 if val in values else 0 for val in unique_values} for key, values in dictionary.items()}
    df = pd.DataFrame(data_dict).T
    return(df)


In [882]:
'''
find overlapp between to lists with a threshold (=70)
'''
def overlapping_elements(list1, list2, threshold=70):
    overlap = []
    for elem1 in list1:
        for elem2 in list2:
            ratio = fuzz.ratio(elem1, elem2)
            if ratio >= threshold:
                overlap.append(elem1)
    return overlap

In [883]:
'''
Convert DataFrame to dictionary
'''
def dataframe_to_dict(df):
    result_dict = {}
    for index, row in df.iterrows():
        if row[0] in result_dict:
            result_dict[row[0]].append(row[1])
        else:
            result_dict[row[0]] = [row[1]]
    return result_dict     

In [884]:
'''
find match between dictionary entries and the rows of a dataframe
'''
def find_matching_entries(dictionary,df):
    threshold = 80
    found_match = []
    found_match_per_entry = {}
    for key, value in dictionary.items():
        all_matches = set()
        for index, row in df.iterrows():
            for keyword in value:
                if fuzz.ratio(keyword, str(index).lower()) >= threshold:
                    all_matches.add(index)
                    found_match.append(row)
        found_match_per_entry[key] = all_matches
    found_match = pd.DataFrame(found_match)
    found_match = found_match.drop_duplicates()
    return(found_match, found_match_per_entry)

In [885]:
'''
create a new matrix by finding matches and filtering right columns
'''
def create_matrix_from_dict(dictionary,df, index):
    matrix = pd.DataFrame(columns=list(df), index=index)
    final_dict = {}
    for key, value in dictionary.items():
        current_val = []
        for v in value:
            for index, row in df.iterrows():
                if index == v:
                    current_val.append(','.join(row.loc[row.eq(1)].index.tolist()))
                    # todo: maybe exception -> do change if two ones in a row
                    matrix.loc[key][row.loc[row.eq(1)].index.tolist()] = 1
        final_dict[key] = set(current_val)
    matrix = matrix.fillna(0)
    matrix = matrix.loc[:, (matrix != 0).any(axis=0)]
    return(final_dict, matrix)
   

#### 1. MS Data ----- Massbank -----> Molecules

-> Chris

save which intensities belong to which molecule and give each the % 
2 approaches for one molecule with multiple OQs
1.  each gets same % 
2.  divide % by count of OQs per molecule

<span style="color:lightblue">mol_int</span> : dict where key is the molecule name and value is the intensites from the mass spec data

In [886]:
# DUMMY DATA
intensities = [80, 40, 5]
ms_cas =['(E)-3-hexenol', "coumarin",'citral']
mol_int = {'(E)-3-hexenol': 80, "coumarin": 40 ,'citral': 5}

In [887]:
# actual ms data
file_path = 'data/cas_intensities.csv'
data = pd.read_csv(file_path, delimiter=',')
# only use first repliate
data = data[data["replicate"]==1]

In [888]:
sample_key = "000920"

In [889]:
used_data = data[data["sample_key"]==sample_key]
#filter columns that are 0
used_data = used_data.loc[:, (used_data != 0).any(axis=0)]
used_data

Unnamed: 0,sample_key,replicate,6485-40-1,3033-23-6,21284-22-0,38427-78-0,495-61-4,29873-99-2,6909-30-4,99-49-0,...,13466-78-9,483-76-1,36564-42-8,705-86-2,2825-91-4,713-95-1,20307-84-0,120021-96-7,586-62-9,710-04-3
0,920,1,194354200.0,1240854000.0,104829500.0,90873190.0,183122300.0,183122300.0,604309100.0,194354200.0,...,252885400.0,183122300.0,104829500.0,464570600.0,464570600.0,189377700.0,183122300.0,183122300.0,252885400.0,266212400.0


In [890]:
'''# find rows with biggest variances
df = used_data
row_combinations = list(itertools.combinations(df.index, 2))
distances = [abs(df.loc[i[0]] - df.loc[i[1]]).sum() for i in row_combinations]
max_distance_indices = row_combinations[distances.index(max(distances))]
print(df.loc[max_distance_indices[0]])
print(df.loc[max_distance_indices[1]])
#=> sample: 000879 and 000373'''

'# find rows with biggest variances\ndf = used_data\nrow_combinations = list(itertools.combinations(df.index, 2))\ndistances = [abs(df.loc[i[0]] - df.loc[i[1]]).sum() for i in row_combinations]\nmax_distance_indices = row_combinations[distances.index(max(distances))]\nprint(df.loc[max_distance_indices[0]])\nprint(df.loc[max_distance_indices[1]])\n#=> sample: 000879 and 000373'

In [891]:
n=150

In [892]:
used_data = used_data.drop(columns=['sample_key', 'replicate'])
largest_entries = used_data.iloc[0].nlargest(n)
#largest_entries = np.log1p(used_data.iloc[0].nlargest(n))

# Create a new DataFrame with only the largest entries
data_top50 = pd.DataFrame({'Intensities': largest_entries})
data_top50

Unnamed: 0,Intensities
3033-23-6,1.240854e+09
876-17-5,1.240854e+09
07.11.5258,1.240854e+09
21662-09-9,1.240854e+09
491-04-3,1.240854e+09
...,...
68039-26-9,2.996598e+08
112-42-5,2.996598e+08
39638-67-0,2.984695e+08
80041-01-6,2.984695e+08


In [893]:
ms_cas = list(data_top50.index)

In [894]:
intensities = list(data_top50['Intensities'])

<span style="color:#FBB714">I_vector</span> : dataframe with *molecules x intensities*

In [895]:
#I_vector = pd.DataFrame(data = mol_int, index = ["intensities"]).T
I_vector = pd.DataFrame(data= intensities,index = ms_cas, columns=["intensities"])
I_vector

Unnamed: 0,intensities
3033-23-6,1.240854e+09
876-17-5,1.240854e+09
07.11.5258,1.240854e+09
21662-09-9,1.240854e+09
491-04-3,1.240854e+09
...,...
68039-26-9,2.996598e+08
112-42-5,2.996598e+08
39638-67-0,2.984695e+08
80041-01-6,2.984695e+08


#### 2. Molecules ----- Flavornet -----> OD 
Find ODs by matching molecules with databases using Flavornet

<span style="color:lightblue">CAS_to_odorants.csv</span> : table with following columns - molecule, CAS, Mol wt, odorant


##### table manipualtion done in R

```r
library(rlang)
library(tidyverse)

CAS_to_odorants <- read.table("~/Bioinformatik_20.21/Bachelorarbeit/CAS_to_mol.txt", sep = "\t", header = FALSE)
OD_to_mol <- read.table("~/Bioinformatik_20.21/Bachelorarbeit/OD_mol.txt", sep = "\t", header = FALSE)

OD_to_mol_sep <- OD_to_mol %>%
  separate_rows(V2, sep = ";\\s*") %>%
  mutate(V2 = trimws(V2))
OD_to_mol_final <- OD_to_mol_sep[, c("V2","V1")]
colnames(OD_to_mol_final) <- c("molecule", "OD")

CAS_to_odorants_sep <- CAS_to_odorants %>%
  separate_rows(V3, sep = ";\\s*") %>%
  mutate(V3 = trimws(V3))
colnames(CAS_to_odorants_sep) <- c("CAS","Mol Wt", "molecule")


merged_df <- merge(CAS_to_odorants_sep, OD_to_mol_final, by = "molecule")
write.csv(merged_df, file = "~/Bioinformatik_20.21/Bachelorarbeit/CAS_mol_OD.csv", row.names = FALSE)
```

*Flavornet only*

<span style="color:lightblue">mol_to_OD</span> : dict where key is the molecule and the descriptors for these molecules are the values

In [896]:
'''
read flavornet data and save it in mol_to_OD
'''
# only using flavornet data
mol_to_OD = {}
descriptors = []
CAS_numbers= []
with open('./data/CAS_mol_OD.csv', 'r',encoding='utf-8') as tabfile:
    reader = csv.reader(tabfile, delimiter=',')
    for row in reader: 
        CAS = row[1]
        OD = row[3]
        CAS_numbers.append(CAS)
        descriptors.append(OD)
        if CAS in mol_to_OD:
            mol_to_OD[CAS].add(OD)
        else:
            mol_to_OD[CAS]  = set([OD])


CAS_numbers = list(set(CAS_numbers))
# list of all individual descriptors/ODs
descriptors = list(set(descriptors))

<span style="color:#FBB714">D</span>: Dataframe with all *molecules x ODs* - rigth now only flavornet database

In [897]:
D = pd.DataFrame(index=CAS_numbers, columns=descriptors)

In [898]:
len(CAS_numbers)

732

In [899]:
for col in D.columns:
    for index, row in D.iterrows():
        # Check if value in  current row matches any entry in the dictionary
        if index in mol_to_OD.keys() and col in mol_to_OD[index]:
            # Set to 1
            D.at[index, col] = 1

D = D.fillna(0)
D

Unnamed: 0,pear,butter,baked,urine,paint,sweet,cotton candy,hummus,crushed bug,pea,...,potato,marshmallow,melon,cologne,cooked meat,caramel,tart lime,hazelnut,grass,plastic
122-00-9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
105-54-4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28588-75-2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
120-72-9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
104-61-0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118-61-6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
143-07-7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
24415-26-7,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
105-66-8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


match molecules from MS data with flavors

In [900]:
len(ms_cas)

150

In [901]:
flavors = []
mol_flavors = {}
for mol in ms_cas:
    if mol in mol_to_OD.keys():
        flavors += mol_to_OD[mol]
        mol_flavors[mol] = mol_to_OD[mol]
    else:
        mol_flavors[mol] = {}

found_flavors = set(flavors)



<span style="color:lightblue">mol_flavors</span>: contains the molecule from where each OD is coming from 

In [902]:
mol_flavors

{'3033-23-6': {'rose', 'sweet'},
 '876-17-5': {'flower', 'green'},
 '07.11.5258': {'flower'},
 '21662-09-9': {'green', 'must'},
 '491-04-3': {'herb'},
 '562-74-3': {'must', 'nutmeg', 'turpentine'},
 '470-67-7': {'spice'},
 '470-82-6': {'mint', 'sweet'},
 '2497-25-8': {'orange', 'tallow'},
 '3913-71-1': {'orange', 'tallow'},
 '3913-81-3': {'orange', 'tallow'},
 '4861-58-9': {'fruit', 'sweet'},
 '586-82-3': {'must'},
 '105683-99-6': {'cucumber', 'green', 'tallow'},
 '147159-48-6': {'cucumber', 'green', 'tallow'},
 '29887-38-5': {'herb'},
 '507-70-0': {'camphor'},
 '465-31-6': {'camphor'},
 '15537-55-0': {'balsamic'},
 '106-23-0': {'fat'},
 '619-01-2': {'mint', 'spice'},
 '128386-31-2': {'metal'},
 '1632-73-1': {'camphor'},
 '106-24-1': {'geranium', 'rose'},
 '124-76-5': {'camphor', 'must'},
 '18675-35-9': {'spice', 'wood'},
 '16750-94-0': {'rose'},
 '5944-20-7': {'rose'},
 '498-16-8': {'herb'},
 '78-70-6': {'flower', 'lavender'},
 '89-80-5': {'fresh', 'green', 'mint'},
 '10458-14-7': {'f

<span style="color:lightblue">found_flavors</span>: list of the ODs found from the identified molecules 


In [903]:
found_flavors

{'alkane',
 'almond',
 'anise',
 'apple',
 'apple peel',
 'balsamic',
 'banana',
 'beet',
 'bread',
 'burnt',
 'burnt sugar',
 'butterscotch',
 'camphor',
 'caramel',
 'caraway',
 'cheese',
 'citrus',
 'clove',
 'coconut',
 'coriander',
 'cotton candy',
 'cucumber',
 'dill',
 'dust',
 'earth',
 'ester',
 'fat',
 'flower',
 'fresh',
 'fried',
 'fruit',
 'geranium',
 'grape',
 'grapefruit',
 'green',
 'herb',
 'hot milk',
 'hummus',
 'hyacinth',
 'lactone',
 'lavender',
 'lemon',
 'licorice',
 'mandarin',
 'medicine',
 'metal',
 'mildew',
 'mint',
 'mold',
 'mothball',
 'mushroom',
 'must',
 'nut',
 'nutmeg',
 'oil',
 'orange',
 'pea',
 'peach',
 'peppermint',
 'phenol',
 'pungent',
 'rancid',
 'roast',
 'rose',
 'rubber',
 'seaweed',
 'smoke',
 'soap',
 'solvent',
 'spearmint',
 'spice',
 'straw',
 'sweat',
 'sweet',
 'tallow',
 'turpentine',
 'vanilla',
 'walnut',
 'warm',
 'wax',
 'wine',
 'wood'}

<span style="color:#FBB714">OD</span>: Dataframe - *molecules x ODs*

In [904]:
# filters only relevant rows - only those ODs coming from indentified molecules
OD = D[(D.index).isin(I_vector.index)]
# get only important/matching columns
OD = OD.loc[:, (OD != 0).any(axis=0)]
OD 

Unnamed: 0,sweet,cotton candy,hummus,pea,solvent,alkane,burnt,roast,mold,ester,...,lavender,green,caraway,burnt sugar,mandarin,peach,mildew,oil,orange,caramel
120-72-9,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
134346-43-3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
65767-22-8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
72755-76-1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
113486-29-6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6378-65-0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
5989-33-3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
142-92-7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
705-86-2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


#### 3. OD ----------> OQ


### Whisky Research institute

In [905]:
Research_data = pd.read_csv('./data/swri_flavorwheel.csv', sep=';')
Research_OQ_OD =  Research_data.drop(columns=Research_data.columns[0])


In [906]:
OD_OQ = dict_to_matrix(dataframe_to_dict(Research_OQ_OD)).T

<span style="color:#FBB714">OD_OQ</span>: Dataframe - all ODs x OQs from research institute

In [907]:
OD_OQ

Unnamed: 0,Burnt,Smoky,Medicinal,Leathery,Tobacco,Sweaty,Dry cereals,Wet cereals,leafy,Herbal,...,Solvently,Soapy,Sweet,Woody,Nutty,Spicy,Oily,Sour,Sulphury,Stale
TCP,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
leather upholstery,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Burnt toast,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Green/grassy,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Raisins,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Tyres,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
spent fireworks,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Geranium leaves,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Horse saddle,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [908]:
# find matching OQs to found ODs from massspec x flavornet
matching_OQs, matching_OQs_dict = find_matching_entries(mol_flavors, OD_OQ)

In [909]:
# get only important columns
matching_OQs = matching_OQs.loc[:, (matching_OQs != 0).any(axis=0)]
matching_OQs

Unnamed: 0,Sweaty,Green vegetables,Fresh flowers,Fresh fruit,Citrus,Artificial fruit flavourings,Toffee,Nuts,Dried spice,Vomit,Earthy,Fruity,Solvently,Soapy,Sweet,Woody,Spicy,Oily
Roses,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Sweet,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
nutmeg,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
Spicy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
oranges,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
Fruity,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
cucumber,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Woody,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
Oily,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
Butterscotch,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [910]:
len(matching_OQs)

18

In [911]:
# to know which OD comes from which molecule
#remove empty entries
matching_OQs_dict = {key: value for key, value in matching_OQs_dict.items() if value}
matching_OQs_dict

{'3033-23-6': {'Roses', 'Sweet'},
 '562-74-3': {'nutmeg'},
 '470-67-7': {'Spicy'},
 '470-82-6': {'Sweet'},
 '2497-25-8': {'oranges'},
 '3913-71-1': {'oranges'},
 '3913-81-3': {'oranges'},
 '4861-58-9': {'Fruity ', 'Sweet'},
 '105683-99-6': {'cucumber'},
 '147159-48-6': {'cucumber'},
 '619-01-2': {'Spicy'},
 '106-24-1': {'Roses'},
 '18675-35-9': {'Spicy', 'Woody'},
 '16750-94-0': {'Roses'},
 '5944-20-7': {'Roses'},
 '78-70-6': {'lavender'},
 '106-25-2': {'Sweet'},
 '18479-68-0': {'Fruity '},
 '17699-16-0': {'Woody'},
 '98-55-5': {'Oily'},
 '20407-84-5': {'Sweet'},
 '1189-09-9': {'Fruity '},
 '3943-74-6': {'Butterscotch'},
 '589-75-3': {'Fruity '},
 '112-17-4': {'Oily', 'oranges'},
 '50862-12-9': {'Apples'},
 '21391-99-1': {'Woody'},
 '1197-07-5': {'Solvently'},
 '92760-25-3': {'Lemons'},
 '2785-89-9': {'Clove', 'Spicy'},
 '5392-40-5': {'Lemons'},
 '141-27-5': {'Lemons'},
 '58615-39-7': {'Fruity ', 'Sweet'},
 '1195-92-2': {'Fruity '},
 '106-26-3': {'Lemons'},
 '1786-08-9': {'Oily'},
 '31

<span style="color:#FBB714">OQs</span>: Dataframe - ODs x OQs

In [912]:
OQ_dict, OQs = create_matrix_from_dict(matching_OQs_dict, matching_OQs,matching_OQs_dict.keys())

In [913]:
OQs

Unnamed: 0,Sweaty,Green vegetables,Fresh flowers,Fresh fruit,Citrus,Artificial fruit flavourings,Toffee,Nuts,Dried spice,Vomit,Earthy,Fruity,Solvently,Soapy,Sweet,Woody,Spicy,Oily
3033-23-6,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
562-74-3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
470-67-7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
470-82-6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
2497-25-8,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
334-48-5,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
106-32-1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2051-50-5,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
68039-26-9,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [914]:
OQ_dict

{'3033-23-6': {'Fresh flowers', 'Sweet'},
 '562-74-3': {'Dried spice'},
 '470-67-7': {'Spicy'},
 '470-82-6': {'Sweet'},
 '2497-25-8': {'Citrus'},
 '3913-71-1': {'Citrus'},
 '3913-81-3': {'Citrus'},
 '4861-58-9': {'Fruity ', 'Sweet'},
 '105683-99-6': {'Green vegetables'},
 '147159-48-6': {'Green vegetables'},
 '619-01-2': {'Spicy'},
 '106-24-1': {'Fresh flowers'},
 '18675-35-9': {'Spicy', 'Woody'},
 '16750-94-0': {'Fresh flowers'},
 '5944-20-7': {'Fresh flowers'},
 '78-70-6': set(),
 '106-25-2': {'Sweet'},
 '18479-68-0': {'Fruity '},
 '17699-16-0': {'Woody'},
 '98-55-5': {'Oily'},
 '20407-84-5': {'Sweet'},
 '1189-09-9': {'Fruity '},
 '3943-74-6': {'Toffee'},
 '589-75-3': {'Fruity '},
 '112-17-4': {'Citrus', 'Oily'},
 '50862-12-9': {'Fresh fruit'},
 '21391-99-1': {'Woody'},
 '1197-07-5': {'Solvently'},
 '92760-25-3': set(),
 '2785-89-9': {'Spicy'},
 '5392-40-5': set(),
 '141-27-5': set(),
 '58615-39-7': {'Fruity ', 'Sweet'},
 '1195-92-2': {'Fruity '},
 '106-26-3': set(),
 '1786-08-9': {'

#### 4.  OQ -----------> OSA


<span style="color:#FBB714">OQ_OSA</span>: Dataframe - all OQs x OSAs from research institute

In [915]:
Research_OSA_OQ = Research_data.drop(columns=Research_data.columns[2])
OQ_OSA = dict_to_matrix(dataframe_to_dict(Research_OSA_OQ)).T

 #### 4.1 Binary Matrix

In [916]:
# find matching OQs to found ODs from massspec x flavornet
matching_OSAs, matching_OSAs_dict = find_matching_entries(OQ_dict, OQ_OSA)

In [917]:
len(matching_OSAs_dict)

65

In [972]:
matching_OSAs = matching_OSAs.loc[:, (matching_OSAs != 0).any(axis=0)]
matching_OSAs

Unnamed: 0,feinty,Green/grassy,Floral,Fruity,Solvently,Soapy,Sweet,Woody,Spicy,Sour,Stale
Sweet,0,0,0,0,0,0,1,0,0,0,0
Fresh flowers,0,0,1,0,0,0,0,0,0,0,0
Dried spice,0,0,0,0,0,0,0,0,1,0,0
Citrus,0,0,0,1,0,0,0,0,0,0,0
Green vegetables,0,1,0,0,0,0,0,0,0,0,0
Woody,0,0,0,0,0,0,0,1,0,0,0
Solvently,0,0,0,0,1,0,0,0,0,0,0
Soapy,0,0,0,0,0,1,0,0,0,0,0
Sweaty,1,0,0,0,0,0,0,0,0,0,0
Earthy,0,0,0,0,0,0,0,0,0,0,1


In [920]:
matching_OSAs_dict=  {key: value for key, value in matching_OSAs_dict.items() if value}

In [973]:
matching_OSAs_dict

{'3033-23-6': {'Fresh flowers', 'Sweet'},
 '562-74-3': {'Dried spice'},
 '470-67-7': {'Spicy'},
 '470-82-6': {'Sweet'},
 '2497-25-8': {'Citrus'},
 '3913-71-1': {'Citrus'},
 '3913-81-3': {'Citrus'},
 '4861-58-9': {'Fruity ', 'Sweet'},
 '105683-99-6': {'Green vegetables'},
 '147159-48-6': {'Green vegetables'},
 '619-01-2': {'Spicy'},
 '106-24-1': {'Fresh flowers'},
 '18675-35-9': {'Spicy', 'Woody'},
 '16750-94-0': {'Fresh flowers'},
 '5944-20-7': {'Fresh flowers'},
 '106-25-2': {'Sweet'},
 '18479-68-0': {'Fruity '},
 '17699-16-0': {'Woody'},
 '20407-84-5': {'Sweet'},
 '1189-09-9': {'Fruity '},
 '3943-74-6': {'Toffee'},
 '589-75-3': {'Fruity '},
 '112-17-4': {'Citrus'},
 '50862-12-9': {'Fresh fruit'},
 '21391-99-1': {'Woody'},
 '1197-07-5': {'Solvently'},
 '2785-89-9': {'Spicy'},
 '58615-39-7': {'Fruity ', 'Sweet'},
 '1195-92-2': {'Fruity '},
 '31823-43-5': {'Green vegetables'},
 '698-10-2': {'Spicy'},
 '4077-47-8': {'Sweet'},
 '4630-82-4': {'Artificial fruit flavourings', 'Fruity '},
 '2

<span style="color:#FBB714">OSAs</span>: Dataframe - OQs x OSAs

In [978]:
OSA_dict, OSAs = create_matrix_from_dict(matching_OSAs_dict, matching_OSAs,ms_cas)

In [979]:
print(OSAs)

            feinty  Green/grassy  Floral  Fruity   Solvently  Soapy  Sweet  \
3033-23-6        0             0       1        0          0      0      1   
876-17-5         0             0       0        0          0      0      0   
07.11.5258       0             0       0        0          0      0      0   
21662-09-9       0             0       0        0          0      0      0   
491-04-3         0             0       0        0          0      0      0   
...            ...           ...     ...      ...        ...    ...    ...   
68039-26-9       0             0       0        0          0      0      0   
112-42-5         0             0       0        0          0      0      0   
39638-67-0       0             0       0        0          0      0      0   
80041-01-6       0             0       0        0          0      0      0   
55013-32-6       0             0       0        0          0      0      0   

            Woody  Spicy  Sour  Stale  
3033-23-6       0      

In [992]:
from sklearn.preprocessing import Normalizer
normalizer = Normalizer()
I_vector_normalized = normalizer.fit_transform(I_vector.T).T
I_vector_normalized

array([[0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.1250692 ],
       [0.12453967],
       [0.12453967],
       [0.08075173],
       [0.08075173],
       [0.08075173],
       [0.08075173],
       [0.080

In [993]:
OSAs

Unnamed: 0,feinty,Green/grassy,Floral,Fruity,Solvently,Soapy,Sweet,Woody,Spicy,Sour,Stale
3033-23-6,0,0,1,0,0,0,1,0,0,0,0
876-17-5,0,0,0,0,0,0,0,0,0,0,0
07.11.5258,0,0,0,0,0,0,0,0,0,0,0
21662-09-9,0,0,0,0,0,0,0,0,0,0,0
491-04-3,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
68039-26-9,0,0,0,0,0,0,0,0,0,0,0
112-42-5,0,0,0,0,0,0,0,0,0,0,0
39638-67-0,0,0,0,0,0,0,0,0,0,0,0
80041-01-6,0,0,0,0,0,0,0,0,0,0,0


In [994]:
len(I_vector_normalized)

150

In [995]:
final_OSA = OSAs*I_vector_normalized
final_OSA

Unnamed: 0,feinty,Green/grassy,Floral,Fruity,Solvently,Soapy,Sweet,Woody,Spicy,Sour,Stale
3033-23-6,0.0,0.0,0.125069,0.0,0.0,0.0,0.125069,0.0,0.0,0.0,0.0
876-17-5,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
07.11.5258,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
21662-09-9,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
491-04-3,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
68039-26-9,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
112-42-5,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
39638-67-0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
80041-01-6,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0


In [996]:
final_OSA.sum(axis=0)*10

feinty          0.476931
Green/grassy    3.885389
Floral          5.002768
Fruity          4.946345
Solvently       0.609100
Soapy           0.510637
Sweet           8.782281
Woody           3.695653
Spicy           1.250692
Sour            0.302036
Stale           0.873368
dtype: float64

#### 4.2 Fuzzy Logic 
Use Fuzzy logic to weight OQ with expert knowledge/Intensity from Masspec


In [930]:
# list with OSA and corresponding OQs -> look at inner circle of flavorwheel
OSAs_corresponding_OQs = dataframe_to_dict(Research_OSA_OQ.drop_duplicates())
OSAs_corresponding_OQs

{'peaty': ['Burnt', 'Smoky', 'Medicinal', 'peaty'],
 'feinty': ['Leathery', 'Tobacco', 'Sweaty', 'feinty'],
 'cereal': ['Dry cereals', 'Wet cereals', 'cereal'],
 'Green/grassy': ['leafy', 'Herbal', 'Green vegetables', 'Green/grassy'],
 'Floral': ['Fresh flowers', 'Perfumed', 'Floral'],
 'Fruity ': ['Fresh fruit',
  'Dried fruit',
  'Citrus',
  'Artificial fruit flavourings',
  'Fruity '],
 'Solvently': ['Nail varnish remover', 'Solvently'],
 'Soapy': ['Unperfumed soap', 'Soapy'],
 'Sweet': ['Vanilla', 'Honey', 'Toffee', 'Sweet'],
 'Woody': ['New wood', 'Okay', 'Woody'],
 'Nutty': ['Coconut', 'Nuts', 'Toasted', 'Nutty'],
 'Spicy': ['Dried spice', 'Peppery', 'Spicy'],
 'Oily': ['Buttery ', 'Waxy', 'Oils/fats', 'Oily'],
 'Sour': ['Vinegary', 'Cheesy', 'Vomit', 'Sour'],
 'Sulphury': ['Cooked vegetables ',
  'Rubbery',
  'Struck match ',
  'Decaying',
  'Meaty ',
  'Sulphury'],
 'Stale': ['Earthy', 'Musty', 'Metallic', 'Stale']}

In [931]:
# all found OQs for our mass spec data
found_OQs =set(item for sublist in OQ_dict.values() for item in sublist)
found_OQs

{'Artificial fruit flavourings',
 'Citrus',
 'Dried spice',
 'Earthy',
 'Fresh flowers',
 'Fresh fruit',
 'Fruity ',
 'Green vegetables',
 'Nuts',
 'Oily',
 'Soapy',
 'Solvently',
 'Spicy',
 'Sweaty',
 'Sweet',
 'Toffee',
 'Vomit',
 'Woody'}

1. normalize OQs

In [932]:
CAS_intensities_OQs  = final_I_vector.copy()
CAS_intensities_OQs["OQs"]= OQ_dict
CAS_intensities_OQs

Unnamed: 0,intensities,OQs
3033-23-6,1240854000.0,"{Fresh flowers, Sweet}"
562-74-3,1240854000.0,{Dried spice}
470-67-7,1240854000.0,{Spicy}
470-82-6,1240854000.0,{Sweet}
2497-25-8,1240854000.0,{Citrus}
3913-71-1,1240854000.0,{Citrus}
3913-81-3,1240854000.0,{Citrus}
4861-58-9,1240854000.0,"{Fruity , Sweet}"
105683-99-6,1240854000.0,{Green vegetables}
147159-48-6,1240854000.0,{Green vegetables}


add up all intensities per OQ 

intensity_per_OQ: dict that contains the OQ and the summed up intensities

In [933]:
intensity_per_OQ = {}
for found_OQ in found_OQs:
    for index, row in CAS_intensities_OQs.iterrows():
        if found_OQ in row["OQs"]:
            if found_OQ in intensity_per_OQ:
                intensity_per_OQ[found_OQ] += row["intensities"]
            else:
                intensity_per_OQ[found_OQ] = row["intensities"]

In [934]:
'''
normalizes the OQs by dividing each row/cell by the overall intensities, meaning the sum of all intensities
'''
def normalize_OQs(df_intensities):
    normalized_df = pd.DataFrame(columns=["normalized_intensities"])
    overall_intensities = df_intensities["intensities"].sum()
    normalized_df["normalized_intensities"] = df_intensities/overall_intensities
    return(normalized_df)

In [935]:
OQ_intensities = pd.DataFrame(intensity_per_OQ.values(), index = intensity_per_OQ.keys(), columns=["intensities"])

In [936]:
OQ_intensities

Unnamed: 0,intensities
Artificial fruit flavourings,506620200.0
Oily,1184875000.0
Sweaty,473179400.0
Dried spice,1240854000.0
Citrus,4907438000.0
Fruity,9368004000.0
Fresh fruit,1019965000.0
Solvently,604309100.0
Soapy,506620200.0
Vomit,299659800.0


In [937]:
normalized_OQ_intensities = normalize_OQs(OQ_intensities)
normalized_OQ_intensities

Unnamed: 0,normalized_intensities
Artificial fruit flavourings,0.010531
Oily,0.024629
Sweaty,0.009836
Dried spice,0.025792
Citrus,0.102006
Fruity,0.194724
Fresh fruit,0.021201
Solvently,0.012561
Soapy,0.010531
Vomit,0.006229


2. Looking at each OSA indiviudally to find intensity

In [938]:
'''
find all OQs that are corresponding to one specific OSA
'''
def find_OQ_per_OSA_with_intensities(OSA):
    OQ_per_OSA = {}
    for consisting_OQs in OSAs_corresponding_OQs[OSA]:
        if consisting_OQs in normalized_OQ_intensities.index:
            OQ_per_OSA[consisting_OQs] = normalized_OQ_intensities.loc[consisting_OQs]
    # normalize so that all intensities per OSA sums up to 1
    df_OQ_per_OSA = pd.DataFrame(OQ_per_OSA).T
    if len(OQ_per_OSA)==0:
        df_OQ_per_OSA['normalized_intensities']=0
    df_OQ_per_OSA = df_OQ_per_OSA.div(df_OQ_per_OSA['normalized_intensities'].sum())
    return(df_OQ_per_OSA)

equation_2:
The function µ(𝑥) is defined as:

$$
\mu(𝑥) =
\begin{cases}
0 & \text{if } 𝑥 ≤ 𝑎_1 \\
\frac{𝑥−𝑎_1}{𝑎_2−𝑎_1} & \text{if } 𝑎_1 < 𝑥 ≤ 𝑎_2 \\
1 & \text{if } 𝑎_2 < 𝑥 ≤ 𝑎_3 \\
\frac{𝑎_4−𝑥}{𝑎_4−𝑎_3} & \text{if } 𝑎_3 < 𝑥 ≤ 𝑎_4 \\
0 & \text{if } 𝑎_4 < 𝑥 \\
\end{cases}
$$


In [939]:
#most of the time -> TODO: optimize these with cross validation  
a1 = 0
a2 = 0
a3 = 0.33
a4 = 1

In [940]:
def equation_2(x,a1,a2,a3,a4):
    if x <= a1:
        µ = 0
    elif a1 < x and x <= a2:
        µ = (x-a1)/(a2-a1)
    elif a2 < x and x <= a3:
        µ = 1
    elif a3 < x and x <= a4:
        µ = (a4-x)/(a4-a3)
    elif a4 < x:
        µ = 0
    return µ

equation3: 

𝐼𝑛𝑡𝑒𝑛𝑠𝑖𝑡𝑦(𝑂𝑆𝐴) = 10 × ∏^i 1 µ(𝑂𝑄𝑖)

In [941]:
def equation_3(µ_OQs_list):
    intensity_OSA = 10 * np.prod(µ_OQs_list)
    return(intensity_OSA)


e.g. with OSA sweet

In [942]:
# e.g sweet consists of: OQs ['Vanilla', 'Honey', 'Toffee', 'Sweet']
OSAs_corresponding_OQs['Sweet']

['Vanilla', 'Honey', 'Toffee', 'Sweet']

In [943]:
# sweet 
sweet = find_OQ_per_OSA_with_intensities('Sweet')

In [944]:
sweet

Unnamed: 0,normalized_intensities
Toffee,0.084206
Sweet,0.915794


In [945]:
'''OQ1 = equation_2(sweet.loc["Sweet"]["normalized_intensities"],a1,a2,0.33,a4)
OQ2= equation_2(sweet.loc["Vanilla"]["normalized_intensities"],a1,a2,0.033,a4)
OQ3= equation_2(sweet.loc["Toffee"]["normalized_intensities"],a1,a2,0.043,a4)
equation_3([OQ1,OQ2,OQ3]) '''

'OQ1 = equation_2(sweet.loc["Sweet"]["normalized_intensities"],a1,a2,0.33,a4)\nOQ2= equation_2(sweet.loc["Vanilla"]["normalized_intensities"],a1,a2,0.033,a4)\nOQ3= equation_2(sweet.loc["Toffee"]["normalized_intensities"],a1,a2,0.043,a4)\nequation_3([OQ1,OQ2,OQ3]) '

for all OSAs

In [946]:
all_OSAs = list(OSAs_corresponding_OQs.keys())

In [947]:
OSAs_int = {}
for OSA in all_OSAs: 
   OSAs_int[OSA] = 0
   current_OSA = find_OQ_per_OSA_with_intensities(OSA)
   OQ_int = []
   for OQ in current_OSA.index:
      OQ_int.append(equation_2(current_OSA.loc[OQ]["normalized_intensities"],a1,a2,a3,a4))
   OSAs_int[OSA] = equation_3(OQ_int)

In [948]:
OSAs_int

{'peaty': 10.0,
 'feinty': 0.0,
 'cereal': 10.0,
 'Green/grassy': 0.0,
 'Floral': 0.0,
 'Fruity ': 6.07708062340634,
 'Solvently': 0.0,
 'Soapy': 0.0,
 'Sweet': 1.256803921146227,
 'Woody': 0.0,
 'Nutty': 10.0,
 'Spicy': 2.906127058957573,
 'Oily': 0.0,
 'Sour': 0.0,
 'Sulphury': 10.0,
 'Stale': 0.0}

In [949]:
df_OSAs_int = pd.DataFrame(OSAs_int.values(), index = OSAs_int.keys(), columns = ["Intensities"])

In [950]:
df_OSAs_int

Unnamed: 0,Intensities
peaty,10.0
feinty,0.0
cereal,10.0
Green/grassy,0.0
Floral,0.0
Fruity,6.077081
Solvently,0.0
Soapy,0.0
Sweet,1.256804
Woody,0.0


In [951]:
plot_fuzzy_OSA = df_OSAs_int[df_OSAs_int['Intensities'] != 0]
plot_fuzzy_OSA

Unnamed: 0,Intensities
peaty,10.0
cereal,10.0
Fruity,6.077081
Sweet,1.256804
Nutty,10.0
Spicy,2.906127
Sulphury,10.0


In [952]:
plot_fuzzy_OSA.index

Index(['peaty', 'cereal', 'Fruity ', 'Sweet', 'Nutty', 'Spicy', 'Sulphury'], dtype='object')

40,30,50,40,30,40,30,40,60
40,30,50,30,40,30,50,20,40
40,30,50,30,40,30,50,20,40
floral,fruit,wood,complexity,malt,sweet,peat,dried_fruits,aftertaste,date

[3,4,0,0,0,0,0]

optimization with cross validation LOO

#### 4.3 Comparison BM and FL

In [997]:
np.array(final_OSA.sum()*10)

array([0.47693085, 3.88538917, 5.00276794, 4.94634517, 0.60910024,
       0.51063685, 8.78228128, 3.69565319, 1.25069199, 0.3020356 ,
       0.87336835])

In [998]:
final_OSA.columns

Index(['feinty', 'Green/grassy', 'Floral', 'Fruity ', 'Solvently', 'Soapy',
       'Sweet', 'Woody', 'Spicy', 'Sour', 'Stale'],
      dtype='object')

In [999]:
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatterpolar(
  r=(plot_fuzzy_OSA["Intensities"]),
  theta=list(plot_fuzzy_OSA.index),
  fill="toself",
  name='ms fuzzy'
))
fig.add_trace(go.Scatterpolar(
  r=([ 0.7776613, 23.58432344,1.00636248, 16.65879529, 0.50338256,  1.16029695,  0.75458068]),
  theta=list(plot_fuzzy_OSA.index),
  fill="toself",
  name='ms binary'
))
fig.add_trace(go.Scatterpolar(
  r=([3,4,0,0,0,0,0]),
  theta=list(plot_fuzzy_OSA.index),
  fill="toself",
  name='panelist 1'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
       range=[0, 10]
    ),
  ),
  showlegend=False
)

fig.show()

#### 5. Show Flavourprofile

In [1000]:
final_OSA.sum()*10

feinty          0.476931
Green/grassy    3.885389
Floral          5.002768
Fruity          4.946345
Solvently       0.609100
Soapy           0.510637
Sweet           8.782281
Woody           3.695653
Spicy           1.250692
Sour            0.302036
Stale           0.873368
dtype: float64

In [1001]:
import plotly.graph_objects as go

fig = go.Figure(data=go.Scatterpolar(
  r=(np.array(final_OSA.sum()*10)),
  theta=list(final_OSA.columns),
  fill='toself'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
       range=[0, 10]
    ),
  ),
  showlegend=False
)

fig.show()

In [1002]:
final_OSA.columns

Index(['feinty', 'Green/grassy', 'Floral', 'Fruity ', 'Solvently', 'Soapy',
       'Sweet', 'Woody', 'Spicy', 'Sour', 'Stale'],
      dtype='object')

In [1003]:
# filter out our 8 comparable OSAs 
comparable_OSAs = ["floral","fruit","wood","complexity","malt","sweet","peat","dried fruits"]
filtered_OSAs = pd.DataFrame(columns=comparable_OSAs)
threshold = 80
# Populate the new DataFrame based on original DataFrame
for column in final_OSA.columns:
    for comparable_OSA in comparable_OSAs:
        if fuzz.ratio(str(column).lower(), comparable_OSA) >= threshold:
            filtered_OSAs[comparable_OSA] = final_OSA[column]

#filtered_OSAs



In [1004]:
import plotly.graph_objects as go

panelist_scores = [40,30,50,40,30,40,30]
fig = go.Figure()

fig.add_trace(go.Scatterpolar(
      r=[value / 10 for value in panelist_scores],
      theta=comparable_OSAs,
      fill='toself',
      name='Product A'
))
fig.add_trace(go.Scatterpolar(
      r=list(filtered_OSAs.sum()*10),
      theta=comparable_OSAs,
      fill='toself',
      name='Product B'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
      range=[0, 10]
    )),
  showlegend=False
)

fig.show()

In [1005]:
#only compare only columns from final_OSA
fig = go.Figure()

fig.add_trace(go.Scatterpolar(
    r=[0,4,3,4,5,0],
    theta=list(final_OSA.columns),
    fill='toself',
    name="panelist"
))

fig.add_trace(go.Scatterpolar(
  r=(np.array(final_OSA.sum()*10)),
  theta=list(final_OSA.columns),
  fill='toself',
  name= "MS_prediction"))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
      range=[0, 10]
    )),
  showlegend=False
)

fig.show()

In [1006]:
import plotly.graph_objects as go

#only compare only columns from final_OSA
def compare_profiles(final_OSA, panelists):
    fig = go.Figure()

    fig.add_trace(go.Scatterpolar(
        r=panelists,
        theta=list(final_OSA.index),
        fill='toself',
        name="panelist"
    ))

    fig.add_trace(go.Scatterpolar(
    r=(np.array(final_OSA)),
    theta=list(final_OSA.index),
    fill='toself',
    name= "MS_prediction"))

    fig.update_layout(
    polar=dict(
        radialaxis=dict(
        visible=True,
        range=[0, 10]
        )),
    showlegend=False
    )

    fig.show()

In [1007]:
# look at panelist 1
panelist_data_df = pd.read_table("data/vlb_data_combined.txt", delimiter=',', index_col = 1)
panelist_data_df = panelist_data_df[panelist_data_df["panelist"]==1]
panelist_data_df = panelist_data_df.drop(columns=["panelist", "date"])

In [1008]:
overlap_OSAs = ["floral", "fruit", "sweet", "wood", "peat"]

In [1009]:
def get_panelist_profile(sample_key):
    panelist_profile=[]
    for OSA in overlap_OSAs :
        panelist_profile.append(int(panelist_data_df.loc[sample_key][OSA]/10))
    return panelist_profile

In [1010]:
def find_overlapping_profiles(final_OSA,comparable_OSAs):
    # show only overlap meaning: floral, fruit, sweet, wood, peat
    new_series = pd.Series() 
    threshold = 80
    for comparable_OSA in comparable_OSAs:
        for index in final_OSA.index:
            if fuzz.ratio(str(index).lower(), comparable_OSA) >= threshold:
                new_series[comparable_OSA] = final_OSA[index]
                break
            else:
                new_series[comparable_OSA] = 0
    return(new_series)


In [1011]:
def compare_visual(OSA_plotting,sample_key):
    OSA_compare = find_overlapping_profiles(OSA_plotting, overlap_OSAs)
    panelist_compare = get_panelist_profile(sample_key)
    compare_profiles(OSA_compare.T, panelist_compare)

In [1012]:
compare_visual(final_OSA.sum()*10,"000920")





In [1017]:
compare_visual(final_OSA.mean(axis=0)*700,"000920")





In [1014]:

fig = go.Figure()
fig.add_trace(go.Scatterpolar(
  r=[0,4,3,4,5,0],
  theta=["Green","floral","fruit","sweet","wood","spicy"],
  fill='toself'
))


fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
      range=[0, 10]
    ),
  ),
  showlegend=False
)

fig.show()