## Pipeline Flavor Profile

In [8]:
import pandas as pd
import numpy as np
from fuzzywuzzy import fuzz
import csv

In [9]:
'''
does convert a pandas dataframe into a matrix where 
keys = row name
values = col name
data = either 0 or 1 
'''
def dict_to_matrix(dictionary):
    unique_values = set(val for sublist in dictionary.values() for val in sublist)
    data_dict = {key: {val: 1 if val in values else 0 for val in unique_values} for key, values in dictionary.items()}
    df = pd.DataFrame(data_dict).T
    return(df)


In [10]:
'''
find overlapp between to lists with a threshold (=70)
'''
def overlapping_elements(list1, list2, threshold=70):
    overlap = []
    for elem1 in list1:
        for elem2 in list2:
            ratio = fuzz.ratio(elem1, elem2)
            if ratio >= threshold:
                overlap.append(elem1)
    return overlap

In [11]:
'''
Convert DataFrame to dictionary
'''
def dataframe_to_dict(df):
    result_dict = {}
    for index, row in df.iterrows():
        if row[0] in result_dict:
            result_dict[row[0]].append(row[1])
        else:
            result_dict[row[0]] = [row[1]]
    return result_dict     

In [12]:
'''
find match between dictionary entries and the rows of a dataframe
'''
def find_matching_entries(dictionary,df):
    threshold = 50
    found_match = []
    found_match_per_entry = {}
    for key, value in dictionary.items():
        all_matches = []
        for index, row in df.iterrows():
            for keyword in value:
                if fuzz.ratio(keyword, str(index).lower()) >= threshold:
                    all_matches.append(index)
                    found_match.append(row)
        found_match_per_entry[key] = all_matches
    found_match = pd.DataFrame(found_match)
    return(found_match, found_match_per_entry)

In [13]:
'''
create a new matrix by finding matches and filtering right columns
'''
def create_matrix_from_dict(dictionary,df):
    matrix = pd.DataFrame(columns=list(df), index=ms_molecules_ex)
    final_dict = {}
    for key, value in dictionary.items():
        current_val = []
        for v in value:
            for index, row in df.iterrows():
                if index == v:
                    current_val.append(','.join(row.loc[row.eq(1)].index.tolist()))
                    # todo: maybe exception -> do change if two ones in a row
                    matrix.loc[key][row.loc[row.eq(1)].index.tolist()] = 1
        final_dict[key] = current_val
    matrix = matrix.fillna(0)
    matrix = matrix.loc[:, (matrix != 0).any(axis=0)]
    return(final_dict, matrix)
   

#### 1. MS Data ----- Massbank -----> Molecules

-> Chris

save which intensities belong to which molecule and give each the % 
2 approaches for one molecule with multiple OQs
1.  each gets same % 
2.  divide % by count of OQs per molecule

<span style="color:lightblue">mol_int</span> : dict where key is the molecule name and value is the intensites from the mass spec data

In [14]:
# DUMMY DATA
intensities = [80, 40, 5,15]
# only flavornet data use : ['(E)-3-hexenol', "2-dodecenal",'δ-muurolene']
ms_molecules_ex =['(E)-3-hexenol', "2-dodecenal",'δ-muurolene', 'dihydromyrcenol']
# big book x flavorent: 
# ms_molecules_ex = ['dimethylethyl pyrazine', "3-mercaptothiophene",'δ-muurolene']
mol_int = {'(E)-3-hexenol': 80, "2-dodecenal": 40 ,'δ-muurolene': 5, 'dihydromyrcenol': 15}

<span style="color:#FBB714">I_vector</span> : dataframe with *molecules x intensities*

In [15]:
#I_vector = pd.DataFrame(data = mol_int, index = ["intensities"]).T
I_vector = pd.DataFrame(data= intensities,index = ms_molecules_ex, columns=["intensities"])
I_vector

Unnamed: 0,intensities
(E)-3-hexenol,80
2-dodecenal,40
δ-muurolene,5
dihydromyrcenol,15


#### 2. Molecules ----- Flavornet -----> OD 
Find ODs by matching molecules with databases using Flavornet

<span style="color:lightblue">CAS_to_odorants.csv</span> : table with following columns - molecule, CAS, Mol wt, odorant


##### table manipualtion done in R

```r
library(rlang)
library(tidyverse)

CAS_to_odorants <- read.table("~/Bioinformatik_20.21/Bachelorarbeit/CAS_to_mol.txt", sep = "\t", header = FALSE)
OD_to_mol <- read.table("~/Bioinformatik_20.21/Bachelorarbeit/OD_mol.txt", sep = "\t", header = FALSE)

OD_to_mol_sep <- OD_to_mol %>%
  separate_rows(V2, sep = ";\\s*") %>%
  mutate(V2 = trimws(V2))
OD_to_mol_final <- OD_to_mol_sep[, c("V2","V1")]
colnames(OD_to_mol_final) <- c("molecule", "OD")

CAS_to_odorants_sep <- CAS_to_odorants %>%
  separate_rows(V3, sep = ";\\s*") %>%
  mutate(V3 = trimws(V3))
colnames(CAS_to_odorants_sep) <- c("CAS","Mol Wt", "molecule")


merged_df <- merge(CAS_to_odorants_sep, OD_to_mol_final, by = "molecule")
write.csv(merged_df, file = "~/Bioinformatik_20.21/Bachelorarbeit/CAS_mol_OD.csv", row.names = FALSE)
```

*Flavornet only*

<span style="color:lightblue">mol_to_OD</span> : dict where key is the molecule and the descriptors for these molecules are the values

In [16]:
'''
read flavornet data and save it in mol_to_OD
'''
# only using flavornet data
mol_to_OD = {}
molecules = []
descriptors = []
with open('./data/CAS_mol_OD.csv', 'r',encoding='utf-8') as tabfile:
    reader = csv.reader(tabfile, delimiter=',')
    for row in reader: 
        molecule = row[0]
        OD = row[3]
        molecules.append(molecule)
        descriptors.append(OD)
        if molecule in mol_to_OD:
            mol_to_OD[molecule].add(OD)
        else:
            mol_to_OD[molecule]  = set([OD])

molecules = list(set(molecules))
descriptors = list(set(descriptors))

In [17]:
big_book =  pd.read_table('./data/bigBook.txt', sep=',')

*Flavornet x Big Book*

In [18]:
'''# filter only those descriptor that are available in flavorent and in our big book
mol_to_OD = {}
molecules = []
descriptors = []
with open('./data/CAS_mol_OD.csv', 'r',encoding='utf-8') as tabfile:
    reader = csv.reader(tabfile, delimiter=',')
    for row in reader: 
        molecule = row[0]
        OD = row[3]
        for item in big_book['label']:
            if (fuzz.ratio(OD, item) >= 60):
                molecules.append(molecule)
                descriptors.append(item)
                if molecule in mol_to_OD:
                    mol_to_OD[molecule].add(item)
                else:
                    mol_to_OD[molecule]  = set([item])

molecules = list(set(molecules))
descriptors = list(set(descriptors))'''

"# filter only those descriptor that are available in flavorent and in our big book\nmol_to_OD = {}\nmolecules = []\ndescriptors = []\nwith open('./data/CAS_mol_OD.csv', 'r',encoding='utf-8') as tabfile:\n    reader = csv.reader(tabfile, delimiter=',')\n    for row in reader: \n        molecule = row[0]\n        OD = row[3]\n        for item in big_book['label']:\n            if (fuzz.ratio(OD, item) >= 60):\n                molecules.append(molecule)\n                descriptors.append(item)\n                if molecule in mol_to_OD:\n                    mol_to_OD[molecule].add(item)\n                else:\n                    mol_to_OD[molecule]  = set([item])\n\nmolecules = list(set(molecules))\ndescriptors = list(set(descriptors))"

<span style="color:#FBB714">D</span>: Dataframe with all *molecules x descriptors* - rigth now only flavornet database

In [19]:
D = pd.DataFrame(index=molecules, columns=descriptors)

In [20]:
for col in D.columns:
    for index, row in D.iterrows():
        # Check if value in  current row matches any entry in the dictionary
        if index in mol_to_OD.keys() and col in mol_to_OD[index]:
            # Set to 1
            D.at[index, col] = 1

D = D.fillna(0)
D

Unnamed: 0,soap,earth,caramel,seaweed,orange peel,thiamin,pea,mint,malt,ether,...,moss,melon,roasted nut,smoke,bitter,plum,coriander,apple. rose,strawberry,mandarin
"2-methoxy-3,6-dimethylpyrazine",0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
"ethyl-(E,Z)-2,4-decadienoate",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
methyl epijasmonate,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
neral,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
piperitone,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
methyl anthranilate,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
lauric aldehyde,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
methyl-2-butenal,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
trans-sabinene hydrate,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


match molecules from MS data with flavors

In [21]:
#TODO: Maybe also use a fuzzy ratio here depending on the writing of the moleucles -> reduce mistakes
flavors = []
mol_flavors = {}
for mol in ms_molecules_ex:
    if mol in mol_to_OD.keys():
        flavors += mol_to_OD[mol]
        mol_flavors[mol] = mol_to_OD[mol]
    else:
        mol_flavors[mol] = {}

found_flavors = set(flavors)


<span style="color:lightblue">found_flavors</span>: list of the ODs found from the identified molecules 

In [22]:
found_flavors

{'citrus',
 'cologne',
 'fat',
 'fresh',
 'green',
 'moss',
 'oil',
 'sweet',
 'tart lime'}



<span style="color:lightblue">mol_flavors</span>: contains the molecule from where each OD is coming from 

In [23]:
mol_flavors

{'(E)-3-hexenol': {'fresh', 'moss'},
 '2-dodecenal': {'fat', 'green', 'sweet'},
 'δ-muurolene': {'oil'},
 'dihydromyrcenol': {'citrus', 'cologne', 'tart lime'}}

<span style="color:#FBB714">OD</span>: Dataframe - *molecules x ODs*

In [24]:
# filters only relevant rows - only those ODs coming from indentified molecules
OD = D[(D.index).isin(I_vector.index)]
# get only important/matching columns
OD = OD.loc[:, (OD != 0).any(axis=0)]
OD 

Unnamed: 0,oil,fat,tart lime,fresh,cologne,green,sweet,citrus,moss
δ-muurolene,1,0,0,0,0,0,0,0,0
2-dodecenal,0,1,0,0,0,1,1,0,0
dihydromyrcenol,0,0,1,0,1,0,0,1,0
(E)-3-hexenol,0,0,0,1,0,0,0,0,1


#### 3. OD ----------> OQ


##### Roche data
Use ontology to find matching OQ

In [25]:
# filter out 
OD_OQ =  pd.read_csv('./data/DATA_OD-2-OQ.csv', index_col=0)
# filter out rows with only 0 
OD_OQ = OD_OQ.loc[(OD_OQ.iloc[:,1:] != 0).any(axis=1)]


<span style="color:#FBB714">OD_OQ</span>: Dataframe - all ODs x OQs

In [26]:
OD_OQ

Unnamed: 0,Almond,Woody,Camphor,Leather,Cooked,Spicy,Floral,Fresh,Fruity,Smoky,...,Lactonic,Vegetable,Honey,Peel,Sulfurous,Toasty,Vanilla,Green,Vinous,Violet
AGRUMES,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Apple,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Apple peels,0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,0,0,0,0,0
Apricot,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Banana,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wine,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
Wine-yeast,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
Winey,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
Woody,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
# find matching OQs to found ODs from massspec x flavornet
matching_OQs, matching_OQs_dict = find_matching_entries(mol_flavors, OD_OQ)

In [28]:
# get only important columns
matching_OQs = matching_OQs.loc[:, (matching_OQs != 0).any(axis=0)]
matching_OQs

Unnamed: 0,Woody,Leather,Cooked,Spicy,Floral,Fresh,Fruity,Cut-grass,Lactonic,Vegetable,Honey,Sulfurous,Toasty,Green,Vinous
AGRUMES,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
FRUITE,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
FRUITS SECS,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
Fruity-fresh,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0
Mimosa,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
Radish,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
Rose,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
SOUFRE,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
AGRUMES,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
Eugenol,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [29]:
# to know which OD comes from which molecule
matching_OQs_dict

{'(E)-3-hexenol': ['AGRUMES',
  'FRUITE',
  'FRUITS SECS',
  'Fruity-fresh',
  'Mimosa',
  'Radish',
  'Rose',
  'SOUFRE'],
 '2-dodecenal': ['AGRUMES',
  'Eugenol',
  'Grape',
  'Green',
  'Green-fruity',
  'Green fruity',
  'Greener gassy',
  'Leafy-green',
  'Sweet-ethereal',
  'Weedy'],
 'δ-muurolene': ['BOISE', 'Tropical'],
 'dihydromyrcenol': ['AGRUMES',
  'CARAMEL',
  'Castoreum',
  'Citrus',
  'Clove',
  'Corn',
  'CUIR',
  'FRUITS ROUGES',
  'Fruity-rum',
  'Honey',
  'LACTONE',
  'Narcissus',
  'Toasted']}

<span style="color:#FBB714">OQs</span>: Dataframe - ODs x OQs

In [30]:
OQ_dict, OQs = create_matrix_from_dict(matching_OQs_dict, matching_OQs)

In [31]:
OQs

Unnamed: 0,Woody,Leather,Cooked,Spicy,Floral,Fresh,Fruity,Cut-grass,Lactonic,Vegetable,Honey,Sulfurous,Toasty,Green,Vinous
(E)-3-hexenol,0,0,0,0,1,1,1,0,0,1,0,1,0,0,0
2-dodecenal,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0
δ-muurolene,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0
dihydromyrcenol,0,1,1,1,1,0,1,0,1,1,1,0,1,0,1


<span style="color:lightblue">OQ_dict</span>: dictionary with key = molecule, value = OQs

In [32]:
OQ_dict

{'(E)-3-hexenol': ['Fruity',
  'Fruity',
  'Fruity',
  'Fruity',
  'Fruity',
  'Fresh,Fruity',
  'Floral',
  'Vegetable',
  'Floral',
  'Sulfurous'],
 '2-dodecenal': ['Fruity',
  'Fruity',
  'Fruity',
  'Spicy',
  'Fruity',
  'Green',
  'Fruity,Green',
  'Fruity,Green',
  'Green',
  'Green',
  'Fruity',
  'Cut-grass'],
 'δ-muurolene': ['Woody', 'Fruity'],
 'dihydromyrcenol': ['Fruity',
  'Fruity',
  'Fruity',
  'Cooked',
  'Leather',
  'Fruity',
  'Spicy',
  'Vegetable',
  'Leather',
  'Fruity',
  'Fruity,Vinous',
  'Honey',
  'Lactonic',
  'Floral',
  'Toasty']}

#### 4.1 OQ ------ Binary Matrix -----> OSA


In [33]:
OQ_OSA = pd.read_csv('./data/OQ_OSA.csv', delimiter=';', index_col=0)
OQ_OSA

Unnamed: 0,floral,fruit,solventy,soapy,sweet,wood,nutty,spicy,oily,sour,...,feinty,cereal,green/grassy,malt,primary taste,mouthfeel,nasal effects,dried fruits,aftertaste,complexity
fresh flowers,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
perfumed,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
fresh fruit,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
dried fruit,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
citrus,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
artificial fruit flavorings,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
nail varnish remover,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
unperfumed soap,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
vanilla,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
honey,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
# find matching OQs to found ODs from massspec x flavornet
matching_OSAs, matching_OSAs_dict = find_matching_entries(OQ_dict, OQ_OSA)

In [35]:
matching_OSAs

Unnamed: 0,floral,fruit,solventy,soapy,sweet,wood,nutty,spicy,oily,sour,...,feinty,cereal,green/grassy,malt,primary taste,mouthfeel,nasal effects,dried fruits,aftertaste,complexity
fresh fruit,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
dried fruit,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
cooked vegetables,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
earthy,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
earthy,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
earthy,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
earthy,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
earthy,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
musty,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
musty,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


<span style="color:lightblue">OSA_dict</span>: dict - keys = molecules x value = OSAs

In [36]:
OSA_dict, OSAs = create_matrix_from_dict(matching_OSAs_dict, matching_OSAs)

In [37]:
# todo: count appeareance of OSA in each odorant 

In [38]:
OSA_dict

{'(E)-3-hexenol': ['fruit',
  'fruit,dried fruits',
  'sulphury',
  'sulphury',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stal

<span style="color:#FBB714">OSAs</span>: Dataframe - OQs x OSAs

In [39]:
OSAs

Unnamed: 0,fruit,sweet,wood,nutty,sulphury,stale,peat,feinty,green/grassy,dried fruits
(E)-3-hexenol,1,0,0,0,1,1,0,0,1,1
2-dodecenal,0,0,0,0,0,1,0,0,1,0
δ-muurolene,0,0,0,0,0,1,0,0,0,0
dihydromyrcenol,0,1,1,1,1,1,1,1,1,0


In [40]:
OSA_dict

{'(E)-3-hexenol': ['fruit',
  'fruit,dried fruits',
  'sulphury',
  'sulphury',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stale',
  'stal

In [41]:
I_vector

Unnamed: 0,intensities
(E)-3-hexenol,80
2-dodecenal,40
δ-muurolene,5
dihydromyrcenol,15


In [42]:
from sklearn.preprocessing import Normalizer
normalizer = Normalizer()
I_vector_normalized = normalizer.fit_transform(I_vector.T).T
I_vector_normalized

array([[0.88077101],
       [0.44038551],
       [0.05504819],
       [0.16514456]])

In [120]:
final_OSA = OSAs*I_vector_normalized
final_OSA

Unnamed: 0,fruit,sweet,wood,nutty,sulphury,stale,peat,feinty,green/grassy,dried fruits
(E)-3-hexenol,0.880771,0.0,0.0,0.0,0.880771,0.880771,0.0,0.0,0.880771,0.880771
2-dodecenal,0.0,0.0,0.0,0.0,0.0,0.440386,0.0,0.0,0.440386,0.0
δ-muurolene,0.0,0.0,0.0,0.0,0.0,0.055048,0.0,0.0,0.0,0.0
dihydromyrcenol,0.0,0.165145,0.165145,0.165145,0.165145,0.165145,0.165145,0.165145,0.165145,0.0


In [44]:
final_OSA*10

Unnamed: 0,fruit,sweet,wood,nutty,sulphury,stale,peat,feinty,green/grassy,dried fruits
(E)-3-hexenol,8.80771,0.0,0.0,0.0,8.80771,8.80771,0.0,0.0,8.80771,8.80771
2-dodecenal,0.0,0.0,0.0,0.0,0.0,4.403855,0.0,0.0,4.403855,0.0
δ-muurolene,0.0,0.0,0.0,0.0,0.0,0.550482,0.0,0.0,0.0,0.0
dihydromyrcenol,0.0,1.651446,1.651446,1.651446,1.651446,1.651446,1.651446,1.651446,1.651446,0.0


In [58]:
list(final_OSA.sum())

[0.8807710121010885,
 0.1651445647689541,
 0.1651445647689541,
 0.1651445647689541,
 1.0459155768700426,
 1.541349271176905,
 0.1651445647689541,
 0.1651445647689541,
 1.486301082920587,
 0.8807710121010885]

In [63]:
#normalizer.fit_transform(np.array(final_OSA.sum()).reshape(1,- 1))*10

array([[3.24509566, 0.60845544, 0.60845544, 0.60845544, 3.85355109,
        5.6789174 , 0.60845544, 0.60845544, 5.47609892, 3.24509566]])

#### 4.2 OQ ------ Fuzzy Logic -----> OSA
Use Fuzzy logic to weigh OQ with expert knowledge/Intensity from Masspec


#### 4.3 Comparison BM and FL

#### 5. Show Flavourprofile

*overall flavorprofile of predicted OSAs*

In [96]:
import plotly.graph_objects as go

fig = go.Figure(data=go.Scatterpolar(
  r=(normalizer.fit_transform(np.array(final_OSA.sum()).reshape(1,- 1))*10)[0]
  theta=list(final_OSA.columns),
  fill='toself'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True
    ),
  ),
  showlegend=False
)

fig.show()

*comparing OSAs with panelist scores*

In [157]:
# filter out our 8 comparable OSAs 
comparable_OSAs = ["floral","fruit","wood","complexity","malt","sweet","peat","dried fruits"]
filtered_OSAs = pd.DataFrame(columns=comparable_OSAs)
# Populate the new DataFrame based on original DataFrame
for column in filtered_OSAs:
    if column in final_OSA.columns:
        filtered_OSAs[column] = final_OSA[column]
    else:
        filtered_OSAs[column] = 0

filtered_OSAs



Unnamed: 0,floral,fruit,wood,complexity,malt,sweet,peat,dried fruits
(E)-3-hexenol,,0.880771,0.0,0,0,0.0,0.0,0.880771
2-dodecenal,,0.0,0.0,0,0,0.0,0.0,0.0
δ-muurolene,,0.0,0.0,0,0,0.0,0.0,0.0
dihydromyrcenol,,0.0,0.165145,0,0,0.165145,0.165145,0.0


In [164]:
import plotly.graph_objects as go

panelist_scores = [50,20,60,50,30,30,50,50] 
fig = go.Figure()

fig.add_trace(go.Scatterpolar(
      r=[value / 10 for value in panelist_scores],
      theta=comparable_OSAs,
      fill='toself',
      name='Product A'
))
fig.add_trace(go.Scatterpolar(
      r=list(filtered_OSAs.sum()*10),
      theta=comparable_OSAs,
      fill='toself',
      name='Product B'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
      range=[0, 10]
    )),
  showlegend=False
)

fig.show()