# Assessment in a variational autoencoder 
Assessment of FSL language in an autoencoder
## Reloading the pretrained neural network

In [1]:
#!pip install tensorflow pandas textdistance
import pickle,gzip,sys
from rdmediationvaert import AE
import pandas as pd
cocktails,encodeur=pickle.load(gzip.open("cocktails.pklz"))
dataset=[]
for m in encodeur:
    if len(m)>2:
        dataset.append(m)
print(f"{len(dataset)} formulae for training")
model=AE(name='cocktailsvae')
model.reload('cocktailsvae')

829 formulae for training
____ ___    _  _ ____ ___  _ ____ ___ _ ____ __ _
 |--<  |__>   |\/| |=== |__> | |--|  |  | [__] | \|
Tensorflow version 2.7.0
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU True
Metal device set to: Apple M1


2021-12-29 08:59:47.905353: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2021-12-29 08:59:47.905726: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2021-12-29 08:59:47.922338: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2021-12-29 08:59:47.922355: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


## Load a formula

In [2]:
c=dataset[0]
print(f"FSL encoded formula : {c}")
print("Decoded formula:")
cocktails.decode([c])[0].formulationlist

FSL encoded formula : AAAAAAAABBBBαβ
Decoded formula:


Unnamed: 0,Component,Rate,minor
0,Light Rum,0.633,False
1,Lemon Juice,0.365,False
2,Passion Fruit Syrup,0.001,True
3,Lime Juice,0.001,True
4,Formulation,1.0,Non additive


## Find it in the latent space

In [3]:
latent=model.encode(c)
latent

2021-12-29 08:59:50.370382: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2021-12-29 08:59:50.418114: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


array([[ 1.3890834 , -0.13870159, -0.00822407, -0.00487889, -0.46605322,
        -0.79323816,  0.38904732,  0.3041486 ,  0.11699133,  0.273327  ,
        -0.09223687,  0.1689527 ,  0.15887997, -0.02809681, -0.21979149,
         1.4856585 ,  2.5984235 ,  0.10420097, -0.10993379,  0.44843948,
         0.31948787, -0.09654102,  0.31869823, -0.6928068 , -0.618227  ,
        -1.1512997 , -0.58362055,  0.09300974,  0.04692227, -0.29087883,
         0.08301675, -0.15936494]], dtype=float32)

## Rebuild it back

In [4]:
model.decode(latent)

2021-12-29 08:59:51.667248: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


'AAAAAAAABBBBαβ'

## Assess performance

In [5]:
rebuilt=[model.decode(model.encode(formula)) for formula in dataset]

comparison=pd.DataFrame([[original,new] for original,new in zip(dataset,rebuilt)],
                       columns=["Formula","Rebuilt"])
comparison.head(20)

Unnamed: 0,Formula,Rebuilt
0,AAAAAAAABBBBαβ,AAAAAAAABBBBαβ
1,AAAAAAAACCDγ,AAAAAAAACCDγ
2,AAAAAAAADDEE,AAAAAAAADDDE
3,FFFFFFFFFFFζηθι,FFFFFFFFFFFζηθι
4,GGGGGGHHHIIκλ,GGGGGGHHHIIκλ
5,AAAAAAAAAAAμν,AAAAAAAAAAAον
6,AAAAAAAJJJJJβξ,AAAAAAAJJJJJβξ
7,AAAAAAAAAAAοπ,AAAAAAAAAAAοπ
8,HHHHHIIIIIIρ,HHHHHIIIIIIρ
9,HHHHHHHHHHHστυφ,HHHHHHHHHHHστυφ


## Sørensen text distance


In [6]:
from statistics import mean 
import textdistance
train=mean([textdistance.sorensen(orig,new) 
            for orig,new in zip(dataset[:663],rebuilt[:663])])
test=mean([textdistance.sorensen(orig,new) 
            for orig,new in zip(dataset[663:],rebuilt[663:])])
print(f"Sørensen similarity for training set: {train*100:.2f} %")
print(f"Sørensen similarity for test set: {test*100:.2f} %")

Sørensen similarity for training set: 97.79 %
Sørensen similarity for test set: 97.95 %


## Examples of use
### Ingredient replacement
Select a Formula

In [7]:
c=dataset[2]
print(f"FSL encoded formula : {c}")
print("Decoded formula:")
cocktails.decode([c])[0].formulationlist

FSL encoded formula : AAAAAAAADDEE
Decoded formula:


Unnamed: 0,Component,Rate,minor
0,Light Rum,0.594,False
1,Juice of a Lime,0.206,False
2,Powdered Sugar,0.2,False
3,Formulation,1.0,Non additive


### Find an ingredient in the latent space

In [8]:
cc="EEEEE"
cocktails.decode([cc])[0].formulationlist

Unnamed: 0,Component,Rate,minor
0,Powdered Sugar,1.0,False
1,Formulation,1.0,Non additive


In [9]:
B_latent=model.encode(cc)
B_latent

array([[-6.45386398e-01,  4.85044532e-02,  8.29209983e-02,
         3.41801457e-02,  7.71266997e-01,  5.36016107e-01,
         2.08929375e-01,  7.18495250e-02, -3.53245795e-01,
         1.99218929e-01,  4.12274413e-02, -8.70564654e-02,
         1.17326975e-01, -2.18493879e-01, -2.59110242e-01,
        -4.27905977e-01, -2.94935942e-01, -1.74721386e-02,
         6.90681040e-02, -2.25325441e+00, -1.64082974e-01,
        -7.02380240e-02,  4.02717918e-01,  6.12576544e-01,
        -1.44361891e-03,  1.13856137e+00,  2.85031438e-01,
         5.24719916e-02, -2.52416462e-01,  6.97316080e-02,
         2.07967505e-01, -2.75261998e-02]], dtype=float32)

### Remove the ingredient and brew a new cocktail

In [10]:
new=model.decode(latent-B_latent)
new=''.join(sorted(new))
new

'AAAAAAABBBBFαβ'

In [11]:
cocktails.decode([new])[0].formulationlist


Unnamed: 0,Component,Rate,minor
0,Light Rum,0.511,False
1,Lemon Juice,0.335,False
2,Pineapple Juice,0.153,False
3,Passion Fruit Syrup,0.001,True
4,Lime Juice,0.001,True
5,Formulation,1.0,Non additive


## Create a new cocktail

### Locate a random latent space vector

In [43]:
brandnew=model.generate()
cocktails.decode([brandnew])[0].formulationlist

Unnamed: 0,Component,Rate,minor
0,Sweet Vermouth,0.23,False
1,Triple Sec,0.124,False
2,Powdered Sugar,0.141,False
3,Gin,0.505,False
4,Formulation,1.0,Non additive
