# Assessment in a variational autoencoder 
Assessment of FSL language in an autoencoder
## Reloading the pretrained neural network

In [18]:
#!pip install tensorflow pandas textdistance
import pickle,gzip,sys
from rdmediationvaert import AE
import pandas as pd
cocktails,encodeur=pickle.load(gzip.open("cocktails.pklz"))
dataset=[]
for m in encodeur:
    if len(m)>2:
        dataset.append(m)
print(f"{len(dataset)} formulae for training")
model=AE(name='cocktailsvae')
model.reload('cocktailsvae')

829 formulae for training
____ ___    _  _ ____ ___  _ ____ ___ _ ____ __ _
 |--<  |__>   |\/| |=== |__> | |--|  |  | [__] | \|
Tensorflow version 2.7.0
GPU True


2021-12-28 16:23:59.508854: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2021-12-28 16:23:59.508885: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


## Load a formula

In [7]:
c=dataset[0]
print(f"FSL encoded formula : {c}")
print("Decoded formula:")
cocktails.decode([c])[0].formulationlist

FSL encoded formula : AAAAAAAABBBBαβ
Decoded formula:


Unnamed: 0,Component,Rate,minor
0,Light Rum,0.633,False
1,Lemon Juice,0.365,False
2,Passion Fruit Syrup,0.001,True
3,Lime Juice,0.001,True
4,Formulation,1.0,Non additive


## Find it in the latent space

In [14]:
latent=model.encode(c)
latent

array([[ 1.3890834 , -0.13870159, -0.00822407, -0.00487889, -0.46605322,
        -0.79323816,  0.38904732,  0.3041486 ,  0.11699133,  0.273327  ,
        -0.09223687,  0.1689527 ,  0.15887997, -0.02809681, -0.21979149,
         1.4856585 ,  2.5984235 ,  0.10420097, -0.10993379,  0.44843948,
         0.31948787, -0.09654102,  0.31869823, -0.6928068 , -0.618227  ,
        -1.1512997 , -0.58362055,  0.09300974,  0.04692227, -0.29087883,
         0.08301675, -0.15936494]], dtype=float32)

## Rebuild it back

In [12]:
model.decode(latent)

2021-12-28 16:18:37.736899: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


'AAAAAAAABBBBαβ'

## Assess performance

In [20]:
rebuilt=[model.decode(model.encode(formula)) for formula in dataset]

comparison=pd.DataFrame([[original,new] for original,new in zip(dataset,rebuilt)],
                       columns=["Formula","Rebuilt"])
comparison.head(20)

2021-12-28 16:26:32.896602: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-12-28 16:26:32.931957: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,Formula,Rebuilt
0,AAAAAAAABBBBαβ,AAAAAAAABBBBαβ
1,AAAAAAAACCDγ,AAAAAAAACCDγ
2,AAAAAAAADDEE,AAAAAAAADDDE
3,FFFFFFFFFFFζηθι,FFFFFFFFFFFζηθι
4,GGGGGGHHHIIκλ,GGGGGGHHHIIκλ
5,AAAAAAAAAAAμν,AAAAAAAAAAAον
6,AAAAAAAJJJJJβξ,AAAAAAAJJJJJβξ
7,AAAAAAAAAAAοπ,AAAAAAAAAAAοπ
8,HHHHHIIIIIIρ,HHHHHIIIIIIρ
9,HHHHHHHHHHHστυφ,HHHHHHHHHHHστυφ


## Sørensen text distance


In [24]:
from statistics import mean 
import textdistance
train=mean([textdistance.sorensen(orig,new) 
            for orig,new in zip(dataset[:663],rebuilt[:663])])
test=mean([textdistance.sorensen(orig,new) 
            for orig,new in zip(dataset[663:],rebuilt[663:])])
print(f"Sørensen similarity for training set: {train*100:.2f} %")
print(f"Sørensen similarity for test set: {test*100:.2f} %")

Sørensen similarity for training set: 97.79 %
Sørensen similarity for test set: 97.95 %


## Examples of use
### Ingredient replacement
Select a Formula

In [31]:
c=dataset[2]
print(f"FSL encoded formula : {c}")
print("Decoded formula:")
cocktails.decode([c])[0].formulationlist

FSL encoded formula : AAAAAAAADDEE
Decoded formula:


Unnamed: 0,Component,Rate,minor
0,Light Rum,0.594,False
1,Juice of a Lime,0.206,False
2,Powdered Sugar,0.2,False
3,Formulation,1.0,Non additive


### Find an ingredient in the latent space

In [33]:
cc="E"
cocktails.decode([cc])[0].formulationlist

Unnamed: 0,Component,Rate,minor
0,Powdered Sugar,1.0,False
1,Formulation,1.0,Non additive


In [34]:
B_latent=model.encode(cc)
B_latent

array([[ 0.07749687,  0.06850921, -0.12150503,  0.3076481 ,  0.21906431,
         0.36602306, -0.23047219, -0.0314557 , -0.11696757,  0.30448595,
         0.18171558, -0.35609794,  0.08173011, -0.49204585,  0.01729178,
        -0.84248275, -0.36493543,  0.2195618 ,  0.13346879, -1.3478712 ,
        -0.31198242, -0.09321836,  0.47335526,  0.7934676 ,  0.14510019,
         1.1782285 ,  0.26321998,  0.22008964, -0.24455835,  0.29434013,
         0.1674507 , -0.0353418 ]], dtype=float32)

### Remove the ingredient and brew a new cocktail

In [38]:
new=model.decode(latent-B_latent)
new=''.join(sorted(new))
new

'AAAAAAAABBBBαβ'

In [39]:
cocktails.decode([new])[0].formulationlist


Unnamed: 0,Component,Rate,minor
0,Light Rum,0.633,False
1,Lemon Juice,0.365,False
2,Passion Fruit Syrup,0.001,True
3,Lime Juice,0.001,True
4,Formulation,1.0,Non additive


## Create a new cocktail

### Locate a random latent space vector

In [43]:
brandnew=model.generate()
cocktails.decode([brandnew])[0].formulationlist

Unnamed: 0,Component,Rate,minor
0,Sweet Vermouth,0.23,False
1,Triple Sec,0.124,False
2,Powdered Sugar,0.141,False
3,Gin,0.505,False
4,Formulation,1.0,Non additive
