In [2]:
# General-purpose imports. `re`, `pd` and `Chem` are not referenced by the
# cells visible in this part of the notebook; they may be needed elsewhere.
import re
import pandas as pd
from rdkit import Chem

# Disable every RDKit log channel matching 'rdApp.*' -- presumably to keep
# RDKit warnings/errors out of the cell outputs (TODO confirm intent).
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')

# Register tqdm with pandas (tqdm's documented hook that adds the
# `progress_apply`-style methods to pandas objects).
from tqdm import tqdm
tqdm.pandas()

# Wildcard import of the project-local evaluation helpers. The functions called
# below (check_smiles_validity, full_assignment_accuracy,
# partial_full_assignment_accuracy, per_stereocenter_accuracy,
# partial_per_stereocenter_accuracy) are not defined in this notebook, so they
# presumably come from here -- NOTE(review): consider importing them by name.
from eval_functions import *

Check SMILES validity

In [2]:
# Directories handed to check_smiles_validity. The rows shown in the table
# below follow this order, so keep the ordering stable.
validity_directories = [
    'not_augmented',
    'not_augmented_randomized',
    'augmented_2x',
    'augmented_2x_randomized',
    'augmented_5x',
    'augmented_5x_randomized',
    'augmented_10x',
    'augmented_10x_randomized',
    'augmented_20x',
    'augmented_20x_randomized',
    'augmented_50x',
    'augmented_50x_randomized',
    'partial_augmented_5x',
    'partial_augmented_5x_randomized',
    'mixed_augmented',
    'mixed_augmented_randomized',
    'not_augmented_scrambled',
    'not_augmented_scrambled_randomized',
    'partial_augmented_5x_scrambled',
    'partial_augmented_5x_scrambled_randomized',
    'partial_augmented_5x_partial',
    'partial_augmented_5x_partial_randomized',
    'mixed_augmented_partial',
    'mixed_augmented_partial_randomized',
    'partial_augmented_5x_partial_scrambled',
    'partial_augmented_5x_partial_scrambled_randomized',
]

smiles_validity = check_smiles_validity(directories=validity_directories)

# Bare last expression -> rich display of the result (values left unscaled).
smiles_validity

Unnamed: 0,augmentation,top1,top2,top3
0,not_augmented,0.990343,0.992965,0.994206
1,not_augmented_randomized,0.767002,0.793075,0.808249
2,augmented_2x,0.994344,0.995861,0.996689
3,augmented_2x_randomized,0.979583,0.983308,0.987171
4,augmented_5x,0.995586,0.996413,0.996965
5,augmented_5x_randomized,0.991309,0.992965,0.993792
6,augmented_10x,0.996827,0.997655,0.997793
7,augmented_10x_randomized,0.99393,0.995724,0.996413
8,augmented_20x,0.996551,0.997793,0.998069
9,augmented_20x_randomized,0.993792,0.995586,0.995861


Full-assignment accuracy

In [None]:
# Directories scored for full-assignment accuracy. The rows shown in the table
# below follow this order, so keep the ordering stable.
full_assignment_directories = [
    'not_augmented',
    'not_augmented_randomized',
    'augmented_2x',
    'augmented_2x_randomized',
    'augmented_5x',
    'augmented_5x_randomized',
    'augmented_10x',
    'augmented_10x_randomized',
    'augmented_20x',
    'augmented_20x_randomized',
    'augmented_50x',
    'augmented_50x_randomized',
    'partial_augmented_5x',
    'partial_augmented_5x_randomized',
    'partial_augmented_5x_scrambled',
    'partial_augmented_5x_scrambled_randomized',
    'mixed_augmented',
    'mixed_augmented_randomized',
    'not_augmented_scrambled',
    'not_augmented_scrambled_randomized',
]

accuracy = full_assignment_accuracy(directories=full_assignment_directories)

# Express the top-k columns as percentages with one decimal place.
# Scale first and round last: rounding to 3 decimals *before* multiplying by
# 100 can leave binary floating-point residue in the stored values (the
# 7.000000000000001 kind), which is invisible on screen but leaks into exports.
# `assign` returns a new frame, so the object returned by the helper is not
# modified in place.
topk_columns = ['top1', 'top2', 'top3']
accuracy = accuracy.assign(
    **{column: (accuracy[column] * 100).round(1) for column in topk_columns}
)
accuracy

Unnamed: 0,augmentation,top1,top2,top3
0,not_augmented,56.3,67.4,71.4
1,not_augmented_randomized,22.5,27.3,30.5
2,augmented_2x,36.2,46.6,52.5
3,augmented_2x_randomized,45.4,56.1,61.5
4,augmented_5x,44.2,56.4,62.3
5,augmented_5x_randomized,50.4,62.2,68.3
6,augmented_10x,51.7,65.0,71.2
7,augmented_10x_randomized,54.4,67.3,73.6
8,augmented_20x,55.1,68.6,75.1
9,augmented_20x_randomized,55.6,68.7,74.8


Full-assignment accuracy for partial assignments

In [4]:
# Directories scored for full-assignment accuracy on partial assignments. The
# rows shown in the table below follow this order.
partial_full_assignment_directories = [
    'partial_augmented_5x_partial',
    'partial_augmented_5x_partial_randomized',
    'mixed_augmented_partial',
    'mixed_augmented_partial_randomized',
    'partial_augmented_5x_partial_scrambled',
    'partial_augmented_5x_partial_scrambled_randomized',
]

partial_accuracy = partial_full_assignment_accuracy(
    directories=partial_full_assignment_directories
)

# Express the top-k columns as percentages with one decimal place.
# Scale first and round last: rounding to 3 decimals *before* multiplying by
# 100 can leave binary floating-point residue in the stored values (the
# 7.000000000000001 kind), which is invisible on screen but leaks into exports.
# `assign` returns a new frame, so the object returned by the helper is not
# modified in place.
topk_columns = ['top1', 'top2', 'top3']
partial_accuracy = partial_accuracy.assign(
    **{column: (partial_accuracy[column] * 100).round(1) for column in topk_columns}
)
partial_accuracy

Unnamed: 0,augmentation,top1,top2,top3
0,partial_augmented_5x_partial,64.6,76.6,80.6
1,partial_augmented_5x_partial_randomized,14.2,22.3,26.5
2,mixed_augmented_partial,51.7,68.6,75.1
3,mixed_augmented_partial_randomized,54.2,70.5,76.5
4,partial_augmented_5x_partial_scrambled,13.9,26.6,32.7
5,partial_augmented_5x_partial_scrambled_randomized,8.1,15.4,19.2


Per-stereocenter accuracy

In [5]:
# Directories scored for per-stereocenter accuracy. The rows shown in the table
# below follow this order, so keep the ordering stable.
per_stereocenter_directories = [
    'not_augmented',
    'not_augmented_randomized',
    'augmented_2x',
    'augmented_2x_randomized',
    'augmented_5x',
    'augmented_5x_randomized',
    'augmented_10x',
    'augmented_10x_randomized',
    'augmented_20x',
    'augmented_20x_randomized',
    'augmented_50x',
    'augmented_50x_randomized',
    'partial_augmented_5x',
    'partial_augmented_5x_randomized',
    'partial_augmented_5x_scrambled',
    'partial_augmented_5x_scrambled_randomized',
    'mixed_augmented',
    'mixed_augmented_randomized',
    'not_augmented_scrambled',
    'not_augmented_scrambled_randomized',
]

wt_accuracy = per_stereocenter_accuracy(directories=per_stereocenter_directories)

# Express the top-k columns as percentages with one decimal place.
# Scale first and round last: rounding to 3 decimals *before* multiplying by
# 100 can leave binary floating-point residue in the stored values (the
# 7.000000000000001 kind), which is invisible on screen but leaks into exports.
# `assign` returns a new frame, so the object returned by the helper is not
# modified in place.
topk_columns = ['top1', 'top2', 'top3']
wt_accuracy = wt_accuracy.assign(
    **{column: (wt_accuracy[column] * 100).round(1) for column in topk_columns}
)
wt_accuracy

Unnamed: 0,augmentation,top1,top2,top3
0,not_augmented,78.7,86.9,89.2
1,not_augmented_randomized,41.0,49.7,53.9
2,augmented_2x,67.8,78.3,82.4
3,augmented_2x_randomized,72.5,81.7,85.4
4,augmented_5x,73.9,83.7,87.3
5,augmented_5x_randomized,76.8,85.9,89.3
6,augmented_10x,77.9,87.3,90.8
7,augmented_10x_randomized,79.3,88.2,91.5
8,augmented_20x,79.7,89.0,92.1
9,augmented_20x_randomized,79.8,88.8,91.8


Per-stereocenter accuracy for partial assignments

In [6]:
# Directories scored for per-stereocenter accuracy on partial assignments. The
# rows shown in the table below follow this order.
partial_per_stereocenter_directories = [
    'partial_augmented_5x_partial',
    'partial_augmented_5x_partial_randomized',
    'mixed_augmented_partial',
    'mixed_augmented_partial_randomized',
    'partial_augmented_5x_partial_scrambled',
    'partial_augmented_5x_partial_scrambled_randomized',
]

weighted_partial_accuracy = partial_per_stereocenter_accuracy(
    directories=partial_per_stereocenter_directories
)

# Express the top-k columns as percentages with one decimal place.
# Scale first and round last: rounding to 3 decimals *before* multiplying by
# 100 can leave binary floating-point residue in the stored values (the
# 7.000000000000001 kind), which is invisible on screen but leaks into exports.
# `assign` returns a new frame, so the object returned by the helper is not
# modified in place.
topk_columns = ['top1', 'top2', 'top3']
weighted_partial_accuracy = weighted_partial_accuracy.assign(
    **{
        column: (weighted_partial_accuracy[column] * 100).round(1)
        for column in topk_columns
    }
)
weighted_partial_accuracy

Unnamed: 0,augmentation,top1,top2,top3
0,partial_augmented_5x_partial,86.3,92.6,94.3
1,partial_augmented_5x_partial_randomized,46.0,56.6,61.4
2,mixed_augmented_partial,81.4,90.9,93.5
3,mixed_augmented_partial_randomized,82.1,91.1,93.6
4,partial_augmented_5x_partial_scrambled,53.3,69.1,74.3
5,partial_augmented_5x_partial_scrambled_randomized,37.1,48.8,53.5
