# Looking into the cases for which SELFIES v0 could not produce a valid SMILES

In [None]:
import sys
sys.path.append('../../')
import net.evolution_functions as evo 
from sa_scorer.sascorer import calculate_score
import pandas as pd

In [14]:
def calc_properties(smile):
    """Compute logP, SA score and ring penalty for a single SMILES string.

    The SMILES is first passed through ``evo.sanitize_smiles``; the molecule
    object it returns is then handed to ``evo.get_logP``, ``calculate_score``
    and ``calculate_ring_penalty``.

    Args:
        smile: SMILES string of the molecule to evaluate.

    Returns:
        dict: A mapping with the keys ``'logP'``, ``'sas'`` and
        ``'ring_penalty'`` holding the three computed values.
    """
    # NOTE(review): the canonical SMILES and the conversion-success flag
    # returned by sanitize_smiles are not used here - confirm that callers
    # only pass strings that sanitize successfully.
    mol, _canonical_smiles, _converted = evo.sanitize_smiles(smile)

    properties = {}
    properties['logP'] = evo.get_logP(mol)
    properties['sas'] = calculate_score(mol)
    properties['ring_penalty'] = calculate_ring_penalty(mol)
    return properties
    
def calculate_ring_penalty(mol):
    """Return the ring-size penalty of a molecule.

    The penalty is the number of atoms by which the largest ring of ``mol``
    exceeds six. It is 0 when the molecule has no rings or when its largest
    ring has six atoms or fewer.

    Args:
        mol: Object exposing ``GetRingInfo().AtomRings()``, which yields one
            sized collection of atoms per ring (presumably an RDKit ``Mol``).

    Returns:
        int: ``largest_ring_size - 6`` if that is positive, otherwise 0.
    """
    ring_sizes = [len(ring) for ring in mol.GetRingInfo().AtomRings()]
    largest_ring = max(ring_sizes, default=0)
    # Rings of up to six atoms carry no penalty, so clamp at zero.
    return max(largest_ring - 6, 0)


def obtained_standardized_properties(
    logPs, SASs, ringPs
):
    """Standardize logP, SAS and ring-penalty values into column vectors.

    Each input has a fixed mean subtracted and is divided by a fixed standard
    deviation (hard-coded statistics attributed to the ZINC data set), and
    the result is reshaped to ``(n, 1)`` where ``n`` is ``shape[0]`` of the
    corresponding input.

    Args:
        logPs: logP values; must support arithmetic with floats and expose
            ``shape`` / ``reshape`` (presumably a 1-D NumPy array).
        SASs: SA scores, same requirements as ``logPs``.
        ringPs: Ring penalties, same requirements as ``logPs``.

    Returns:
        tuple: ``(logP_norm, SAS_norm, RingP_norm)``, each of shape ``(n, 1)``.
    """

    def _to_zscore_column(values, mean, std):
        # Standardize, then turn the result into a single column.
        return ((values - mean) / std).reshape((values.shape[0], 1))

    return (
        # logP statistics.
        _to_zscore_column(logPs, 2.4729421499641497, 1.4157879815362406),
        # SAS statistics.
        _to_zscore_column(SASs, 3.0470797085649894, 0.830643172314514),
        # Ring-penalty statistics.
        _to_zscore_column(ringPs, 0.038131530820234766, 0.2240274735210179),
    )




In [1]:
from selfies import decoder

In [3]:
# Read every line of the failures file into memory (newlines are kept).
with open('../../../random_baseline_failures', 'r') as fh:
    lines = list(fh)

In [7]:
# Decode the first comma-separated field of each of the first 100000 lines
# with the SELFIES decoder.
smiles = [
    decoder(line.split(',')[0].strip())
    for line in lines[:100000]
]

In [None]:
# Build one property record per decoded SMILES string.
results = []
for smile in smiles:
    # Score the molecule of this iteration so that the stored properties
    # belong to the same string that is recorded under 'smile'.
    r = calc_properties(smile)
    r['smile'] = smile
    # Combined score: logP minus the SA score and the ring penalty.
    r['j'] = r['logP'] - r['sas'] - r['ring_penalty']

    results.append(r)