# Search for the very best candidates

First we collect all the data from all evolutionary experiments.

In [1]:
import glob
from rdkit import Chem, DataStructs

from rdkit import RDLogger  
RDLogger.DisableLog('rdApp.*')

# Scan every *_FIT.sdf file produced by the runs and collect the molecules.
#   totFitFiles           - number of SDF files visited
#   allCandidates         - every successfully parsed molecule
#   unqCandidates         - first molecule seen for each distinct InChIKey
#   unqCandidatesWFitness - unique molecules that carry a 'FITNESS' property
#   fitnesses/molNames/pathNames - parallel lists describing the entries of
#                                  unqCandidatesWFitness
#   unqUIDs               - InChIKeys in order of first appearance
totFitFiles = 0
allCandidates = []
unqCandidates = []
unqCandidatesWFitness = []
fitnesses = []
molNames = []
pathNames = []
unqUIDs = []
# Set mirror of unqUIDs: constant-time membership tests instead of scanning
# the list once per molecule.
seenUIDs = set()
for fitFile in glob.glob("../run_*/*/*_FIT.sdf"):
    totFitFiles += 1
    supplier = Chem.SDMolSupplier(fitFile)
    i = 0  # number of records found in this file (readable or not)
    for mol in supplier:
        i += 1
        if mol is None:
            # The supplier hands back None for records it cannot parse;
            # skip them instead of crashing on mol.SetProp below.
            print('WARNING! Skipping unreadable molecule in', fitFile)
            continue
        mol.SetProp('Pathname', fitFile)
        allCandidates.append(mol)
        inchi = Chem.inchi.MolToInchi(mol)
        inchikey = Chem.inchi.InchiToInchiKey(inchi)
        # A failed conversion may surface as None as well as '', so test
        # truthiness rather than comparing against the empty string only.
        if not inchikey:
            print('ERROR! Empty InChIKey.')
            break
        if inchikey not in seenUIDs:
            seenUIDs.add(inchikey)
            unqCandidates.append(mol)
            unqUIDs.append(inchikey)
            if mol.HasProp('FITNESS'):
                unqCandidatesWFitness.append(mol)
                fitnesses.append(float(mol.GetProp('FITNESS')))
                molNames.append(mol.GetProp('_Name'))
                pathNames.append(fitFile)
    if i > 1:
        # Stop the whole scan: the bookkeeping assumes one molecule per file.
        print('ERROR! One SDF file is expected to contain only one molecule!')
        break
    
# Report how many files and molecules the scan above collected, one aligned
# "label value" line per counter.
summaryRows = (
    ('Total imported FIT files:      ', totFitFiles),
    ('Total candidates:              ', len(allCandidates)),
    ('Unique InChIKey:               ', len(unqCandidates)),
    ('Unique InChIKey with fitness:  ', len(unqCandidatesWFitness)),
)
for rowLabel, rowCount in summaryRows:
    print(rowLabel, rowCount)

# List the SDF paths of the unique, fitness-bearing candidates from highest
# to lowest fitness. The (fitness, path) tuples are compared whole, so equal
# fitness values are ordered by path, also descending.
print('Candidates listed accoring to decreasing fitness')
rankedCandidates = sorted(zip(fitnesses, pathNames), reverse=True)
# Every path is prefixed with a single space, so the printed line starts
# with a blank, exactly as the concatenation loop produced it.
allNames = "".join(" " + path for _, path in rankedCandidates)

print(allNames)

Total imported FIT files:       2523
Total candidates:               2523
Unique InChIKey:                1435
Unique InChIKey with fitness:   1164
Candidates listed accoring to decreasing fitness
 ../run_2.0/Gen071/M00000407_FIT.sdf ../run_2.0/Gen082/M00000459_FIT.sdf ../run_2.0/Gen054/M00000317_FIT.sdf ../run_2.0/Gen049/M00000285_FIT.sdf ../run_2.0/Final/M00000167_FIT.sdf ../run_2.0/Gen061/M00000362_FIT.sdf ../run_1.1/Final/M00000299_FIT.sdf ../run_2.0/Final/M00000258_FIT.sdf ../run_1.1/Final/M00000306_FIT.sdf ../run_2.0/Gen031/M00000181_FIT.sdf ../run_1.1/Gen056/M00000369_FIT.sdf ../run_2.0/Gen090/M00000512_FIT.sdf ../run_2.0/Gen037/M00000214_FIT.sdf ../run_2.0/Gen057/M00000340_FIT.sdf ../run_2.0/Final/M00000410_FIT.sdf ../run_2.0/Gen037/M00000217_FIT.sdf ../run_2.0/Final/M00000482_FIT.sdf ../run_2.0/Gen097/M00000545_FIT.sdf ../run_2.0/Gen084/M00000472_FIT.sdf ../run_2.0/Gen039/M00000228_FIT.sdf ../run_1.1/Final/M00000300_FIT.sdf ../run_2.0/Gen065/M00000386_FIT.sdf ../run_2.0/Gen053