In [None]:
import numpy as np
import random
import rdkit as rd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Descriptors
from rdkit.Chem import Draw 
from rdkit.Chem.Draw import IPythonConsole
from rdkit.DataStructs.cDataStructs import TanimotoSimilarity
from rdkit.Chem.Draw.MolDrawing import MolDrawing, DrawingOptions

In [None]:
#Determines LogP
def logP(smiles):
    """Return RDKit's ``Descriptors.MolLogP`` for the molecule parsed from ``smiles``.

    The SMILES string is parsed with ``Chem.MolFromSmiles`` and the resulting
    molecule is handed straight to the descriptor. Callers in this notebook
    wrap the call in try/except to skip strings RDKit cannot handle.
    """
    mol = Chem.MolFromSmiles(smiles)
    return Descriptors.MolLogP(mol)

#Determines molecular weight
def molWt(smiles):
    """Return RDKit's ``Descriptors.MolWt`` for the molecule parsed from ``smiles``.

    The SMILES string is parsed with ``Chem.MolFromSmiles`` and the resulting
    molecule is handed straight to the descriptor. Callers in this notebook
    wrap the call in try/except to skip strings RDKit cannot handle.
    """
    mol = Chem.MolFromSmiles(smiles)
    return Descriptors.MolWt(mol)

#Determines number of hydrogen bond acceptors
def numAcc(smiles):
    """Return RDKit's ``Descriptors.NumHAcceptors`` for the molecule parsed from ``smiles``.

    The SMILES string is parsed with ``Chem.MolFromSmiles`` and the resulting
    molecule is handed straight to the descriptor. Callers in this notebook
    wrap the call in try/except to skip strings RDKit cannot handle.
    """
    mol = Chem.MolFromSmiles(smiles)
    return Descriptors.NumHAcceptors(mol)

#Determines number of hydrogen bond donors
def numDon(smiles):
    """Return RDKit's ``Descriptors.NumHDonors`` for the molecule parsed from ``smiles``.

    The SMILES string is parsed with ``Chem.MolFromSmiles`` and the resulting
    molecule is handed straight to the descriptor. Callers in this notebook
    wrap the call in try/except to skip strings RDKit cannot handle.
    """
    mol = Chem.MolFromSmiles(smiles)
    return Descriptors.NumHDonors(mol)

#Determines polar surface area
def polSur(smiles):
    """Return RDKit's ``Descriptors.TPSA`` for the molecule parsed from ``smiles``.

    The SMILES string is parsed with ``Chem.MolFromSmiles`` and the resulting
    molecule is handed straight to the descriptor.
    """
    mol = Chem.MolFromSmiles(smiles)
    return Descriptors.TPSA(mol)

#Determines number of rotatable bonds
def rolBon(smiles):
    """Return RDKit's ``Descriptors.NumRotatableBonds`` for the molecule parsed from ``smiles``.

    The SMILES string is parsed with ``Chem.MolFromSmiles`` and the resulting
    molecule is handed straight to the descriptor. Callers in this notebook
    wrap the call in try/except to skip strings RDKit cannot handle.
    """
    mol = Chem.MolFromSmiles(smiles)
    return Descriptors.NumRotatableBonds(mol)

In [None]:
#Properties of a random sample of training data molecules
#Each row of molProps is [logP, molWt, numAcc, numDon, rolBon] for one molecule

#Get a random sample of (at most) "nummols" molecules from the training data
nummols = 10000
#The context manager closes the file as soon as the lines have been read
with open("smiles.txt") as smilesFile:
    moleculelines = smilesFile.read().splitlines()
#random.sample raises ValueError when asked for more items than the list holds, so clamp the request
molecules = random.sample(moleculelines, min(nummols, len(moleculelines)))

#Collect rows in a plain list; np.append inside the loop would re-copy the whole array every iteration
propRows = []
for molecule in molecules:
    #Drop every "G" character from the line before parsing
    molecule = molecule.replace("G", "")
    if not molecule.strip():
        #Blank lines carry no SMILES string
        continue
    try:
        #Row of properties
        propRows.append([logP(molecule), molWt(molecule), numAcc(molecule), numDon(molecule), rolBon(molecule)])
    except Exception:
        #Occasionally RDKit cannot analyze a molecule; in these cases, simply leave the molecule out.
        #Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
        continue

#reshape(-1, 5) keeps the (0, 5) shape even when no molecule could be analyzed
molProps = np.array(propRows, dtype=float).reshape(-1, 5)

In [None]:
#Pick random generated molecules to inspect
#(molProps is deliberately not reset here: this cell computes no properties)

#Get a random sample of (at most) "nummols" lines from the generated molecules file
nummols = 1
#The context manager closes the file as soon as the lines have been read
with open("genmols.txt") as genFile:
    moleculelines = genFile.read().splitlines()
#random.sample raises ValueError when asked for more items than the list holds, so clamp the request
molecules = random.sample(moleculelines, min(nummols, len(moleculelines)))

print(molecules)

In [None]:
#Draw the sampled molecule
#"molecules" is a list of SMILES strings, while Chem.MolFromSmiles parses a single string,
#so pass the first sampled entry rather than the list itself
Chem.MolFromSmiles(molecules[0])

In [None]:
#Render one hard-coded molecule to "mol.png" with a thicker bond line setting
DrawingOptions.bondLineWidth=12
exampleMol = Chem.MolFromSmiles("O1OC(=O)C(=C1)NC(=O)c2ccccc2F")
Draw.MolToFile(exampleMol, "mol.png", size=(2000, 2000), fitImage = True)

In [None]:
#Properties of generated molecules
#Each row of molProps1 is [logP, molWt, numAcc, numDon, rolBon] for one molecule
propRows1 = []
#Read in data file line by line; the context manager closes the file when the loop ends
with open("genmols.txt", "r") as molFile:
    for line in molFile:
        #Remove the trailing newline (and any surrounding whitespace) before parsing
        smiles = line.strip()
        if not smiles:
            #Blank lines carry no SMILES string
            continue
        try:
            #Row of properties
            propRows1.append([logP(smiles), molWt(smiles), numAcc(smiles), numDon(smiles), rolBon(smiles)])
        except Exception:
            #Occasionally RDKit cannot analyze a molecule; in these cases, simply leave the molecule out.
            #Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
            continue

#Build the array once; reshape(-1, 5) keeps the (0, 5) shape when nothing could be analyzed
molProps1 = np.array(propRows1, dtype=float).reshape(-1, 5)

In [None]:
#Properties of generated molecules
#Each row of molProps2 is [logP, molWt, numAcc, numDon, rolBon] for one molecule
propRows2 = []
#Read in data file line by line; the context manager closes the file when the loop ends
with open("genmols1.txt", "r") as molFile:
    for line in molFile:
        #Remove the trailing newline (and any surrounding whitespace) before parsing
        smiles = line.strip()
        if not smiles:
            #Blank lines carry no SMILES string
            continue
        try:
            #Row of properties
            propRows2.append([logP(smiles), molWt(smiles), numAcc(smiles), numDon(smiles), rolBon(smiles)])
        except Exception:
            #Occasionally RDKit cannot analyze a molecule; in these cases, simply leave the molecule out.
            #Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
            continue

#Build the array once; reshape(-1, 5) keeps the (0, 5) shape when nothing could be analyzed
molProps2 = np.array(propRows2, dtype=float).reshape(-1, 5)

In [None]:
#Properties of generated molecules
#Each row of molProps3 is [logP, molWt, numAcc, numDon, rolBon] for one molecule
propRows3 = []
#Read in data file line by line; the context manager closes the file when the loop ends
with open("genmols2.txt", "r") as molFile:
    for line in molFile:
        #Remove the trailing newline (and any surrounding whitespace) before parsing
        smiles = line.strip()
        if not smiles:
            #Blank lines carry no SMILES string
            continue
        try:
            #Row of properties
            propRows3.append([logP(smiles), molWt(smiles), numAcc(smiles), numDon(smiles), rolBon(smiles)])
        except Exception:
            #Occasionally RDKit cannot analyze a molecule; in these cases, simply leave the molecule out.
            #Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
            continue

#Build the array once; reshape(-1, 5) keeps the (0, 5) shape when nothing could be analyzed
molProps3 = np.array(propRows3, dtype=float).reshape(-1, 5)

In [None]:
#Properties of generated molecules
#Each row of molProps4 is [logP, molWt, numAcc, numDon, rolBon] for one molecule
propRows4 = []
#Read in data file line by line; the context manager closes the file when the loop ends
with open("genmols3.txt", "r") as molFile:
    for line in molFile:
        #Remove the trailing newline (and any surrounding whitespace) before parsing
        smiles = line.strip()
        if not smiles:
            #Blank lines carry no SMILES string
            continue
        try:
            #Row of properties
            propRows4.append([logP(smiles), molWt(smiles), numAcc(smiles), numDon(smiles), rolBon(smiles)])
        except Exception:
            #Occasionally RDKit cannot analyze a molecule; in these cases, simply leave the molecule out.
            #Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
            continue

#Build the array once; reshape(-1, 5) keeps the (0, 5) shape when nothing could be analyzed
molProps4 = np.array(propRows4, dtype=float).reshape(-1, 5)

In [None]:
#Properties of generated molecules
#Each row of molProps5 is [logP, molWt, numAcc, numDon, rolBon] for one molecule
propRows5 = []
#Read in data file line by line; the context manager closes the file when the loop ends
with open("genmols4.txt", "r") as molFile:
    for line in molFile:
        #Remove the trailing newline (and any surrounding whitespace) before parsing
        smiles = line.strip()
        if not smiles:
            #Blank lines carry no SMILES string
            continue
        try:
            #Row of properties
            propRows5.append([logP(smiles), molWt(smiles), numAcc(smiles), numDon(smiles), rolBon(smiles)])
        except Exception:
            #Occasionally RDKit cannot analyze a molecule; in these cases, simply leave the molecule out.
            #Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
            continue

#Build the array once; reshape(-1, 5) keeps the (0, 5) shape when nothing could be analyzed
molProps5 = np.array(propRows5, dtype=float).reshape(-1, 5)

In [None]:
#Properties of generated molecules
#Each row of molProps6 is [logP, molWt, numAcc, numDon, rolBon] for one molecule
propRows6 = []
#Read in data file line by line; the context manager closes the file when the loop ends
with open("genmols5.txt", "r") as molFile:
    for line in molFile:
        #Remove the trailing newline (and any surrounding whitespace) before parsing
        smiles = line.strip()
        if not smiles:
            #Blank lines carry no SMILES string
            continue
        try:
            #Row of properties
            propRows6.append([logP(smiles), molWt(smiles), numAcc(smiles), numDon(smiles), rolBon(smiles)])
        except Exception:
            #Occasionally RDKit cannot analyze a molecule; in these cases, simply leave the molecule out.
            #Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
            continue

#Build the array once; reshape(-1, 5) keeps the (0, 5) shape when nothing could be analyzed
molProps6 = np.array(propRows6, dtype=float).reshape(-1, 5)

In [None]:
#PCA visualization
from sklearn.preprocessing import StandardScaler 
from sklearn.decomposition import PCA

#Stack the six property arrays row-wise, set 1 first and set 6 last
printmols = np.vstack((molProps1, molProps2, molProps3, molProps4, molProps5, molProps6))

#Scale the stacked properties, then project them onto two principal components
scaledProps = StandardScaler().fit_transform(printmols)
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(scaledProps)

#Report the shape of the projection followed by the shape of the scaled input
for reported in (principalComponents, scaledProps):
    print(np.shape(reported))

In [None]:
#Print the running total of rows after each of the six property arrays,
#i.e. the row index at which each set ends once the arrays are stacked in order
runningTotal = 0
for props in (molProps1, molProps2, molProps3, molProps4, molProps5, molProps6):
    runningTotal += np.shape(props)[0]
    print(runningTotal)

In [None]:
#Graph
import matplotlib.pyplot as plt

#principalComponents holds the six sets stacked in order (molProps1 ... molProps6), so the
#row range of each set follows from the set sizes. Deriving the boundaries here, instead of
#hard-coding them, keeps the colouring correct when the input files change.
setSizes = [np.shape(props)[0] for props in (molProps1, molProps2, molProps3, molProps4, molProps5, molProps6)]
setEdges = np.concatenate(([0], np.cumsum(setSizes)))
setColors = ["#293352", "#52854C", "#D16103", "#8C8179", "#4E84C4", "#A51C30"]

fig = plt.figure()
for start, stop, setColor in zip(setEdges[:-1], setEdges[1:], setColors):
    #Every set gets its own closed-open [start, stop) slice, so no point is drawn twice
    plt.scatter(principalComponents[start:stop,0], principalComponents[start:stop,1], color = setColor)

#plt.show() does not take a figure argument; it displays the open figure(s)
plt.show()

In [None]:
#Write the figure currently bound to "fig" to progression.png at 600 dpi
fig.savefig("progression.png", dpi=600)

In [None]:
#Write the figure currently bound to "fig" to rolBon.png at 600 dpi
#NOTE(review): no cell between the previous save and this one rebinds "fig", so run in order
#this would store the same plot under a second name — confirm which figure is intended
fig.savefig("rolBon.png", dpi=600)

In [None]:
from sklearn.preprocessing import StandardScaler 
from sklearn.decomposition import PCA


def _qedPropertyRows(path):
    """Return an (n, 8) float array with one ``QEDProp`` row per usable line of ``path``.

    Every "G" character and surrounding whitespace is removed from each line
    before it is passed to ``QEDProp``. Lines that are blank, or for which
    ``QEDProp`` fails or does not yield exactly 8 numeric values, are skipped.

    NOTE(review): ``QEDProp`` is not defined in the visible part of this
    notebook; it must already exist in the kernel. A NameError is re-raised
    instead of being swallowed, so a missing helper cannot silently produce
    an empty array.
    """
    rows = []
    #The context manager closes the file when the loop ends
    with open(path, "r") as molFile:
        for line in molFile:
            smiles = line.replace("G", "").strip()
            if not smiles:
                #Blank lines carry no SMILES string
                continue
            try:
                rows.append(np.asarray(QEDProp(smiles), dtype=float).reshape(8))
            except NameError:
                #A missing name is a notebook-state error, not an unanalyzable molecule
                raise
            except Exception:
                #Occasionally a molecule cannot be analyzed; in these cases, simply leave it out
                continue
    #Build the array once; reshape(-1, 8) keeps the (0, 8) shape when nothing could be analyzed
    return np.array(rows, dtype=float).reshape(-1, 8)


#Read in both data files line by line
molProps1 = _qedPropertyRows("finestmols.txt")
molProps2 = _qedPropertyRows("genmols.txt")

#Rows from genmols.txt come first, rows from finestmols.txt second
printmols = np.concatenate((molProps2, molProps1), axis=0)
#Scale array of properties for all molecules
scaledProps = StandardScaler().fit_transform(printmols)
print(np.shape(molProps1))
print(np.shape(molProps2))
print(np.shape(printmols))

#Apply PCA
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(scaledProps)

In [None]:
#Rows of principalComponents past the first np.shape(molProps2)[0] belong to the second stacked set;
#print the shape of that tail (first component only), then the shape of molProps2 itself
numFirstSet = np.shape(molProps2)[0]
secondSetFirstComponent = principalComponents[numFirstSet:, 0]
print(np.shape(secondSetFirstComponent))
print(np.shape(molProps2))

In [None]:
#Graph
import matplotlib.pyplot as plt

#printmols stacks the molProps2 rows first and the molProps1 rows after them,
#so this row count is the boundary between the two sets
numFirstSet = np.shape(molProps2)[0]

fig = plt.figure()
#NOTE(review): this plots the first two raw property columns of printmols, not the PCA
#projection in principalComponents — confirm that is the intended plot
plt.scatter(printmols[:numFirstSet,0], printmols[:numFirstSet,1], color = "#293352")
plt.scatter(printmols[numFirstSet:,0], printmols[numFirstSet:,1], facecolors="none", color = "#A51C30")
#plt.show() does not take a figure argument; it displays the open figure(s)
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler 
from sklearn.decomposition import PCA


def _markerStrippedProps(path):
    """Return an (n, 5) float array of descriptor rows for the SMILES lines in ``path``.

    Every "G" character and surrounding whitespace is removed from each line
    before parsing. Each row is [logP, molWt, numAcc, numDon, rolBon]. Blank
    lines and lines whose descriptors cannot be computed are skipped.
    """
    rows = []
    #The context manager closes the file when the loop ends
    with open(path, "r") as molFile:
        for line in molFile:
            smiles = line.replace("G", "").strip()
            if not smiles:
                #Blank lines carry no SMILES string
                continue
            try:
                rows.append([logP(smiles), molWt(smiles), numAcc(smiles), numDon(smiles), rolBon(smiles)])
            except Exception:
                #Occasionally RDKit cannot analyze a molecule; in these cases, simply leave it out.
                #Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
                continue
    #Build the array once; reshape(-1, 5) keeps the (0, 5) shape when nothing could be analyzed
    return np.array(rows, dtype=float).reshape(-1, 5)


#Read in both data files line by line
molProps1 = _markerStrippedProps("finestmols.txt")
molProps2 = _markerStrippedProps("genmols.txt")

#Rows from genmols.txt come first, rows from finestmols.txt second
printmols = np.concatenate((molProps2, molProps1), axis=0)
#Scale array of properties for all molecules
scaledProps = StandardScaler().fit_transform(printmols)
print(np.shape(molProps1))
print(np.shape(molProps2))
print(np.shape(printmols))

#Apply PCA
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(scaledProps)

In [None]:
#Graph
import matplotlib.pyplot as plt

#The first np.shape(molProps2)[0] rows of principalComponents belong to the first plotted set,
#the remaining rows to the second
numFirstSet = np.shape(molProps2)[0]

fig = plt.figure()
plt.scatter(principalComponents[:numFirstSet,0], principalComponents[:numFirstSet,1], color = "#293352")
#facecolors="none" leaves the second set's markers hollow so the first set stays visible underneath
plt.scatter(principalComponents[numFirstSet:,0], principalComponents[numFirstSet:,1], facecolors="none", color = "#A51C30")
#plt.show() does not take a figure argument; it displays the open figure(s)
plt.show()

In [None]:
def _generatedProps(path):
    """Return an (n, 5) float array of descriptor rows for the SMILES lines in ``path``.

    Each row is [partition coefficient, molecular weight, number of hydrogen
    bond acceptors, number of hydrogen bond donors, number of rotatable bonds].
    Blank lines and lines whose descriptors cannot be computed are skipped.
    """
    rows = []
    #The context manager closes the file when the loop ends
    with open(path, "r") as molFile:
        for line in molFile:
            #Remove the trailing newline (and any surrounding whitespace) before parsing
            smiles = line.strip()
            if not smiles:
                #Blank lines carry no SMILES string
                continue
            try:
                rows.append([logP(smiles), molWt(smiles), numAcc(smiles), numDon(smiles), rolBon(smiles)])
            except Exception:
                #Occasionally RDKit cannot analyze a molecule; in these cases, simply leave it out.
                #Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
                continue
    #Build the array once; reshape(-1, 5) keeps the (0, 5) shape when nothing could be analyzed
    return np.array(rows, dtype=float).reshape(-1, 5)


#One property array per generated-molecule file
molProps1 = _generatedProps("genmols.txt")
molProps2 = _generatedProps("genmols1.txt")
molProps3 = _generatedProps("genmols2.txt")
molProps4 = _generatedProps("genmols3.txt")
molProps5 = _generatedProps("genmols4.txt")

In [None]:
#Each row of molProps6 is [partition coefficient, molecular weight, number of hydrogen bond acceptors, number of hydrogen bond donors, number of rotatable bonds]
propRows6 = []
#Read in data file line by line; the context manager closes the file when the loop ends
with open("genmols5.txt", "r") as molFile:
    for line in molFile:
        #Remove the trailing newline (and any surrounding whitespace) before parsing
        smiles = line.strip()
        if not smiles:
            #Blank lines carry no SMILES string
            continue
        try:
            #Row of properties
            propRows6.append([logP(smiles), molWt(smiles), numAcc(smiles), numDon(smiles), rolBon(smiles)])
        except Exception:
            #Occasionally RDKit cannot analyze a molecule; in these cases, simply leave the molecule out.
            #Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
            continue

#Build the array once; reshape(-1, 5) keeps the (0, 5) shape when nothing could be analyzed
molProps6 = np.array(propRows6, dtype=float).reshape(-1, 5)

In [None]:
#Each row of molProps7 is [partition coefficient, molecular weight, number of hydrogen bond acceptors, number of hydrogen bond donors, number of rotatable bonds]
propRows7 = []
#Read in data file line by line; the context manager closes the file when the loop ends
with open("genmols6.txt", "r") as molFile:
    for line in molFile:
        #Remove the trailing newline (and any surrounding whitespace) before parsing
        smiles = line.strip()
        if not smiles:
            #Blank lines carry no SMILES string
            continue
        try:
            #Row of properties
            propRows7.append([logP(smiles), molWt(smiles), numAcc(smiles), numDon(smiles), rolBon(smiles)])
        except Exception:
            #Occasionally RDKit cannot analyze a molecule; in these cases, simply leave the molecule out.
            #Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupts working.
            continue

#Build the array once; reshape(-1, 5) keeps the (0, 5) shape when nothing could be analyzed
molProps7 = np.array(propRows7, dtype=float).reshape(-1, 5)

In [None]:
#Print the (rows, columns) shape of the first six property arrays, one per line
for props in (molProps1, molProps2, molProps3, molProps4, molProps5, molProps6):
    print(np.shape(props))

In [None]:
#Upper limits applied to the first five property columns, in column order
#(assumes the columns are [logP, molWt, numAcc, numDon, rolBon], as built by the descriptor cells)
propertyLimits = np.array([3, 480, 3, 3, 3])
#One row per molecule: 1.0 where the property is <= its limit, 0.0 otherwise.
#A single vectorized comparison replaces the row-by-row np.append, which re-copied the array every iteration.
satisfies1 = (molProps1[:, :5] <= propertyLimits).astype(float)

In [None]:
#Upper limits applied to the first five property columns, in column order
#(assumes the columns are [logP, molWt, numAcc, numDon, rolBon], as built by the descriptor cells)
propertyLimits = np.array([3, 480, 3, 3, 3])
#One row per molecule: 1.0 where the property is <= its limit, 0.0 otherwise.
#A single vectorized comparison replaces the row-by-row np.append, which re-copied the array every iteration.
satisfies2 = (molProps2[:, :5] <= propertyLimits).astype(float)

In [None]:
#Upper limits applied to the first five property columns, in column order
#(assumes the columns are [logP, molWt, numAcc, numDon, rolBon], as built by the descriptor cells)
propertyLimits = np.array([3, 480, 3, 3, 3])
#One row per molecule: 1.0 where the property is <= its limit, 0.0 otherwise.
#A single vectorized comparison replaces the row-by-row np.append, which re-copied the array every iteration.
satisfies3 = (molProps3[:, :5] <= propertyLimits).astype(float)

In [None]:
#Upper limits applied to the first five property columns, in column order
#(assumes the columns are [logP, molWt, numAcc, numDon, rolBon], as built by the descriptor cells)
propertyLimits = np.array([3, 480, 3, 3, 3])
#One row per molecule: 1.0 where the property is <= its limit, 0.0 otherwise.
#A single vectorized comparison replaces the row-by-row np.append, which re-copied the array every iteration.
satisfies4 = (molProps4[:, :5] <= propertyLimits).astype(float)

In [None]:
#Upper limits applied to the first five property columns, in column order
#(assumes the columns are [logP, molWt, numAcc, numDon, rolBon], as built by the descriptor cells)
propertyLimits = np.array([3, 480, 3, 3, 3])
#One row per molecule: 1.0 where the property is <= its limit, 0.0 otherwise.
#A single vectorized comparison replaces the row-by-row np.append, which re-copied the array every iteration.
satisfies5 = (molProps5[:, :5] <= propertyLimits).astype(float)

In [None]:
#Upper limits applied to the first five property columns, in column order
#(assumes the columns are [logP, molWt, numAcc, numDon, rolBon], as built by the descriptor cells)
propertyLimits = np.array([3, 480, 3, 3, 3])
#One row per molecule: 1.0 where the property is <= its limit, 0.0 otherwise.
#A single vectorized comparison replaces the row-by-row np.append, which re-copied the array every iteration.
satisfies6 = (molProps6[:, :5] <= propertyLimits).astype(float)

In [None]:
#Accumulator with five columns; each of the following cells appends one row of percentages to it.
#NOTE: re-running an appending cell without re-running this one first adds a duplicate row.
percentgood = np.empty((0,5))

In [None]:
#Number of satisfied criteria per molecule in satisfies1, and the size of the set
criteriaMet = np.sum(satisfies1, axis=1)
numMolecules = np.shape(satisfies1)[0]

#goodK counts the molecules that satisfy at least K of the five criteria
good1, good2, good3, good4, good5 = (int(np.count_nonzero(criteriaMet >= needed)) for needed in range(1, 6))

#Express each count as a percentage of the set
percentgood1, percentgood2, percentgood3, percentgood4, percentgood5 = (100 * count / numMolecules for count in (good1, good2, good3, good4, good5))

#Append this set's five percentages as one new row
newRow = np.reshape(np.array([percentgood1, percentgood2, percentgood3, percentgood4, percentgood5]), (1,5))
percentgood = np.append(percentgood, newRow, axis = 0)
print(percentgood)

In [None]:
#Number of satisfied criteria per molecule in satisfies2, and the size of the set
criteriaMet = np.sum(satisfies2, axis=1)
numMolecules = np.shape(satisfies2)[0]

#goodK counts the molecules that satisfy at least K of the five criteria
good1, good2, good3, good4, good5 = (int(np.count_nonzero(criteriaMet >= needed)) for needed in range(1, 6))

#Express each count as a percentage of the set
percentgood1, percentgood2, percentgood3, percentgood4, percentgood5 = (100 * count / numMolecules for count in (good1, good2, good3, good4, good5))

#Append this set's five percentages as one new row
newRow = np.reshape(np.array([percentgood1, percentgood2, percentgood3, percentgood4, percentgood5]), (1,5))
percentgood = np.append(percentgood, newRow, axis = 0)
print(percentgood)

In [None]:
#Number of satisfied criteria per molecule in satisfies3, and the size of the set
criteriaMet = np.sum(satisfies3, axis=1)
numMolecules = np.shape(satisfies3)[0]

#goodK counts the molecules that satisfy at least K of the five criteria
good1, good2, good3, good4, good5 = (int(np.count_nonzero(criteriaMet >= needed)) for needed in range(1, 6))

#Express each count as a percentage of the set
percentgood1, percentgood2, percentgood3, percentgood4, percentgood5 = (100 * count / numMolecules for count in (good1, good2, good3, good4, good5))

#Append this set's five percentages as one new row
newRow = np.reshape(np.array([percentgood1, percentgood2, percentgood3, percentgood4, percentgood5]), (1,5))
percentgood = np.append(percentgood, newRow, axis = 0)
print(percentgood)

In [None]:
#Number of satisfied criteria per molecule in satisfies4, and the size of the set
criteriaMet = np.sum(satisfies4, axis=1)
numMolecules = np.shape(satisfies4)[0]

#goodK counts the molecules that satisfy at least K of the five criteria
good1, good2, good3, good4, good5 = (int(np.count_nonzero(criteriaMet >= needed)) for needed in range(1, 6))

#Express each count as a percentage of the set
percentgood1, percentgood2, percentgood3, percentgood4, percentgood5 = (100 * count / numMolecules for count in (good1, good2, good3, good4, good5))

#Append this set's five percentages as one new row
newRow = np.reshape(np.array([percentgood1, percentgood2, percentgood3, percentgood4, percentgood5]), (1,5))
percentgood = np.append(percentgood, newRow, axis = 0)
print(percentgood)

In [None]:
#Number of satisfied criteria per molecule in satisfies5, and the size of the set
criteriaMet = np.sum(satisfies5, axis=1)
numMolecules = np.shape(satisfies5)[0]

#goodK counts the molecules that satisfy at least K of the five criteria
good1, good2, good3, good4, good5 = (int(np.count_nonzero(criteriaMet >= needed)) for needed in range(1, 6))

#Express each count as a percentage of the set
percentgood1, percentgood2, percentgood3, percentgood4, percentgood5 = (100 * count / numMolecules for count in (good1, good2, good3, good4, good5))

#Append this set's five percentages as one new row
newRow = np.reshape(np.array([percentgood1, percentgood2, percentgood3, percentgood4, percentgood5]), (1,5))
percentgood = np.append(percentgood, newRow, axis = 0)
print(percentgood)

In [None]:
#Number of satisfied criteria per molecule in satisfies6, and the size of the set
criteriaMet = np.sum(satisfies6, axis=1)
numMolecules = np.shape(satisfies6)[0]

#goodK counts the molecules that satisfy at least K of the five criteria
good1, good2, good3, good4, good5 = (int(np.count_nonzero(criteriaMet >= needed)) for needed in range(1, 6))

#Express each count as a percentage of the set
percentgood1, percentgood2, percentgood3, percentgood4, percentgood5 = (100 * count / numMolecules for count in (good1, good2, good3, good4, good5))

#Append this set's five percentages as one new row
newRow = np.reshape(np.array([percentgood1, percentgood2, percentgood3, percentgood4, percentgood5]), (1,5))
percentgood = np.append(percentgood, newRow, axis = 0)
print(percentgood)

In [None]:
import pandas as pd
import seaborn as sns

#One single-column frame per threshold: column k of percentgood holds the
#percent of generated molecules meeting at least k+1 criteria
pandagood1, pandagood2, pandagood3, pandagood4, pandagood5 = (pd.DataFrame(percentgood[:,column]) for column in range(5))

sns.set(style="white")

#Line colours, ordered from "at least 1 criterion" to "at least 5 criteria"
lineColors = ["#A51C30", "#293352", "#52854C", "#D16103", "#8C8179"]
for frame, lineColor in zip((pandagood1, pandagood2, pandagood3, pandagood4, pandagood5), lineColors):
    #Each call returns the axes it drew on; "ax" keeps the result of the last call
    ax = sns.lineplot(data = frame, linewidth = 3, palette=[lineColor], legend = False)

In [None]:
#Fetch the figure that owns the axes "ax" and write it to satisfies.png at 600 dpi
fig = ax.get_figure()
fig.savefig("satisfies.png", dpi=600)