In [1]:
import numpy as np
import pandas as pd
import urllib.request 
from rdkit import Chem
from rdkit.Chem import AllChem, Draw, Descriptors, PandasTools
from rdkit.ML.Descriptors import MoleculeDescriptors
from sklearn.model_selection import train_test_split

# Download the tab-separated SMILES file and load it into a DataFrame.
# The three columns are named below: SMILES string, CAS number, activity label.
url = 'https://raw.githubusercontent.com/onecoinbuybus/Database_chemoinformatics/master/smiles_cas_N6512.smi'
urllib.request.urlretrieve(url, 'ames.txt')
df = pd.read_csv('ames.txt', header=None, sep='\t')
df.columns = ['smiles', 'CAS_NO', 'activity']
PandasTools.AddMoleculeColumnToFrame(frame=df, smilesCol='smiles')

# Parse each SMILES a single time and reuse the result both for the validity
# filter and for the molecule list, instead of re-parsing the column in
# separate passes (each pass re-emits the RDKit parse errors).
parsed_mols = [Chem.MolFromSmiles(smile) for smile in df['smiles']]

# Index labels of the rows whose SMILES RDKit could not parse.
none_list = [label for label, mol in zip(df.index, parsed_mols) if mol is None]

# Drop the unparsable rows; the original index labels are kept on purpose.
df = df.drop(none_list)
mols = [mol for mol in parsed_mols if mol is not None]

# Features are built from `mols` and labels from `df`, so they must stay aligned.
assert len(mols) == len(df), "molecule list and DataFrame rows are out of sync"

# One MACCS-keys bit list per molecule.
maccskeys = [list(AllChem.GetMACCSKeysFingerprint(m)) for m in mols]

RDKit ERROR: [20:02:24] SMILES Parse Error: syntax error while parsing: NNC(=O)CNC(=O)\C=N\#N
RDKit ERROR: [20:02:24] SMILES Parse Error: Failed parsing SMILES 'NNC(=O)CNC(=O)\C=N\#N' for input: 'NNC(=O)CNC(=O)\C=N\#N'
RDKit ERROR: [20:02:24] SMILES Parse Error: syntax error while parsing: O=C1NC(=O)\C(=N/#N)\C=N1
RDKit ERROR: [20:02:24] SMILES Parse Error: Failed parsing SMILES 'O=C1NC(=O)\C(=N/#N)\C=N1' for input: 'O=C1NC(=O)\C(=N/#N)\C=N1'
RDKit ERROR: [20:02:24] SMILES Parse Error: syntax error while parsing: NC(=O)CNC(=O)\C=N\#N
RDKit ERROR: [20:02:24] SMILES Parse Error: Failed parsing SMILES 'NC(=O)CNC(=O)\C=N\#N' for input: 'NC(=O)CNC(=O)\C=N\#N'
RDKit ERROR: [20:02:24] SMILES Parse Error: syntax error while parsing: CCCCN(CC(O)C1=C\C(=N/#N)\C(=O)C=C1)N=O
RDKit ERROR: [20:02:24] SMILES Parse Error: Failed parsing SMILES 'CCCCN(CC(O)C1=C\C(=N/#N)\C(=O)C=C1)N=O' for input: 'CCCCN(CC(O)C1=C\C(=N/#N)\C(=O)C=C1)N=O'
RDKit ERROR: [20:02:24] SMILES Parse Error: syntax error while pars

In [2]:
# Collect the name (first element) of every entry in RDKit's descriptor list,
# then build one calculator that evaluates all of them.
descriptor_names = [name for name, *_ in Descriptors._descList]
descriptor_calculator = MoleculeDescriptors.MolecularDescriptorCalculator(
    descriptor_names
)

In [3]:
# Run the descriptor calculator on every molecule; one result per entry of `mols`.
descriptors = list(map(descriptor_calculator.CalcDescriptors, mols))

In [4]:
# Fingerprint matrix: one row per molecule, one column per MACCS bit.
x_1 = np.array(maccskeys)

# Descriptor matrix: one row per molecule, one column per descriptor.
# Some descriptors can be NaN or exceed the float32 range; a single such value
# makes the training loss NaN. Cast explicitly to float32 (overflow becomes
# +/-inf) and then replace every non-finite entry with 0.0 as a neutral
# placeholder.
# NOTE(review): the descriptors are still unscaled - consider standardising
# them before training.
with np.errstate(over='ignore', invalid='ignore'):
    x_2 = np.array(descriptors, dtype=np.float64).astype(np.float32)
x_2 = np.nan_to_num(x_2, nan=0.0, posinf=0.0, neginf=0.0)

In [5]:
# Target vector: the 'activity' column of the filtered frame.
y = df.loc[:, 'activity']

In [6]:
# Sanity check: both feature matrices and the labels should have the same row count.
for checked in (x_1, x_2, y):
    print(checked.shape)

(6506, 167)
(6506, 200)
(6506,)


In [7]:
from random import random

import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, concatenate, Dense
from tensorflow.keras.models import Model

In [8]:
# One input tensor per feature matrix, sized by that matrix's last dimension
# (input1 for x_1, input2 for x_2, created in that order).
input1, input2 = (
    Input(shape=(features.shape[-1],)) for features in (x_1, x_2)
)

In [9]:
# Branch 1: 128-unit linear projection of the first input, wrapped as its own model.
Layer_1 = Dense(units=128, activation="linear")(input1)
outputs_1 = Model(
    inputs=input1,
    outputs=Layer_1,
)

# Branch 2: the same projection applied to the second input.
Layer_2 = Dense(units=128, activation="linear")(input2)
outputs_2 = Model(
    inputs=input2,
    outputs=Layer_2,
)

In [10]:
# Join the two branch outputs into a single tensor.
combined = concatenate(
    [
        outputs_1.output,
        outputs_2.output,
    ]
)

In [11]:
# Shared head: a 64-unit ReLU layer on the merged tensor, then a single
# sigmoid unit as the output.
Combined_layer = Dense(units=64, activation="relu")(combined)
out_put = Dense(units=1, activation="sigmoid")(Combined_layer)

In [12]:
# Assemble the two-input model from the branch inputs and the sigmoid output,
# compile it with binary cross-entropy and Adam, and print the layer summary.
model = Model(
    inputs=[outputs_1.input, outputs_2.input],
    outputs=out_put,
)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 167)]        0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 200)]        0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 128)          21504       input_1[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 128)          25728       input_2[0][0]                    
____________________________________________________________________________________________

In [13]:
# Train for 5 epochs on both feature matrices; keep the returned history object.
history = model.fit(
    x=[x_1, x_2],
    y=y,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
