# final_model_Keras_without_fs

In [2]:
# Seed value
# Apparently you may use different seed values at each stage
seed_value= 1

# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']= '0'

# 2. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 3. Set the `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)

# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
#tf.random.set_seed(seed_value)
# for later versions: 
tf.compat.v1.set_random_seed(seed_value)

# 5. Configure a new global `tensorflow` session
from keras import backend as K
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
# for later versions:
# session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
# sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
# tf.compat.v1.keras.backend.set_session(sess)


# Resource
# https://stackoverflow.com/questions/45230448/how-to-get-reproducible-result-when-running-keras-with-tensorflow-backend?rq=1
# https://stackoverflow.com/questions/32419510/how-to-get-reproducible-results-in-keras/52897216#52897216

In [3]:
from rdkit import Chem
from rdkit.Chem import AllChem

def get_ecfc(smiles_list, radius=2, nBits=2048, useCounts=True):
    """
    Calculates the ECFP fingerprint for given SMILES list
    
    :param smiles_list: List of SMILES
    :type smiles_list: list
    :param radius: The ECPF fingerprints radius.
    :type radius: int
    :param nBits: The number of bits of the fingerprint vector.
    :type nBits: int
    :param useCounts: Use count vector or bit vector.
    :type useCounts: bool
    :returns: The calculated ECPF fingerprints for the given SMILES
    :rtype: Dataframe
    """     
    
    ecfp_fingerprints=[]
    erroneous_smiles=[]
    for smiles in smiles_list:
        mol=Chem.MolFromSmiles(smiles)
        if mol is None:
            ecfp_fingerprints.append([None]*nBits)
            erroneous_smiles.append(smiles)
        else:
            mol=Chem.AddHs(mol)
            if useCounts:
                ecfp_fingerprints.append(list(AllChem.GetHashedMorganFingerprint(mol, radius, nBits)))  
            else:    
                ecfp_fingerprints.append(list(AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits).ToBitString()))  
    
    # Create dataframe of fingerprints
    df_ecfp_fingerprints = pd.DataFrame(data = ecfp_fingerprints, index = smiles_list)
    # Remove erroneous data
    if len(erroneous_smiles)>0:
        print("The following erroneous SMILES have been found in the data:\n{}.\nThe erroneous SMILES will be removed from the data.".format('\n'.join(map(str, erroneous_smiles))))           
        df_ecfp_fingerprints = df_ecfp_fingerprints.dropna(how='any')    
    
    return df_ecfp_fingerprints

In [4]:
from sklearn.model_selection import train_test_split
import numpy as np 
import pandas as pd

In [5]:
#Get and Arrange data
import pandas as pd
df_data= pd.read_csv('all_data.csv')

train_data = df_data[df_data['data_type'] == 0]
test1_data = df_data[df_data['data_type'] == 1]
test2_data = df_data[df_data['data_type'] == 2]

In [6]:
train_encoded = get_ecfc(train_data["reactant_smiles"])
test1_encoded = get_ecfc(test1_data["reactant_smiles"])
test2_encoded = get_ecfc(test2_data["reactant_smiles"])

In [7]:
# def modeling(train_data, test1_data, test2_data, encoder, model):  - After deciding the list of the smiles

def modeling(train_encoded, test1_encoded, test2_encoded, model):
    
    from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
    import time
    start_time = time.time()
    
    # Training
    X = train_encoded
    y = train_data['reaction_energy']
    
    model.fit(X.values, y)
    
    # Predicting
    pred_train = model.predict(train_encoded.values)
    pred_test1 = model.predict(test1_encoded.values)
    pred_test2 = model.predict(test2_encoded.values)
    
    
    # Scores of Train Data 
    tr_mae = mean_absolute_error(y, pred_train)
    tr_rmse = mean_squared_error(y ,pred_train , squared=False)
    tr_r2 = r2_score(y, pred_train)
    print('##########################  Scores of Train Data  ##########################')
    print('Train set MAE of {}: {:.3f}'.format(model, tr_mae))
    print('Train set RMSE of {}: {:.3f}'.format(model, tr_rmse))
    print('Train set R2 Score of {}: {:.3f}'.format(model, tr_r2))
    
    print("----------------------------------------------------------------------------")
    
    # Test1 Data
    test1_mae = mean_absolute_error(test1_data['reaction_energy'], pred_test1)
    test1_rmse = mean_squared_error(test1_data['reaction_energy'], pred_test1, squared=False)
    test1_r2 = r2_score(test1_data['reaction_energy'], pred_test1)
    print('##########################  Scores of Test1 Data  ##########################')
    print('Test1 set MAE of {}: {:.3f}'.format(model, test1_mae))
    print('Test1 set RMSE of {}: {:.3f}'.format(model, test1_rmse))
    print('Test1 set R2 Score of {}: {:.3f}'.format(model, test1_r2))
    
    print("----------------------------------------------------------------------------")
    
    # Test2 Data
    test2_mae = mean_absolute_error(test2_data['reaction_energy'], pred_test2)
    test2_rmse = mean_squared_error(test2_data['reaction_energy'], pred_test2, squared=False)
    test2_r2 = r2_score(test2_data['reaction_energy'], pred_test2)
    print('##########################  Scores of Test2 Data  ##########################')
    print('Test2 set MAE of {}: {:.3f}'.format(model, test2_mae))
    print('Test2 set RMSE of {}: {:.3f}'.format(model, test2_rmse))
    print('Test2 set R2 Score of {}: {:.3f}'.format(model, test2_r2))
    
    print("----------------------------------------------------------------------------")

    elapsed_time = time.time() - start_time
    print('##########################  Details  ##########################')
    print(f'{elapsed_time:.2f}s elapsed during modeling')

# INITIAL RUNNING

In [8]:
#import random
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import mean_squared_error



def BuildModel(input_dim=None):
    
    def model():
        keras_model = Sequential()
        keras_model.add(Dense(128, input_dim=input_dim,activation='relu')) 
        keras_model.add(Dense(32, activation='relu')) 
        keras_model.add(Dense(8,activation='relu')) 
        keras_model.add(Dense(1,activation='linear'))
        keras_model.summary()
        keras_model.compile(loss="mean_squared_error", optimizer="adam")   
        return keras_model
    return model

In [9]:
# Model

#input_dim = 2048
#model = KerasRegressor(build_fn=BuildModel(input_dim = 2048), nb_epoch=100, batch_size=3)
model = KerasRegressor(build_fn=BuildModel(input_dim = 2048), epochs=10, batch_size=5)


# Training
#np.random.seed(123)
modeling(train_encoded=train_encoded, test1_encoded=test1_encoded, test2_encoded=test2_encoded, model=model)

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               262272    
_________________________________________________________________
dense_2 (Dense)              (None, 32)                4128      
_________________________________________________________________
dense_3 (Dense)              (None, 8)                 264       
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 9         
Total params: 266,673
Trainable params: 266,673
Non-trainable params: 0
_________________________________________________________________
Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
##########################  Scores of

In [None]:
Epoch 10/10
12707/12707 [==============================] - 15s 1ms/step - loss: 6.3266e-05
##########################  Scores of Train Data  ##########################
Train set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000001E1C92ECCC8>: 0.004
Train set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000001E1C92ECCC8>: 0.007
Train set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000001E1C92ECCC8>: 0.967
----------------------------------------------------------------------------
##########################  Scores of Test1 Data  ##########################
Test1 set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000001E1C92ECCC8>: 0.004
Test1 set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000001E1C92ECCC8>: 0.008
Test1 set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000001E1C92ECCC8>: 0.963
----------------------------------------------------------------------------
##########################  Scores of Test2 Data  ##########################
Test2 set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000001E1C92ECCC8>: 0.006
Test2 set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000001E1C92ECCC8>: 0.008
Test2 set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000001E1C92ECCC8>: 0.925
----------------------------------------------------------------------------
##########################  Details  ##########################
140.82s elapsed during modeling

# 1- This is for after Lasso FS !!!  --------   Final Best: 0.957283 using {'activation': 'sigmoid', 'batch_size': 16, 'dropout_rate': 0.0, 'input_dim': 491, 'learn_rate': 0.1, 'momentum': 0.4, 'optimizer': 'RMSprop'}

In [7]:
import random
import numpy as np
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import mean_squared_error
import time

import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
# Function to create model, required for KerasClassifier


def create_model(optimizer='RMSprop', learn_rate=0.1, momentum=0.4, activation='sigmoid', dropout_rate=0.0):
    
    keras_model = Sequential()
    keras_model.add(Dense(128, input_dim=train_encoded.shape[1], activation=activation))
    keras_model.add(Dropout(dropout_rate))
    keras_model.add(Dense(32, activation=activation)) 
    keras_model.add(Dropout(dropout_rate))
    keras_model.add(Dense(8,activation=activation)) 
    keras_model.add(Dropout(dropout_rate))
    keras_model.add(Dense(1,activation='linear'))
    keras_model.summary()
    # Compile model
    keras_model.compile(loss='mean_squared_error', optimizer=optimizer)

    return keras_model

In [8]:
# Early stopping
from keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='loss', patience=10, verbose=1)


# Model

#input_dim = 2048
#model = KerasRegressor(build_fn=BuildModel(input_dim = 2048), nb_epoch=100, batch_size=3)
model = KerasRegressor(build_fn=create_model, batch_size=16, epochs=300, callbacks=[early_stopping])


# Training
#np.random.seed(123)
modeling(train_encoded=train_encoded, test1_encoded=test1_encoded, test2_encoded=test2_encoded, model=model)

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               262272    
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                4128      
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 8)                 264       
_________________________________________________________________
dropout_3 (Dropout)          (None, 8)                 0         
_________________________________________________________________
dens

## The problem is we optimized for after feature selection. Probably because of the momentum.  

In [None]:
The problem is we optimized for after feature selection.


Epoch 242/300
12707/12707 [==============================] - 5s 355us/step - loss: 4.1171e-05
Epoch 00242: early stopping
##########################  Scores of Train Data  ##########################
Train set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002B28E0A0588>: 0.003
Train set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002B28E0A0588>: 0.005
Train set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002B28E0A0588>: 0.981
----------------------------------------------------------------------------
##########################  Scores of Test1 Data  ##########################
Test1 set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002B28E0A0588>: 0.004
Test1 set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002B28E0A0588>: 0.009
Test1 set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002B28E0A0588>: 0.948
----------------------------------------------------------------------------
##########################  Scores of Test2 Data  ##########################
Test2 set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002B28E0A0588>: 0.037
Test2 set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002B28E0A0588>: 0.082
Test2 set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002B28E0A0588>: -7.292
----------------------------------------------------------------------------
##########################  Details  ##########################
957.48s elapsed during modeling

# 2-  Best for without FS ---------------   : 0.956155 using {'activation': 'sigmoid', 'batch_size': 16, 'dropout_rate': 0.1, 'input_dim': 2048, 'learn_rate': 0.05, 'momentum': 0, 'optimizer': 'RMSprop'}

In [7]:
import random
import numpy as np
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import mean_squared_error
import time

import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
# Function to create model, required for KerasClassifier


def create_model(optimizer='RMSprop', learn_rate=0.05, momentum=0, activation='sigmoid', dropout_rate=0.1):
    
    keras_model = Sequential()
    keras_model.add(Dense(128, input_dim=train_encoded.shape[1], activation=activation))
    keras_model.add(Dropout(dropout_rate))
    keras_model.add(Dense(32, activation=activation)) 
    keras_model.add(Dropout(dropout_rate))
    keras_model.add(Dense(8,activation=activation)) 
    keras_model.add(Dropout(dropout_rate))
    keras_model.add(Dense(1,activation='linear'))
    keras_model.summary()
    # Compile model
    keras_model.compile(loss='mean_squared_error', optimizer=optimizer)

    return keras_model

In [8]:
# Early stopping
from keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='loss', patience=10, verbose=1)


# Model

#input_dim = 2048
model = KerasRegressor(build_fn=create_model, batch_size=16, epochs=300, callbacks=[early_stopping])


# Training
#np.random.seed(123)
modeling(train_encoded=train_encoded, test1_encoded=test1_encoded, test2_encoded=test2_encoded, model=model)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               262272    
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                4128      
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 8)                 264       
_________________________________________________________________
dropout_3 (Dropout)  

In [None]:
Epoch 64/300
12707/12707 [==============================] - 4s 336us/step - loss: 1.1113e-04
Epoch 00064: early stopping
##########################  Scores of Train Data  ##########################
Train set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000020E73CF5AC8>: 0.003
Train set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000020E73CF5AC8>: 0.007
Train set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000020E73CF5AC8>: 0.966
----------------------------------------------------------------------------
##########################  Scores of Test1 Data  ##########################
Test1 set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000020E73CF5AC8>: 0.004
Test1 set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000020E73CF5AC8>: 0.008
Test1 set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000020E73CF5AC8>: 0.961
----------------------------------------------------------------------------
##########################  Scores of Test2 Data  ##########################
Test2 set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000020E73CF5AC8>: 0.008
Test2 set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000020E73CF5AC8>: 0.009
Test2 set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000020E73CF5AC8>: 0.893
----------------------------------------------------------------------------
##########################  Details  ##########################
246.22s elapsed during modeling

-----------------------------------------------------------

# ---------------------------------------- FINAL MODEL ----------------------------------------

## After decreasing patience from 10 to 5...

In [9]:
import random
import numpy as np
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import mean_squared_error
import time

import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
# Function to create model, required for KerasClassifier


def create_model(optimizer='RMSprop', learn_rate=0.05, momentum=0, activation='sigmoid', dropout_rate=0.1):
    
    keras_model = Sequential()
    keras_model.add(Dense(128, input_dim=train_encoded.shape[1], activation=activation))
    keras_model.add(Dropout(dropout_rate))
    keras_model.add(Dense(32, activation=activation)) 
    keras_model.add(Dropout(dropout_rate))
    keras_model.add(Dense(8,activation=activation)) 
    keras_model.add(Dropout(dropout_rate))
    keras_model.add(Dense(1,activation='linear'))
    keras_model.summary()
    # Compile model
    keras_model.compile(loss='mean_squared_error', optimizer=optimizer)

    return keras_model

In [10]:
# Early stopping
from keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='loss', patience=5, verbose=1)


# Model

#input_dim = 2048
model = KerasRegressor(build_fn=create_model, batch_size=16, epochs=300, callbacks=[early_stopping])


# Training
#np.random.seed(123)
modeling(train_encoded=train_encoded, test1_encoded=test1_encoded, test2_encoded=test2_encoded, model=model)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               262272    
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                4128      
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 8)                 264       
_________________________________________________________________
dropout_3 (Dropout)  

In [None]:
---------------------------- Reproducible (Checked=4) ----------------------------

Epoch 22/300
12707/12707 [==============================] - 5s 373us/step - loss: 1.2642e-04
Epoch 23/300
12707/12707 [==============================] - 4s 346us/step - loss: 1.2615e-04
Epoch 00023: early stopping
##########################  Scores of Train Data  ##########################
Train set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000023382BB9408>: 0.004
Train set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000023382BB9408>: 0.008
Train set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000023382BB9408>: 0.959
----------------------------------------------------------------------------
##########################  Scores of Test1 Data  ##########################
Test1 set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000023382BB9408>: 0.004
Test1 set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000023382BB9408>: 0.008
Test1 set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000023382BB9408>: 0.962
----------------------------------------------------------------------------
##########################  Scores of Test2 Data  ##########################
Test2 set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000023382BB9408>: 0.005
Test2 set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000023382BB9408>: 0.006
Test2 set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x0000023382BB9408>: 0.952
----------------------------------------------------------------------------
##########################  Details  ##########################
110.66s elapsed during modeling

----------------------

# Saving Test 1 Predictions

In [13]:
test1_data.head(2)

Unnamed: 0,reaction_id,data_package_id,bond_type,functional_group_stoichiometry,reactant_smiles,product_smiles,reactant_inchiKey,product_inchiKey,reactant_solubility,product_solubility,...,product_single_point_job_id,reactant_optimization_job_id,product_optimization_job_id,UMAP-1,UMAP-2,data_type,reactantUFF,reactantMMFF,productUFF,productMMFF
2,3,1,OH,COOH,O=C1CC(=O)C(C(=O)O)=C1C(=O)O,C1C(O)=C(C(=O)O)C(=C1O)C(=O)O,QLGSJNWSAMBDMI-UHFFFAOYSA-N,AEVQXUUICHCAGT-UHFFFAOYSA-N,-0.966,-0.867,...,26.0,19.0,25.0,13.362195,3.405288,1,33.280029,-113.415199,37.053325,-2.716193
3,4,1,OH,F,O=C1CC(=O)C=C1F,C1C(O)=C(F)C=C1O,XVCHEZRSXGJYDT-UHFFFAOYSA-N,LLYKNQJBRVSOKM-UHFFFAOYSA-N,-0.524,-0.499,...,34.0,21.0,33.0,14.856011,2.316219,1,23.112465,-12.882731,24.103198,34.474933


In [15]:
pred_test1 = model.predict(test1_encoded.values)
keras_result_test1 = test1_data[["reaction_id", "reactant_smiles", "reaction_energy"]]
keras_result_test1["pred_test1"] = pred_test1
keras_result_test1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,reaction_id,reactant_smiles,reaction_energy,pred_test1
2,3,O=C1CC(=O)C(C(=O)O)=C1C(=O)O,-0.06394,-0.046921
3,4,O=C1CC(=O)C=C1F,-0.01642,-0.012994
13,14,O=C1NC(=O)C=C1,-0.00731,-0.006922
14,15,O=C1NC(=O)C(F)=C1F,-0.00118,0.014583
24,25,O=C1SC(=O)C(C(=O)O)=C1C(=O)O,-0.06775,-0.057425
...,...,...,...,...
15759,15897,O=S(=O)(O)c(c1)cc(S(=O)(=O)O)c(c12)c(O)n(c2O)N...,-0.00215,-0.002250
15762,15900,c1cccc(c12)c(O)n(c2O)N(C3=O)C(=O)c(c34)c(S(=O)...,0.02162,0.004420
15768,15906,O=S(=O)(O)c(c1)c(S(=O)(=O)O)c(S(=O)(=O)O)c(c12...,-0.01803,-0.017114
15788,15927,O=S(=O)(O)c1ccc(S(=O)(=O)O)c(c12)c(O)n(c2O)N(C...,-0.03881,-0.014094


In [16]:
pred_test1

array([-0.04692131, -0.01299443, -0.00692186, ..., -0.01711423,
       -0.01409437, -0.0244816 ], dtype=float32)

In [17]:
# saving the dataframe
keras_result_test1.to_csv(r'.\final_models\keras_result_test1.csv', index=False)

# Saving Test 2 Predictions

In [20]:
test2_data.head(2)

Unnamed: 0,reaction_id,data_package_id,bond_type,functional_group_stoichiometry,reactant_smiles,product_smiles,reactant_inchiKey,product_inchiKey,reactant_solubility,product_solubility,...,product_single_point_job_id,reactant_optimization_job_id,product_optimization_job_id,UMAP-1,UMAP-2,data_type,reactantUFF,reactantMMFF,productUFF,productMMFF
767,769,25,NH,OH,c1cccc(c12)cc(nn2)O,c1cccc(c12)C=C(O)NN2,CXUGAWWYKSOLEL-UHFFFAOYSA-N,BMBWSQWNXPSELG-UHFFFAOYSA-N,-2.08,-2.41,...,2965.0,2369.0,2964.0,21.961576,-2.257458,2,20.955169,18.34382,17.140645,19.53073
772,774,25,NH,OH,Oc1cccc(c12)cc(nn2)O,Oc1cccc(c12)C=C(O)NN2,BSAMMELAAJYOKU-UHFFFAOYSA-N,QXPSFNRPQZDOFI-UHFFFAOYSA-N,-2.173,-2.142,...,3339.0,2335.0,3338.0,22.153984,-2.141872,2,22.612127,27.933289,20.633665,23.125357


In [22]:
pred_test2 = model.predict(test2_encoded.values)
keras_result_test2 = test2_data[["reaction_id", "reactant_smiles", "reaction_energy"]]
keras_result_test2["pred_test2"] = pred_test2
keras_result_test2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,reaction_id,reactant_smiles,reaction_energy,pred_test2
767,769,c1cccc(c12)cc(nn2)O,-0.01916,-0.017922
772,774,Oc1cccc(c12)cc(nn2)O,-0.01587,-0.013521
776,778,c1c(O)ccc(c12)cc(nn2)O,-0.01793,-0.013437
779,781,c1cc(O)cc(c12)cc(nn2)O,-0.01559,-0.013862
781,783,c1ccc(O)c(c12)cc(nn2)O,-0.01734,-0.014143
...,...,...,...,...
14233,14338,O=C(O)c1cc(C(=O)O)c(C(=O)O)c(c12)S/C(C2=O)=C(C...,-0.06081,-0.050426
14234,14339,O=C(O)c1c(C(=O)O)cc(C(=O)O)c(c12)S/C(C2=O)=C(C...,-0.04998,-0.048198
14235,14340,O=C(O)c1c(C(=O)O)cc(C(=O)O)c(c12)S/C(C2=O)=C(C...,-0.05763,-0.045113
14236,14341,O=C(O)c1c(C(=O)O)c(C(=O)O)cc(c12)S/C(C2=O)=C(C...,-0.05533,-0.042310


In [23]:
pred_test2

array([-0.01792245, -0.01352063, -0.01343709, ..., -0.04511323,
       -0.04231039, -0.04532441], dtype=float32)

In [24]:
# saving the dataframe
keras_result_test2.to_csv(r'.\final_models\keras_result_test2.csv', index=False)

# Save the model

In [26]:
model

<keras.wrappers.scikit_learn.KerasRegressor at 0x1b5e3cf5bc8>

In [27]:
pred_test1 = model.predict(test1_encoded.values)
pred_test1

array([-0.04692131, -0.01299443, -0.00692186, ..., -0.01711423,
       -0.01409437, -0.0244816 ], dtype=float32)

In [28]:
import pickle
# save
pickle.dump(model, open(r'.\final_models\keras_final_model.txt', "wb"))

#### Read Saved model 

In [30]:
# load
keras_final_model = pickle.load(open(r'.\final_models\keras_final_model.txt', "rb"))
keras_final_model

<keras.wrappers.scikit_learn.KerasRegressor at 0x1b59b0eb308>

In [32]:
s_pred_test1 = keras_final_model.predict(test1_encoded.values)
s_pred_test1

array([-0.04692131, -0.01299443, -0.00692186, ..., -0.01711423,
       -0.01409437, -0.0244816 ], dtype=float32)

## After deleting last dropout layer...  DIDN'T WORK!

In [7]:
import random
import numpy as np
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.metrics import mean_squared_error
import time

import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
# Function to create model, required for KerasClassifier


def create_model(optimizer='RMSprop', learn_rate=0.05, momentum=0, activation='sigmoid', dropout_rate=0.1):
    
    keras_model = Sequential()
    keras_model.add(Dense(128, input_dim=train_encoded.shape[1], activation=activation))
    keras_model.add(Dropout(dropout_rate))
    keras_model.add(Dense(32, activation=activation)) 
    keras_model.add(Dropout(dropout_rate))
    keras_model.add(Dense(8,activation=activation)) 
    keras_model.add(Dense(1,activation='linear'))
    keras_model.summary()
    # Compile model
    keras_model.compile(loss='mean_squared_error', optimizer=optimizer)

    return keras_model

In [8]:
# Early stopping
from keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='loss', patience=5, verbose=1)


# Model

#input_dim = 2048
model = KerasRegressor(build_fn=create_model, batch_size=16, epochs=300, callbacks=[early_stopping])


# Training
#np.random.seed(123)
modeling(train_encoded=train_encoded, test1_encoded=test1_encoded, test2_encoded=test2_encoded, model=model)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               262272    
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                4128      
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 8)                 264       
_________________________________________________________________
dense_4 (Dense)      

In [None]:
Epoch 70/300
12707/12707 [==============================] - 4s 336us/step - loss: 8.1456e-05
Epoch 00070: early stopping
##########################  Scores of Train Data  ##########################
Train set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002004C7672C8>: 0.008
Train set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002004C7672C8>: 0.010
Train set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002004C7672C8>: 0.931
----------------------------------------------------------------------------
##########################  Scores of Test1 Data  ##########################
Test1 set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002004C7672C8>: 0.008
Test1 set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002004C7672C8>: 0.011
Test1 set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002004C7672C8>: 0.928
----------------------------------------------------------------------------
##########################  Scores of Test2 Data  ##########################
Test2 set MAE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002004C7672C8>: 0.008
Test2 set RMSE of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002004C7672C8>: 0.009
Test2 set R2 Score of <keras.wrappers.scikit_learn.KerasRegressor object at 0x000002004C7672C8>: 0.888
----------------------------------------------------------------------------
##########################  Details  ##########################
297.61s elapsed during modeling