In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import sys
import os
from catGP import preprocess_data, OMGP
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
'''data_format = [metadata, slab_d0, slab_d1, bimetal_fp, 
                  ads_d0_unscaled, ads_d1_unscaled, ads_d2_unscaled, SS_unscaled,
                  ads_d0_scaled, ads_d1_scaled, ads_d2_scaled, SS_scaled,
                  energy, energy_A]'''

def get_data(A, data_dir='../Fingerprint_Generation'):
    """Load the fingerprint records of one adsorbate and assemble ``(X, y)``.

    The file ``<data_dir>/<A>_data.npy`` is expected to hold a sequence of
    records that are indexed positionally.  According to the ``data_format``
    note in this notebook a record is laid out as
    ``[metadata, slab_d0, slab_d1, bimetal_fp, ads_d0_unscaled, ...,
    SS_scaled, energy, energy_A]``.

    Parameters
    ----------
    A : str
        Adsorbate label; it is substituted into the file name
        ``{A}_data.npy``.
    data_dir : str, optional
        Directory containing the ``*_data.npy`` files.  Defaults to the
        location this notebook has always read from.

    Returns
    -------
    X : numpy.ndarray
        One row per record, built by flattening entries 1 through 11 of the
        record and concatenating them in that order.
    y : numpy.ndarray
        Entry ``[-2]`` of every record (``energy`` per the data_format note).
    """
    path = os.path.join(data_dir, '{}_data.npy'.format(A))

    # The records mix metadata with arrays, so the file is presumably a
    # pickled object array; NumPy >= 1.16.3 refuses to load those unless
    # allow_pickle=True is passed explicitly.
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # files that were generated by a trusted pipeline.
    # The trailing [()] unwraps a 0-d object array and is a no-op otherwise.
    data = np.load(path, allow_pickle=True)[()]

    y = np.array([record[-2] for record in data])

    # Entries 1..11 are the fingerprint pieces; entry 0 and the last two
    # entries are not used as features.
    X = np.vstack([np.hstack([record[k].reshape(-1) for k in range(1, 12)])
                   for record in data])

    return X, y

In [3]:
# Adsorbates to model.  The MAE summary cell of this notebook iterates over
# the same list, so the name must stay defined at notebook level.
adsorbates = ['CH', 'CH2', 'CH3', 'OH', 'NH', 'SH']

# Seeds for the repeated 80/20 train/test splits; identical for every
# adsorbate, hence defined once outside the loop.
r_state = [10, 20, 42, 80, 150]

for ads in adsorbates:
    print('Working on adsorbates: {}'.format(ads))

    # Create the output directory before the expensive fits so that an
    # unwritable location fails immediately instead of after all the GP runs.
    # exist_ok=True replaces the former check-then-mkdir pair.
    run_dir = 'run_{}'.format(ads)
    os.makedirs(run_dir, exist_ok=True)

    X, y = get_data(ads)
    data = preprocess_data(X, y)
    data.clean_data()
    X, y = data.get_data()

    # Rebuilt for every adsorbate, as before.
    # NOTE(review): whether OMGP mutates this dict is not visible here, so it
    # is deliberately not shared across adsorbates.
    kernel_recipe = {'ConstantKernel' : [{'RBF' : [1.0,
                                                   {'length_scale' : 1.0}]},
                                      {'constant_value' : 1.0,
                                       'constant_value_bounds' : (3e-7, 3e7)}],
                     'WhiteKernel' : {'noise_level' : 0.1,
                                      'noise_level_bounds' : (1e-5, 1e5)}}

    # Attribute dict of the fitted model, keyed by random state.
    r_data = {}
    for rs in r_state:
        print('    Working on random state: {}'.format(rs))
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            train_size=0.80,
                                                            random_state=rs)

        MLGP = OMGP(X_train=X_train,
                    X_test=X_test,
                    y_train=y_train,
                    y_test=y_test,
                    kernel_recipe=kernel_recipe)

        MLGP.run_GP()
        r_data[rs] = MLGP.__dict__

    # np.save pickles the dict into a 0-d object array; readers need
    # np.load(..., allow_pickle=True)[()] to get it back.
    np.save('run_{0}/{0}_rsdata.npy'.format(ads), r_data)

    # NOTE: MLGP is the model of the *last* random state in r_state, so the
    # parity plots below describe that single split only.
    # (Learning-curve and "fancy" parity-plot calls used to be commented out
    # here; they were dead code and have been dropped.)
    for suffix, extra in (('', {}), ('_err_bar', {'err_bar': True})):
        for split in ('train', 'test'):
            PP = MLGP.parity_plot(data=split, **extra)
            PP.savefig('run_{0}/{0}_parity_plot_{1}{2}.png'.format(ads, split, suffix))

Working on adsorbates: CH
    Working on random state: 10




    Working on random state: 20
    Working on random state: 42
    Working on random state: 80
    Working on random state: 150
Working on adsorbates: CH2
    Working on random state: 10




    Working on random state: 20
    Working on random state: 42




    Working on random state: 80
    Working on random state: 150
Working on adsorbates: CH3
    Working on random state: 10




    Working on random state: 20




    Working on random state: 42
    Working on random state: 80
    Working on random state: 150
Working on adsorbates: OH
    Working on random state: 10




    Working on random state: 20
    Working on random state: 42
    Working on random state: 80
    Working on random state: 150
Working on adsorbates: NH
    Working on random state: 10




    Working on random state: 20
    Working on random state: 42
    Working on random state: 80
    Working on random state: 150
Working on adsorbates: SH
    Working on random state: 10




    Working on random state: 20
    Working on random state: 42
    Working on random state: 80
    Working on random state: 150




In [4]:
# Summarise the train/test MAE over all random states of every adsorbate:
# the report is written to run_<ads>/out.txt and echoed to the notebook.
for ads in adsorbates:
    # The file holds a pickled dict (random state -> model attribute dict).
    # NumPy >= 1.16.3 only loads pickled content with allow_pickle=True;
    # [()] unwraps the 0-d object array back into the dict.
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # result files produced by this notebook.
    data = np.load('run_{0}/{0}_rsdata.npy'.format(ads), allow_pickle=True)[()]
    MAE_train = np.array([data[k]['MAE_train'] for k in data])
    MAE_test = np.array([data[k]['MAE_test'] for k in data])

    # Build the report once so the file and the notebook output cannot drift
    # apart.
    report = '\n'.join([
        'MAE_train: {}'.format(MAE_train),
        'MAE_train mean: {}'.format(MAE_train.mean()),
        'MAE_train std: {}'.format(MAE_train.std()),
        'MAE_test: {}'.format(MAE_test),
        'MAE_test mean: {}'.format(MAE_test.mean()),
        'MAE_test std: {}'.format(MAE_test.std()),
    ]) + '\n'

    with open('run_{}/out.txt'.format(ads), 'w') as f:
        f.write(report)

    print('\nData for adsorbate: {}'.format(ads))
    # print() appends one more newline, giving the blank separator line the
    # original output had after the last statistic.
    print(report)


Data for adsorbate: CH
MAE_train: [0.09092076 0.08252839 0.07970769 0.14077675 0.08618118]
MAE_train mean: 0.09602295218684773
MAE_train std: 0.022689583653977923
MAE_test: [0.15085668 0.15234394 0.17814903 0.19582582 0.16752521]
MAE_test mean: 0.1689401370975302
MAE_test std: 0.016805303617000455


Data for adsorbate: CH2
MAE_train: [0.20183635 0.19161006 0.18023837 0.19887533 0.19669184]
MAE_train mean: 0.19385039123857767
MAE_train std: 0.00758163270441049
MAE_test: [0.23457951 0.26127413 0.30853878 0.25162345 0.26835227]
MAE_test mean: 0.26487362804419723
MAE_test std: 0.024596762104174934


Data for adsorbate: CH3
MAE_train: [0.1273461  0.12655671 0.12626353 0.12976871 0.12716148]
MAE_train mean: 0.1274193057359264
MAE_train std: 0.0012386633583168164
MAE_test: [0.17981968 0.16602584 0.16971316 0.15850209 0.16896808]
MAE_test mean: 0.16860576877520256
MAE_test std: 0.006868038514320154


Data for adsorbate: OH
MAE_train: [0.09444187 0.0967569  0.10086945 0.10891122 0.1023837 ]
MA