In [38]:
import numpy as np
from sklearn.model_selection import train_test_split
import sys
import os
from catGP import preprocess_data, OMGP

In [42]:
def get_data(A, data_dir='../Fingerprint_Generation'):
    """Load the fingerprint records of one adsorbate and build ``(X, y)``.

    Parameters
    ----------
    A : str
        Adsorbate label; the file ``<data_dir>/<A>_data.npy`` is read.
    data_dir : str, optional
        Directory that holds the ``*_data.npy`` files. The default is the
        path this notebook originally hard-coded.

    Returns
    -------
    X : numpy.ndarray
        One row per record, formed by flattening entries 1, 3, 4 and 8 of
        the record and concatenating them in that order.
    y : numpy.ndarray
        Entry ``-2`` (second to last) of every record.

    Notes
    -----
    Records are indexed purely by position. According to the
    ``data_format`` note kept elsewhere in this notebook these positions
    would be slab_d0, bimetal_fp, ads_d0_unscaled, ads_d0_scaled and
    energy -- TODO confirm against the fingerprint generator.
    """
    path = '{}/{}_data.npy'.format(data_dir, A)
    # The file stores pickled Python objects, which NumPy >= 1.16.3 refuses
    # to load unless allow_pickle=True is passed explicitly.
    # SECURITY: unpickling can execute arbitrary code -- only load trusted files.
    # `[()]` unwraps a 0-d object array and is a no-op for an n-d array.
    data = np.load(path, allow_pickle=True)[()]

    y = np.array([record[-2] for record in data])
    X = np.vstack([np.hstack([record[1].reshape(-1),
                              record[3].reshape(-1),
                              record[4].reshape(-1),
                              record[8].reshape(-1)]) for record in data])

    return X, y

In [43]:
adsorbates = ['CH', 'CH2', 'CH3', 'OH', 'NH', 'SH']

# Kernel description passed unchanged to OMGP. It is identical for every
# adsorbate and every split, so it is built once instead of per iteration.
# NOTE(review): the key names suggest ConstantKernel * RBF + WhiteKernel in
# scikit-learn terms -- confirm against catGP.OMGP.
kernel_recipe = {'ConstantKernel' : [{'RBF' : [1.0,
                                               {'length_scale' : 1.0}]},
                                  {'constant_value' : 1.0,
                                   'constant_value_bounds' : (3e-7, 3e7)}],
                 'WhiteKernel' : {'noise_level' : 0.1,
                                  'noise_level_bounds' : (1e-5, 1e5)}}

# Seeds for the repeated 80/20 train/test splits.
# For a quick single-split run use: r_state = [42]
r_state = [10, 20, 42, 80, 150, 200, 300, 400]

# (keyword arguments for MLGP.parity_plot, file-name suffix), in save order.
parity_plots = [({'data': 'train'}, 'parity_plot_train'),
                ({'data': 'test'}, 'parity_plot_test'),
                ({'data': 'train', 'err_bar': True}, 'parity_plot_train_err_bar'),
                ({'data': 'test', 'err_bar': True}, 'parity_plot_test_err_bar')]

for ads in adsorbates:
    print('Working on adsorbates: {}'.format(ads))
    X, y = get_data(ads)
    data = preprocess_data(X, y)
    data.clean_data()
    X, y = data.get_data()

    # Attribute dict of the fitted model, keyed by the split's random state.
    r_data = {}
    for rs in r_state:
        print('    Working on random state: {}'.format(rs))
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            train_size=0.80,
                                                            random_state=rs)

        MLGP = OMGP(X_train=X_train,
                    X_test=X_test,
                    y_train=y_train,
                    y_test=y_test,
                    kernel_recipe=kernel_recipe)

        MLGP.run_GP()
        # Store every attribute of this run; a new OMGP object is created
        # for the next seed, so the dicts do not alias each other.
        r_data[rs] = MLGP.__dict__

    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs('run_{}'.format(ads), exist_ok=True)

    # r_data is a dict, so np.save pickles it; reload with allow_pickle=True.
    np.save('run_{0}/{0}_rsdata.npy'.format(ads), r_data)

    # At this point MLGP is the model of the LAST seed in r_state, so the
    # parity plots below describe that split only.
    # (MLGP.plot_learning_curve() and MLGP.parity_plot_fancy(data=...) were
    # previously tried here and left disabled.)
    for plot_kwargs, suffix in parity_plots:
        PP = MLGP.parity_plot(**plot_kwargs)
        PP.savefig('run_{0}/{0}_{1}.png'.format(ads, suffix))

Working on adsorbates: CH
    Working on random state: 10




    Working on random state: 20




    Working on random state: 42




    Working on random state: 80




    Working on random state: 150




    Working on random state: 200




    Working on random state: 300




    Working on random state: 400




Working on adsorbates: CH2
    Working on random state: 10




    Working on random state: 20




    Working on random state: 42




    Working on random state: 80




    Working on random state: 150




    Working on random state: 200




    Working on random state: 300




    Working on random state: 400




Working on adsorbates: CH3
    Working on random state: 10




    Working on random state: 20




    Working on random state: 42




    Working on random state: 80




    Working on random state: 150




    Working on random state: 200




    Working on random state: 300




    Working on random state: 400




Working on adsorbates: OH
    Working on random state: 10




    Working on random state: 20




    Working on random state: 42




    Working on random state: 80




    Working on random state: 150




    Working on random state: 200




    Working on random state: 300




    Working on random state: 400




Working on adsorbates: NH
    Working on random state: 10




    Working on random state: 20




    Working on random state: 42




    Working on random state: 80




    Working on random state: 150




    Working on random state: 200




    Working on random state: 300




    Working on random state: 400




Working on adsorbates: SH
    Working on random state: 10




    Working on random state: 20




    Working on random state: 42




    Working on random state: 80




    Working on random state: 150




    Working on random state: 200




    Working on random state: 300




    Working on random state: 400




In [46]:


# Summarise the train/test MAE over all random-state splits of each adsorbate:
# print the summary and write the same numbers to run_<ads>/out.txt.
for ads in adsorbates:
    # The saved object is a pickled dict {random_state: model attribute dict};
    # NumPy >= 1.16.3 needs allow_pickle=True to load it, and `[()]` unwraps
    # the 0-d object array back into that dict.
    # SECURITY: unpickling can execute arbitrary code -- only load trusted files.
    data = np.load('run_{0}/{0}_rsdata.npy'.format(ads), allow_pickle=True)[()]
    MAE_train = np.array([data[k]['MAE_train'] for k in data])
    MAE_test = np.array([data[k]['MAE_test'] for k in data])

    # Build the report once so the file and the printed text cannot diverge.
    report = ['MAE_train: {}'.format(MAE_train),
              'MAE_train mean: {}'.format(MAE_train.mean()),
              'MAE_train std: {}'.format(MAE_train.std()),
              'MAE_test: {}'.format(MAE_test),
              'MAE_test mean: {}'.format(MAE_test.mean()),
              'MAE_test std: {}'.format(MAE_test.std())]

    # NOTE(review): the original opened `di + '/out.txt'`, but `di` is not
    # defined anywhere in this notebook (NameError on a fresh kernel). It is
    # assumed to be the per-adsorbate run directory -- confirm. Writing per
    # adsorbate also stops each iteration from overwriting the previous file.
    with open('run_{}/out.txt'.format(ads), 'w') as f:
        f.write('\n'.join(report) + '\n')

    print('\nData for adsorbate: {}'.format(ads))
    print('\n'.join(report) + '\n')


Data for adsorbate: CH
MAE_train: [0.10895787 0.1113771  0.10593257 0.10887003 0.11022488 0.11417225
 0.10699193 0.10782629]
MAE_train mean: 0.10929411554939178
MAE_train std: 0.0024507344823321165
MAE_test: [0.16906475 0.163243   0.20090112 0.17187983 0.1760206  0.19082332
 0.18460066 0.17758324]
MAE_test mean: 0.179264566802645
MAE_test std: 0.011504512656409617


Data for adsorbate: CH2
MAE_train: [0.22571339 0.20871988 0.19241294 0.21573275 0.21213851 0.21183645
 0.21147432 0.21398787]
MAE_train mean: 0.21150201384751857
MAE_train std: 0.008650815010030434
MAE_test: [0.24619599 0.28098827 0.33465098 0.25555758 0.28139115 0.24658004
 0.26673067 0.26838938]
MAE_test mean: 0.27256050839123136
MAE_test std: 0.02672730664976149


Data for adsorbate: CH3
MAE_train: [0.14928394 0.14766738 0.1479706  0.15073235 0.14836108 0.15313691
 0.14615568 0.15087065]
MAE_train mean: 0.14927232340683716
MAE_train std: 0.002074717721109756
MAE_test: [0.1831361  0.17548599 0.18398305 0.16906808 0.17811

In [None]:
# Reference note only (a bare string, no executable logic): the order of the
# fields inside each record of the *_data.npy files, counted from index 0.
# get_data() indexes records by position (1, 3, 4, 8 and -2), presumably
# against this layout -- TODO confirm with the fingerprint-generation code.
'''data_format = [metadata, slab_d0, slab_d1, bimetal_fp, 
                  ads_d0_unscaled, ads_d1_unscaled, ads_d2_unscaled, SS_unscaled,
                  ads_d0_scaled, ads_d1_scaled, ads_d2_scaled, SS_scaled,
                  energy, energy_A]'''