In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn as sk
import ase
import ase.io as aio
import qml
import time
from sklearn.model_selection import train_test_split
import gc
gc.collect()
import sklearn.ensemble

import sys
sys.path.append('../')
import mlkrr 
import matplotlib as mpl

import os
import time
import pickle
%config Completer.use_jedi = False
%matplotlib notebook

Before running this notebook, run `src/generate_data.py` to generate the FCHL representations of QM9.

In [67]:
# Load the FCHL representations of QM9 written by src/generate_data.py
# (presumably one row per molecule -- TODO confirm against that script).
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only load
# files you generated yourself.
reps = np.load('../data/fchls_glob_qm9.npy', allow_pickle=True)

Load the target values

In [13]:
# NOTE(review): `data` is not defined in any visible cell, so this line fails
# on a fresh kernel (Restart & Run All) unless the table holding the `u0`
# column is loaded beforehand.
target = data.u0.values #in Ha

In [14]:
# Standardise the target to zero mean and unit standard deviation.
# A new array is built on purpose instead of using `-=` / `/=`: `target` comes
# from `data.u0.values`, which can share memory with the DataFrame (and is a
# read-only view under pandas copy-on-write), so in-place arithmetic would
# either silently rewrite `data.u0` or raise.
target = (target - target.mean()) / target.std()

Load the trained models (see the separate example for how to train them).

In [15]:
# Restore the two trained models. Each .npy file is loaded as an object array
# and `.item()` unwraps the single Python object stored in it.
# NOTE(review): allow_pickle=True executes whatever the pickle contains -- only
# load model files from a trusted source.
# NOTE(review): presumably the pickled objects are `mlkrr` model instances, so
# the `mlkrr` package must be importable here -- confirm.
mlkr_model = np.load('../models/trained_mlkr.npy', allow_pickle=True).item()
mlkrr_model = np.load('../models/trained_mlkrr.npy', allow_pickle=True).item()

In [16]:
# Error histories stored on the MLKRR model (plotted per optimisation step in
# the figure further down).
train_rmse, test_rmse = mlkrr_model.train_rmses, mlkrr_model.test_rmses
train_mae, test_mae = mlkrr_model.train_maes, mlkrr_model.test_maes

In [17]:
# Same error histories, taken from the MLKR model.
train_rmse_mlkr, test_rmse_mlkr = mlkr_model.train_rmses, mlkr_model.test_rmses
train_mae_mlkr, test_mae_mlkr = mlkr_model.train_maes, mlkr_model.test_maes

In [18]:
# Optimisation traces of both models: MLKR dashed, MLKRR solid.
# Left panel: wide MAE range. Right panel: zoom on the low-error region.
fig, axes = plt.subplots(ncols=2, figsize=[8,3])

# Right end of the horizontal "start" reference lines (one per model).
n_steps_mlkr = len(train_rmse_mlkr)
n_steps_mlkrr = len(train_rmse)

ax = axes[0]

# Empty series: only contributes a black dashed 'MLKR' legend entry.
ax.plot([], label='MLKR', c='black', linestyle='--')
ax.plot(train_mae_mlkr, c='C0', linestyle='--')
ax.plot(test_mae_mlkr, label='Test MLKR', c='C1', linestyle='--')
# Horizontal line at the first recorded MLKR test MAE.
ax.plot([0, n_steps_mlkr], [test_mae_mlkr[0]] * 2,
        color='black', label='NW start', linestyle='--')

ax.plot(train_mae, label='Train MLKRR', c='C0', linestyle='-')
ax.plot(test_mae, label='Test MLKRR', c='C1', linestyle='-')
# Horizontal line at the first recorded MLKRR test MAE.
ax.plot([0, n_steps_mlkrr], [test_mae[0]] * 2,
        color='black', label='KRR start', linestyle='-')

ax.set(xlabel='Optimization steps', ylabel='MAE [kcal / mol]', ylim=(0.03, 0.4))

ax = axes[1]
ax.plot(train_mae_mlkr, c='C0', linestyle='--')

ax.plot(train_mae, label='Train MLKRR', c='C0', linestyle='-')
ax.plot(test_mae, label='Test MLKRR', c='C1', linestyle='-')
# NOTE(review): 'KRR start' is drawn solid in the left panel but dashed here --
# confirm this difference is intended.
ax.plot([0, n_steps_mlkrr], [test_mae[0]] * 2,
        color='black', label='KRR start', linestyle='--')
ax.set(xlabel='Optimization steps', ylim=(0.03, 0.1))

plt.tight_layout()
plt.savefig('../figures/optimizations.pdf')

plt.show()

<IPython.core.display.Javascript object>

In [19]:
# Apply the MLKRR model's matrix A to every representation: with `reps` as a
# 2-D (samples x features) array -- TODO confirm -- each row x becomes A @ x.
reps_mlkrr = reps @ mlkrr_model.A.T

In [20]:
# Apply the MLKR model's matrix A to every representation: with `reps` as a
# 2-D (samples x features) array -- TODO confirm -- each row x becomes A @ x.
reps_mlkr = reps @ mlkr_model.A.T

In [21]:
# Number of molecules in the training and test subsets drawn below.
train_size, test_size = 10000, 2000

In [22]:
# Draw disjoint train / test index sets over all rows of `data`; the fixed
# random_state makes the split reproducible.
indices_train, indices_test = train_test_split(
    np.arange(len(data)),
    train_size=train_size,
    test_size=test_size,
    random_state=0,
)

# Learning curves (LC): original FCHL representation

In [27]:
# KRR on the original FCHL representation: scan the kernel width sigma on the
# full training set and keep the test predictions of the best-MAE sigma.
#
# NOTE(review): `dmtrtr` and `dmtrts` are not defined in any visible cell. From
# their use here they are presumably the train-train and test-train distance
# matrices of `reps` -- confirm, and compute them before this cell so the
# notebook survives Restart & Run All.
# NOTE(review): sigma is selected on the same test set whose error is reported,
# so the reported minimum is optimistically biased.
params = np.linspace(10,100, 5)

predictions_test = []
scores_test = []

regg = 1e-9

maes = []
rmses = []

for param in params:
    sigma = param

    kernel_constant = 1 / (1 * np.sqrt(2 * np.pi) * sigma)
    exponent_constant = 1 / (1 * sigma ** 2)

    K = kernel_constant * np.exp(
            - exponent_constant * dmtrtr ** 2 )

    # Solve (K + regg * I) alpha = y directly instead of forming the explicit
    # inverse: same solution, but cheaper and numerically better behaved for a
    # kernel matrix that is regularised this weakly.
    alpha_vec = np.linalg.solve(K + regg * np.eye(len(K)), target[indices_train])

    # test predictions
    Ks = kernel_constant * np.exp(
            - exponent_constant * dmtrts ** 2 )

    Y_predicted = np.dot(Ks, alpha_vec)

    mae = np.mean(np.abs(Y_predicted - target[indices_test]))
    rmse = np.sqrt(np.mean((Y_predicted - target[indices_test]) ** 2))
    scores_test.append(mae)
    predictions_test.append(Y_predicted)

    maes.append(mae)
    rmses.append(rmse)

    print('=========================')
    print('SIGMA: ', param)
    print('MAE: ', np.round(mae, 3))
    print('RMSE: ', np.round(rmse, 3))
    print('=========================')

# Index of the sigma with the lowest test MAE.
best_indx = np.argmin(scores_test)
print(best_indx)
scale_factor = 1  # NOTE(review): not used in any visible cell
predictions_test_original = predictions_test[best_indx]

SIGMA:  10.0
MAE:  0.125
RMSE:  0.189
SIGMA:  32.5
MAE:  0.088
RMSE:  0.132
SIGMA:  55.0
MAE:  0.079
RMSE:  0.119
SIGMA:  77.5
MAE:  0.084
RMSE:  0.124
SIGMA:  100.0
MAE:  0.091
RMSE:  0.132
2


In [28]:
# Learning curve on the original FCHL representation: for each training-set
# size, scan sigma and record the lowest test MAE and RMSE.
#
# NOTE(review): `dmtrtr` and `dmtrts` are not defined in any visible cell;
# presumably the train-train and test-train distance matrices -- confirm.
# NOTE(review): the minima are taken over test-set scores, and the MAE and RMSE
# minima may come from different sigmas.

tsizes = [100, 300, 1000, 3000]

lc_mae = []
lc_rmse = []

# Settings that do not depend on the training-set size.
params = np.linspace(10,600, 20)
regg = 1e-9

for tsize in tsizes:
    print('++++++++++++++++++++++++++++++++++++')
    print('++++++++++++++++++++++++++++++++++++')
    print('tsize: {}'.format(tsize))
    print('')

    predictions_test = []
    scores_test = []
    tmaes = []
    trmses = []

    # Restrict to the first `tsize` training points (slices, so no copy of the
    # full distance matrices is made).
    lc_dmtrtr = dmtrtr[:tsize, :tsize]
    lc_tdmtrts = dmtrts[:, :tsize]
    lc_target = target[indices_train[:tsize]]

    # Sigma-independent pieces, computed once per training-set size.
    lc_sq_trtr = lc_dmtrtr ** 2
    lc_sq_trts = lc_tdmtrts ** 2
    ridge = regg * np.eye(len(lc_dmtrtr))

    for param in params:
        sigma = param

        kernel_constant = 1 / (1 * np.sqrt(2 * np.pi) * sigma)
        exponent_constant = 1 / (1 * sigma ** 2)

        K = kernel_constant * np.exp(-exponent_constant * lc_sq_trtr)

        # Solve (K + regg * I) alpha = y directly rather than via an explicit
        # inverse (cheaper and numerically better behaved).
        alpha_vec = np.linalg.solve(K + ridge, lc_target)

        # test predictions
        Ks = kernel_constant * np.exp(-exponent_constant * lc_sq_trts)

        Y_predicted = np.dot(Ks, alpha_vec)

        mae = np.mean(np.abs(Y_predicted - target[indices_test]))
        rmse = np.sqrt(np.mean((Y_predicted - target[indices_test]) ** 2))

        tmaes.append(mae)
        trmses.append(rmse)

        scores_test.append(rmse)
        predictions_test.append(Y_predicted)

        print('=========================')
        print('SIGMA: ', param)
        print('MAE: ', np.round(mae, 3))
        print('RMSE: ', np.round(rmse, 3))
        print('=========================')

    # Index of the sigma with the lowest test RMSE for this size.
    best_indx = np.argmin(scores_test)

    lc_mae.append(np.min(tmaes))
    lc_rmse.append(np.min(trmses))

    print(best_indx)
    scale_factor = 1  # NOTE(review): not used in any visible cell

++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++
tsize: 100

SIGMA:  10.0
MAE:  0.634
RMSE:  0.833
SIGMA:  41.05263157894737
MAE:  0.481
RMSE:  0.637
SIGMA:  72.10526315789474
MAE:  0.478
RMSE:  0.643
SIGMA:  103.15789473684211
MAE:  0.479
RMSE:  0.648
SIGMA:  134.21052631578948
MAE:  0.476
RMSE:  0.645
SIGMA:  165.26315789473685
MAE:  0.469
RMSE:  0.637
SIGMA:  196.31578947368422
MAE:  0.459
RMSE:  0.624
SIGMA:  227.3684210526316
MAE:  0.447
RMSE:  0.608
SIGMA:  258.42105263157896
MAE:  0.435
RMSE:  0.592
SIGMA:  289.47368421052636
MAE:  0.424
RMSE:  0.578
SIGMA:  320.5263157894737
MAE:  0.416
RMSE:  0.565
SIGMA:  351.57894736842104
MAE:  0.409
RMSE:  0.554
SIGMA:  382.63157894736844
MAE:  0.403
RMSE:  0.546
SIGMA:  413.68421052631584
MAE:  0.399
RMSE:  0.539
SIGMA:  444.7368421052632
MAE:  0.395
RMSE:  0.534
SIGMA:  475.7894736842105
MAE:  0.393
RMSE:  0.53
SIGMA:  506.8421052631579
MAE:  0.391
RMSE:  0.527
SIGMA:  537.8947368421053
MAE:  0.39
RMSE:  0.525
SI

SIGMA:  537.8947368421053
MAE:  0.193
RMSE:  0.275
SIGMA:  568.9473684210527
MAE:  0.197
RMSE:  0.279
SIGMA:  600.0
MAE:  0.2
RMSE:  0.284
2


# Learning curves (LC): MLKR-transformed representation

In [29]:
# KRR on the MLKR-transformed representation: scan the kernel width sigma on
# the full training set and keep the test predictions of the best-MAE sigma.
#
# NOTE(review): `dmtrtr_mlkr` and `dmtrts_mlkr` are not defined in any visible
# cell. From their use here they are presumably the train-train and test-train
# distance matrices of `reps_mlkr` -- confirm, and compute them before this
# cell so the notebook survives Restart & Run All.
# NOTE(review): sigma is selected on the same test set whose error is reported,
# so the reported minimum is optimistically biased.
params = np.linspace(20,140, 5)

predictions_test = []
scores_test = []

regg = 1e-9

maes_mlkr = []
rmses_mlkr = []

for param in params:
    sigma = param

    kernel_constant = 1 / (1 * np.sqrt(2 * np.pi) * sigma)
    exponent_constant = 1 / (1 * sigma ** 2)

    K = kernel_constant * np.exp(
            - exponent_constant * dmtrtr_mlkr ** 2 )

    # Solve (K + regg * I) alpha = y directly instead of forming the explicit
    # inverse: same solution, but cheaper and numerically better behaved for a
    # kernel matrix that is regularised this weakly.
    alpha_vec = np.linalg.solve(K + regg * np.eye(len(K)), target[indices_train])

    # test predictions
    Ks = kernel_constant * np.exp(
            - exponent_constant * dmtrts_mlkr ** 2 )

    Y_predicted = np.dot(Ks, alpha_vec)

    mae = np.mean(np.abs(Y_predicted - target[indices_test]))
    rmse = np.sqrt(np.mean((Y_predicted - target[indices_test]) ** 2))
    scores_test.append(mae)
    predictions_test.append(Y_predicted)

    maes_mlkr.append(mae)
    rmses_mlkr.append(rmse)

    print('=========================')
    print('SIGMA: ', param)
    print('MAE: ', np.round(mae, 3))
    print('RMSE: ', np.round(rmse, 3))
    print('=========================')

# Index of the sigma with the lowest test MAE.
best_indx = np.argmin(scores_test)
print(best_indx)
scale_factor = 1  # NOTE(review): not used in any visible cell
predictions_test_mlkr = predictions_test[best_indx]

SIGMA:  20.0
MAE:  0.153
RMSE:  0.224
SIGMA:  50.0
MAE:  0.108
RMSE:  0.166
SIGMA:  80.0
MAE:  0.101
RMSE:  0.149
SIGMA:  110.0
MAE:  0.102
RMSE:  0.15
SIGMA:  140.0
MAE:  0.108
RMSE:  0.157
2


In [30]:
# Learning curve on the MLKR-transformed representation: for each training-set
# size, scan sigma and record the lowest test MAE and RMSE.
#
# NOTE(review): `dmtrtr_mlkr` and `dmtrts_mlkr` are not defined in any visible
# cell; presumably the train-train and test-train distance matrices of
# `reps_mlkr` -- confirm.
# NOTE(review): the minima are taken over test-set scores, and the MAE and RMSE
# minima may come from different sigmas.

tsizes = [100, 300, 1000, 3000]

lc_mae_mlkr = []
lc_rmse_mlkr = []

# Settings that do not depend on the training-set size.
params = np.linspace(10,100, 20)
regg = 1e-9

for tsize in tsizes:
    print('++++++++++++++++++++++++++++++++++++')
    print('++++++++++++++++++++++++++++++++++++')
    print('tsize: {}'.format(tsize))
    print('')

    predictions_test = []
    scores_test = []
    tmaes = []
    trmses = []

    # Restrict to the first `tsize` training points (slices, so no copy of the
    # full distance matrices is made).
    lc_dmtrtr_mlkr = dmtrtr_mlkr[:tsize, :tsize]
    lc_tdmtrts_mlkr = dmtrts_mlkr[:, :tsize]
    lc_target = target[indices_train[:tsize]]

    # Sigma-independent pieces, computed once per training-set size.
    lc_sq_trtr = lc_dmtrtr_mlkr ** 2
    lc_sq_trts = lc_tdmtrts_mlkr ** 2
    ridge = regg * np.eye(len(lc_dmtrtr_mlkr))

    for param in params:
        sigma = param

        kernel_constant = 1 / (1 * np.sqrt(2 * np.pi) * sigma)
        exponent_constant = 1 / (1 * sigma ** 2)

        K = kernel_constant * np.exp(-exponent_constant * lc_sq_trtr)

        # Solve (K + regg * I) alpha = y directly rather than via an explicit
        # inverse (cheaper and numerically better behaved).
        alpha_vec = np.linalg.solve(K + ridge, lc_target)

        # test predictions
        Ks = kernel_constant * np.exp(-exponent_constant * lc_sq_trts)

        Y_predicted = np.dot(Ks, alpha_vec)

        mae = np.mean(np.abs(Y_predicted - target[indices_test]))
        rmse = np.sqrt(np.mean((Y_predicted - target[indices_test]) ** 2))

        tmaes.append(mae)
        trmses.append(rmse)

        scores_test.append(rmse)
        predictions_test.append(Y_predicted)

        print('=========================')
        print('SIGMA: ', param)
        print('MAE: ', np.round(mae, 3))
        print('RMSE: ', np.round(rmse, 3))
        print('=========================')

    # Index of the sigma with the lowest test RMSE for this size.
    best_indx = np.argmin(scores_test)

    lc_mae_mlkr.append(np.min(tmaes))
    lc_rmse_mlkr.append(np.min(trmses))

++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++
tsize: 100

SIGMA:  10.0
MAE:  0.481
RMSE:  0.687
SIGMA:  14.736842105263158
MAE:  0.439
RMSE:  0.602
SIGMA:  19.473684210526315
MAE:  0.397
RMSE:  0.535
SIGMA:  24.210526315789473
MAE:  0.371
RMSE:  0.498
SIGMA:  28.94736842105263
MAE:  0.362
RMSE:  0.486
SIGMA:  33.68421052631579
MAE:  0.36
RMSE:  0.483
SIGMA:  38.421052631578945
MAE:  0.36
RMSE:  0.484
SIGMA:  43.1578947368421
MAE:  0.361
RMSE:  0.488
SIGMA:  47.89473684210526
MAE:  0.364
RMSE:  0.494
SIGMA:  52.63157894736842
MAE:  0.368
RMSE:  0.501
SIGMA:  57.368421052631575
MAE:  0.372
RMSE:  0.507
SIGMA:  62.10526315789473
MAE:  0.375
RMSE:  0.513
SIGMA:  66.84210526315789
MAE:  0.378
RMSE:  0.517
SIGMA:  71.57894736842104
MAE:  0.38
RMSE:  0.52
SIGMA:  76.3157894736842
MAE:  0.382
RMSE:  0.522
SIGMA:  81.05263157894737
MAE:  0.383
RMSE:  0.523
SIGMA:  85.78947368421052
MAE:  0.384
RMSE:  0.524
SIGMA:  90.52631578947367
MAE:  0.385
RMSE:  0.524
SIGMA:  95

SIGMA:  95.26315789473684
MAE:  0.136
RMSE:  0.199
SIGMA:  100.0
MAE:  0.135
RMSE:  0.198


# Learning curves (LC): MLKRR-transformed representation

In [31]:
# KRR on the MLKRR-transformed representation: scan the kernel width sigma on
# the full training set and keep the test predictions of the best-MAE sigma.
#
# NOTE(review): `dmtrtr_mlkrr` and `dmtrts_mlkrr` are not defined in any
# visible cell. From their use here they are presumably the train-train and
# test-train distance matrices of `reps_mlkrr` -- confirm, and compute them
# before this cell so the notebook survives Restart & Run All.
# NOTE(review): sigma is selected on the same test set whose error is reported,
# so the reported minimum is optimistically biased.
params = np.linspace(30,100, 5)

predictions_test = []
scores_test = []

regg = 1e-9

maes_mlkrr = []
rmses_mlkrr = []

for param in params:
    sigma = param

    kernel_constant = 1 / (1 * np.sqrt(2 * np.pi) * sigma)
    exponent_constant = 1 / (1 * sigma ** 2)

    K = kernel_constant * np.exp(
            - exponent_constant * dmtrtr_mlkrr ** 2 )

    # Solve (K + regg * I) alpha = y directly instead of forming the explicit
    # inverse: same solution, but cheaper and numerically better behaved for a
    # kernel matrix that is regularised this weakly.
    alpha_vec = np.linalg.solve(K + regg * np.eye(len(K)), target[indices_train])

    # test predictions
    Ks = kernel_constant * np.exp(
            - exponent_constant * dmtrts_mlkrr ** 2 )

    Y_predicted = np.dot(Ks, alpha_vec)

    mae = np.mean(np.abs(Y_predicted - target[indices_test]))
    rmse = np.sqrt(np.mean((Y_predicted - target[indices_test]) ** 2))
    scores_test.append(mae)
    predictions_test.append(Y_predicted)

    maes_mlkrr.append(mae)
    rmses_mlkrr.append(rmse)

    print('=========================')
    print('SIGMA: ', param)
    print('MAE: ', np.round(mae, 3))
    print('RMSE: ', np.round(rmse, 3))
    print('=========================')

# Index of the sigma with the lowest test MAE.
best_indx = np.argmin(scores_test)
print(best_indx)
scale_factor = 1  # NOTE(review): not used in any visible cell
predictions_test_mlkrr = predictions_test[best_indx]

SIGMA:  30.0
MAE:  0.059
RMSE:  0.101
SIGMA:  47.5
MAE:  0.054
RMSE:  0.095
SIGMA:  65.0
MAE:  0.054
RMSE:  0.093
SIGMA:  82.5
MAE:  0.058
RMSE:  0.096
SIGMA:  100.0
MAE:  0.064
RMSE:  0.102
2


In [32]:
# Learning curve on the MLKRR-transformed representation: for each
# training-set size, scan sigma and record the lowest test MAE and RMSE.
#
# NOTE(review): `dmtrtr_mlkrr` and `dmtrts_mlkrr` are not defined in any
# visible cell; presumably the train-train and test-train distance matrices of
# `reps_mlkrr` -- confirm.
# NOTE(review): the minima are taken over test-set scores, and the MAE and RMSE
# minima may come from different sigmas.

tsizes = [100, 300, 1000, 3000]

lc_mae_mlkrr = []
lc_rmse_mlkrr = []

# Settings that do not depend on the training-set size.
params = np.linspace(10,100, 20)
regg = 1e-9

for tsize in tsizes:
    print('++++++++++++++++++++++++++++++++++++')
    print('++++++++++++++++++++++++++++++++++++')
    print('tsize: {}'.format(tsize))
    print('')

    predictions_test = []
    scores_test = []
    tmaes = []
    trmses = []

    # Restrict to the first `tsize` training points (slices, so no copy of the
    # full distance matrices is made).
    lc_dmtrtr_mlkrr = dmtrtr_mlkrr[:tsize, :tsize]
    lc_tdmtrts_mlkrr = dmtrts_mlkrr[:, :tsize]
    lc_target = target[indices_train[:tsize]]

    # Sigma-independent pieces, computed once per training-set size.
    lc_sq_trtr = lc_dmtrtr_mlkrr ** 2
    lc_sq_trts = lc_tdmtrts_mlkrr ** 2
    ridge = regg * np.eye(len(lc_dmtrtr_mlkrr))

    for param in params:
        sigma = param

        kernel_constant = 1 / (1 * np.sqrt(2 * np.pi) * sigma)
        exponent_constant = 1 / (1 * sigma ** 2)

        K = kernel_constant * np.exp(-exponent_constant * lc_sq_trtr)

        # Solve (K + regg * I) alpha = y directly rather than via an explicit
        # inverse (cheaper and numerically better behaved).
        alpha_vec = np.linalg.solve(K + ridge, lc_target)

        # test predictions
        Ks = kernel_constant * np.exp(-exponent_constant * lc_sq_trts)

        Y_predicted = np.dot(Ks, alpha_vec)

        mae = np.mean(np.abs(Y_predicted - target[indices_test]))
        rmse = np.sqrt(np.mean((Y_predicted - target[indices_test]) ** 2))

        tmaes.append(mae)
        trmses.append(rmse)

        scores_test.append(rmse)
        predictions_test.append(Y_predicted)

        print('=========================')
        print('SIGMA: ', param)
        print('MAE: ', np.round(mae, 3))
        print('RMSE: ', np.round(rmse, 3))
        print('=========================')

    # Index of the sigma with the lowest test RMSE for this size.
    best_indx = np.argmin(scores_test)

    lc_mae_mlkrr.append(np.min(tmaes))
    lc_rmse_mlkrr.append(np.min(trmses))

++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++
tsize: 100

SIGMA:  10.0
MAE:  0.519
RMSE:  0.688
SIGMA:  14.736842105263158
MAE:  0.488
RMSE:  0.659
SIGMA:  19.473684210526315
MAE:  0.466
RMSE:  0.63
SIGMA:  24.210526315789473
MAE:  0.457
RMSE:  0.615
SIGMA:  28.94736842105263
MAE:  0.456
RMSE:  0.611
SIGMA:  33.68421052631579
MAE:  0.457
RMSE:  0.61
SIGMA:  38.421052631578945
MAE:  0.459
RMSE:  0.61
SIGMA:  43.1578947368421
MAE:  0.459
RMSE:  0.608
SIGMA:  47.89473684210526
MAE:  0.459
RMSE:  0.605
SIGMA:  52.63157894736842
MAE:  0.458
RMSE:  0.603
SIGMA:  57.368421052631575
MAE:  0.457
RMSE:  0.599
SIGMA:  62.10526315789473
MAE:  0.456
RMSE:  0.596
SIGMA:  66.84210526315789
MAE:  0.454
RMSE:  0.594
SIGMA:  71.57894736842104
MAE:  0.453
RMSE:  0.591
SIGMA:  76.3157894736842
MAE:  0.452
RMSE:  0.588
SIGMA:  81.05263157894737
MAE:  0.451
RMSE:  0.586
SIGMA:  85.78947368421052
MAE:  0.449
RMSE:  0.584
SIGMA:  90.52631578947367
MAE:  0.448
RMSE:  0.582
SIGMA:  9

SIGMA:  95.26315789473684
MAE:  0.092
RMSE:  0.142
SIGMA:  100.0
MAE:  0.093
RMSE:  0.143


In [33]:
# Learning curves: lowest error found in the sigma scan versus training-set
# size, for the three representations (MAE left, RMSE right).
# NOTE(review): `target` is standardised earlier in this notebook, so confirm
# that the errors really are in the kcal / mol stated on the axis labels.
fig, axes = plt.subplots(ncols=2, figsize=[6,3], sharey=True)

# The last point of each curve comes from the full-training-set scans, whose
# size is `train_size`; use it instead of repeating the literal 10000.
lc_sizes = tsizes + [train_size]

# (legend label, learning-curve MAEs, full-set MAEs, learning-curve RMSEs, full-set RMSEs)
lc_series = [
    ('FCHL', lc_mae, maes, lc_rmse, rmses),
    ('FCHL$_{MLKR}$', lc_mae_mlkr, maes_mlkr, lc_rmse_mlkr, rmses_mlkr),
    ('FCHL$_{MLKRR}$', lc_mae_mlkrr, maes_mlkrr, lc_rmse_mlkrr, rmses_mlkrr),
]

ax = axes[0]

for label, curve_mae, full_mae, _, _ in lc_series:
    ax.plot(lc_sizes, curve_mae + [np.min(full_mae)], label=label)

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_ylim(0.05, 1)
ax.set_yticks([0.05, 0.1, 1])
ax.set_ylabel('MAE [kcal / mol]')
ax.set_xlabel('Size training set')

ax = axes[1]

for label, _, _, curve_rmse, full_rmse in lc_series:
    ax.plot(lc_sizes, curve_rmse + [np.min(full_rmse)], label=label)

ax.set_yscale('log')
ax.set_xscale('log')
ax.set_ylabel('RMSE [kcal / mol]')
ax.set_xlabel('Size training set')

plt.tight_layout()
fig.savefig('../figures/learning_curves.pdf')
plt.show()

<IPython.core.display.Javascript object>

In [34]:
from sklearn.neighbors import KernelDensity

# Gaussian KDE of the test residuals (prediction minus target) for each
# representation, evaluated on a common grid over [-1, 1].
xvals = np.linspace(-1, 1, 500)[:, np.newaxis]

log_densities = []
for predictions in (predictions_test_original,
                    predictions_test_mlkrr,
                    predictions_test_mlkr):
    # KernelDensity expects a 2-D array, hence the added column axis.
    yy = (predictions - target[indices_test])[:, np.newaxis]
    kde = KernelDensity(kernel="gaussian", bandwidth=0.05).fit(yy)
    log_densities.append(kde.score_samples(xvals))

log_dens_or, log_dens_mlkrr, log_dens_mlkr = log_densities

In [35]:
# Test MAE of the best-sigma KRR predictions on the original representation.
residuals = predictions_test_original - target[indices_test]
np.abs(residuals).mean()

0.07873505200899708

In [36]:
# Test MAE of the best-sigma KRR predictions on the MLKR representation.
residuals = predictions_test_mlkr - target[indices_test]
np.abs(residuals).mean()

0.10062209882310043

In [37]:
# Test MAE of the best-sigma KRR predictions on the MLKRR representation.
residuals = predictions_test_mlkrr - target[indices_test]
np.abs(residuals).mean()

0.05385263990007893

In [38]:
# Distribution of the test residuals for the three representations.
fig, ax = plt.subplots(figsize=[3.35,3])

# (legend name, log-density on `xvals`, test predictions) per representation.
dist_curves = [
    ('FCHL', log_dens_or, predictions_test_original),
    ('FCHL$_{MLKR}$', log_dens_mlkr, predictions_test_mlkr),
    ('FCHL$_{MLKRR}$', log_dens_mlkrr, predictions_test_mlkrr),
]

for curve_name, log_dens, predictions in dist_curves:
    # Compute the MAE shown in the label from the plotted predictions, so the
    # label cannot go stale the way a hand-typed number does.
    curve_mae = np.mean(np.abs(predictions - target[indices_test]))
    ax.plot(xvals, np.exp(log_dens),
            label='{} \n MAE: {:.3f}'.format(curve_name, curve_mae))

ax.set_xlim(-0.7, 0.7)
# NOTE(review): `target` is standardised earlier in this notebook, so confirm
# that the residuals really are in kcal/mol.
ax.set_xlabel('kcal/mol')
ax.set_ylabel('Distribution density')
# ax.legend(loc='upper left')
plt.tight_layout()
fig.savefig('../figures/error_dists.pdf')
plt.show()

<IPython.core.display.Javascript object>

In [39]:
# Explicit submodule import: `import sklearn as sk` alone does not guarantee
# that `sk.decomposition` has been loaded.
import sklearn.decomposition

# 50-component PCA of each representation. Every representation is fitted with
# its OWN estimator, so `pca_or` stays fitted on the original representation
# and `pca_mlkr` / `pca_mlkrr` are actually fitted.
pca_or = sk.decomposition.PCA(n_components=50)
pcas_or = pca_or.fit_transform(reps)

pca_mlkr = sk.decomposition.PCA(n_components=50)
pcas_mlkr = pca_mlkr.fit_transform(reps_mlkr)

pca_mlkrr = sk.decomposition.PCA(n_components=50)
pcas_mlkrr = pca_mlkrr.fit_transform(reps_mlkrr)

In [40]:
import sklearn.manifold

# The t-SNE fits below are disabled. Presumably they are how the embeddings
# loaded from ../data/tsne_*.npy in the next cell were produced -- TODO
# confirm, and record where those files come from.

# tsne_or = sk.manifold.TSNE(n_jobs=-1, init='pca', learning_rate='auto')
# tsnes_or = tsne_or.fit_transform(pcas_or)

# tsne_mlkrr = sk.manifold.TSNE(n_jobs=-1, init='pca', learning_rate='auto')
# tsnes_mlkrr = tsne_mlkrr.fit_transform(pcas_mlkrr)

# tsne_mlkr = sk.manifold.TSNE(n_jobs=-1, init='pca', learning_rate='auto')
# tsnes_mlkr = tsne_mlkr.fit_transform(pcas_mlkr)

In [41]:
# Load cached t-SNE results. Each file is unpacked into two objects; judging by
# the names these are the fitted estimator and the embedding -- TODO confirm.
# NOTE(review): allow_pickle=True executes whatever the pickle contains -- only
# load files from a trusted source.
tsne_or, tsnes_or = np.load('../data/tsne_fchl.npy', allow_pickle=True)
tsne_mlkrr, tsnes_mlkrr = np.load('../data/tsne_fchl_mlkrr.npy', allow_pickle=True)
tsne_mlkr, tsnes_mlkr = np.load('../data/tsne_fchl_mlkr.npy', allow_pickle=True)



In [42]:
# First two PCA components of each representation, coloured by the target.
vvv = 2  # symmetric colour range [-vvv, vvv]

ccc = target
fig, axes = plt.subplots(ncols=3, figsize=[10,3])

# One panel per representation: original, MLKR, MLKRR.
for ax, projection in zip(axes, (pcas_or, pcas_mlkr, pcas_mlkrr)):
    cax = ax.scatter(projection[:,0], projection[:,1], c=ccc, cmap='rainbow',
                     s=0.01, vmin=-vvv, vmax=vvv)
    ax.axis('off')

plt.savefig('../figures/pcas.png', dpi=300)
plt.show()

<IPython.core.display.Javascript object>

In [43]:
# t-SNE embedding of each representation, coloured by the target.
vvv = 2  # symmetric colour range [-vvv, vvv]

ccc = target
fig, axes = plt.subplots(ncols=3, figsize=[10,3])

# One panel per representation: original, MLKR, MLKRR.
for ax, embedding in zip(axes, (tsnes_or, tsnes_mlkr, tsnes_mlkrr)):
    cax = ax.scatter(embedding[:,0], embedding[:,1], c=ccc, cmap='rainbow',
                     s=0.001, vmin=-vvv, vmax=vvv)
    ax.axis('off')

plt.tight_layout()
plt.subplots_adjust(wspace=0.1, hspace=0)

# One colour bar shared by all three panels (taken from the last scatter).
cbar = fig.colorbar(cax, ax=list(axes.flat))
cbar.set_label('kcal / mol')
fig.savefig('../figures/tsnes.png', dpi=300)
plt.show()

<IPython.core.display.Javascript object>

In [44]:
# Variance of each representation along axis 0 (per column).
rep_vars, rep_vars_mlkr, rep_vars_mlkrr = (
    representation.var(axis=0)
    for representation in (reps, reps_mlkr, reps_mlkrr)
)

In [46]:
# Feature variances, each divided by its mean, for the three representations.
# Both panels show the same curves; the right one is clipped to [0, 5].
fig, axes = plt.subplots(ncols=2, figsize=[8,3])

# (variances, legend label, extra plot keywords)
variance_series = (
    (rep_vars, 'FCHL', {}),
    (rep_vars_mlkr, 'FCHL$_{MLKR}$', {'alpha': 0.5}),
    (rep_vars_mlkrr, 'FCHL$_{MLKRR}$', {'alpha': 0.5}),
)

for ax in axes:
    for variances, label, extra in variance_series:
        ax.plot(variances / np.mean(variances), label=label, linewidth=1, **extra)
    ax.set_xlabel('Features')
    ax.set_ylabel('Variance')
    ax.legend()

# Zoomed y-range on the right-hand panel only.
axes[1].set_ylim(0,5)

plt.tight_layout()
fig.savefig('../figures/variances_all.pdf')
plt.show()

<IPython.core.display.Javascript object>

In [47]:
# MLKR matrix A: full view (left) and a view limited to indices 0..100 (right).
vv = 2  # symmetric colour range [-vv, vv]
image_style = dict(cmap='bwr', vmax=vv, vmin=-vv)

fig, axes = plt.subplots(ncols=2)

ax = axes[0]
ax.imshow(mlkr_model.A, **image_style)
ax.set_xlabel('Features')
ax.set_ylabel('Features')

ax = axes[1]
cax = ax.imshow(mlkr_model.A, **image_style)
# NOTE(review): the x limits are given high-to-low and the y limits low-to-high,
# so this panel's axis directions differ from the left panel -- confirm intended.
ax.set_xlim(100, 0)
ax.set_ylim(0, 100)
ax.set_xlabel('Features')

fig.colorbar(cax, ax=list(axes.flat))
plt.savefig('../figures/mat_a_mlkr.pdf')

plt.show()

<IPython.core.display.Javascript object>

In [48]:
# MLKRR matrix A: full view (left) and a view limited to indices 0..100 (right).
vv = 1  # symmetric colour range [-vv, vv]
image_style = dict(cmap='bwr', vmax=vv, vmin=-vv)

fig, axes = plt.subplots(ncols=2)

ax = axes[0]
ax.imshow(mlkrr_model.A, **image_style)
ax.set_xlabel('Features')
ax.set_ylabel('Features')

ax = axes[1]
cax = ax.imshow(mlkrr_model.A, **image_style)
# NOTE(review): the x limits are given high-to-low and the y limits low-to-high,
# so this panel's axis directions differ from the left panel -- confirm intended.
ax.set_xlim(100, 0)
ax.set_ylim(0, 100)
ax.set_xlabel('Features')

fig.colorbar(cax, ax=list(axes.flat))
plt.savefig('../figures/mat_a_mlkrr.pdf')
plt.show()


<IPython.core.display.Javascript object>