# Imports

In [1]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.kernel_ridge import KernelRidge

from qmlearn.io import read_db

# Data

In [2]:
# Calling data
data = read_db('.\nh3_data.hdf5')
properties = data['properties']
X = properties['vext']
Y = properties['gamma']

Guess DB names : {'qmmol': 'rks/qmmol', 'atoms': 'rks/train_atoms_27', 'properties': 'rks/train_props_27'}


In [3]:
# Printing shapes, as needed
# print('X shape:', X.shape)
# print('Y shape:', Y.shape)

# Resizing to obtaine 2D array
X_resized = np.resize(X, (27, 3364))
Y_resized = np.resize(Y, (27, 3364))

## Resizing method above was the preferred method, yielding the most accurate predictions
## This should be on the order of samples by (n_features)^2

# Grid search

In [10]:
# Defining the function, can be modified as needed
krr_gs = KernelRidge(kernel = 'rbf')

## The choice of kernel itself can be left to the cross-validation
## Favored function was KRR, but others can be (and have been) tested in the exact same
## It is recommended to use some sort of preprocessing when cross-validating neural networks

1.0

### Method 1

In [11]:
# Choice of parameters to test for and number of iterations                ## More can be added, as needed
param_grid = {'gamma': np.arange(0.0, 1.0, 0.0001), 'max_iter':[10000]}

# Defining function, alternatives exist
class grid():
    def __init__(self,model):
        self.model = model
    def grid_get(self, X_resized, Y_resized, param_grid):
        grid_search = GridSearchCV(self.model, param_grid, cv = 5, scoring = 'neg_mean_squared_error')
        grid_search.fit(X_resized, Y_resized)
        print(grid_search.best_params_, np.sqrt(-grid_search.best_score_))
        grid_search.cv_results_['mean_test_score'] = np.sqrt(-grid_search.cv_results_['mean_test_score'])
        print(pd.DataFrame(grid_search.cv_results_)[['params','mean_test_score','std_test_score']])

In [12]:
# Executing the grid search
grid(KernelRidge(kernel = kernel).grid_get(X_resized, Y_resized, {'gamma': np.arange(0.0, 1.0, 0.0001)})

SyntaxError: unexpected EOF while parsing (3963740760.py, line 2)

### Method 2

In [None]:
# Alternative method to defining parameters to test, can be modified as needed, such as expanding parameter_space
parameter_space = {
    'alpha': np.arange(0, 1, 1e-5),
}

reg = GridSearchCV(krr_gs, parameter_space, scoring = 'neg_root_mean_squared_error', n_jobs = -1, cv = 5)

reg.fit(X_resized, Y_resized)

In [None]:
print('Best parameters found:\n', reg.best_params_)

# print('Best score found:\n', reg.best_score_)

# Random search

### Method 1

In [None]:
# Choice of parameters to test for and number of iterations                 ## More can be added, as needed
param_grid = {'alpha': np.arange(0.0, 1.0, 0.001), 'max_iter':[10000]}


# Defining function, alternatives exist
class random():
    def __init__(self,model):
        self.model = model
    def random_get(self, X_resized, Y_resized, param_grid):
        random_search = RandomizedSearchCV(self.model, param_grid, random_state = 0,
                                           n_iter = 100, scoring = 'neg_mean_squared_error')
        random_search.fit(X_resized, Y_resized)
        print(random_search.best_params_, np.sqrt(-random_search.best_score_))
        random_search.cv_results_['mean_test_score'] = np.sqrt(-random_search.cv_results_['mean_test_score'])
        print(pd.DataFrame(random_search.cv_results_)[['params','mean_test_score','std_test_score']])

In [None]:
# Executing the random search
random(KernelRidge(kernel = 'rbf')).random_get(X_resized, Y_resized, {'alpha': np.arange(0, 1, 0.001)})

### Method 2

In [None]:
parameter_space = {
    'alpha': np.arange(0, 1, 1e-5),
}

reg = RandomizedSearchCV(krr_gs, parameter_space, scoring = 'neg_root_mean_squared_error', n_jobs = -1, cv = 5)

reg.fit(X_resized, Y_resized)

In [None]:
print('Best parameters found:\n', reg.best_params_)

# print('Best score found:\n', reg.best_score_)