In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# --- Configuration ----------------------------------------------------------
# Project root relative to this notebook, and the CSV file loaded below.
ROOT = ".."
filename = f"{ROOT}/data/TC_ReCo_detail_descriptor.csv"

# Columns used as model inputs (descriptors) and the column used as target.
DESCRIPTOR_NAMES = [
    'C_R', 'C_T', 'vol_per_atom', 'Z', 'f4', 'd5',
    'L4f', 'S4f', 'J4f', '(g-1)J4f', '(2-g)J4f',
]
TARGET_NAME = 'Tc'

# Seed handed to the shuffled KFold splitters so the folds are reproducible.
RANDOM_STATE = 5

# --- Load data ----------------------------------------------------------------
df_obs = pd.read_csv(filename)

# Unscaled descriptor matrix (one column per descriptor name) and target vector.
Xraw = df_obs[DESCRIPTOR_NAMES].values
y = df_obs[TARGET_NAME].values


In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise each descriptor column; `fit` returns the scaler itself, so the
# fitted object is kept in `scaler` for later reuse.
# NOTE(review): the scaler is fitted on every row of Xraw, so the
# cross-validation run on X afterwards uses scaling statistics that include its
# held-out rows — confirm this is acceptable, or move scaling into a Pipeline.
scaler = StandardScaler().fit(Xraw)
X = scaler.transform(Xraw)


In [None]:
from sklearn.model_selection import KFold
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV
# Learn from the data: tune an RBF kernel ridge model by grid search.
nfold = 10
kf = KFold(n_splits=nfold, shuffle=True, random_state=RANDOM_STATE)

# The alpha/gamma given here are only starting values; the grid search
# replaces both with every candidate from `param_grid`.
estimator = KernelRidge(kernel="rbf", alpha=1, gamma=1)

# 11 log-spaced candidates per hyperparameter (11 x 11 combinations), each
# evaluated with the `kf` cross-validation splitter.
param_grid = {
    "alpha": np.logspace(-6, 0, 11),
    "gamma": np.logspace(-5, 0, 11),
}

reg_cv = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=kf)
reg_cv.fit(X, y)

# Header line followed by the selected hyperparameters on the next line.
print("best hyperparameter", reg_cv.best_params_, sep="\n")


In [None]:
from regression_misc import plot_y_yp
from sklearn.metrics import r2_score
# Predict on the same X that reg_cv was fitted on, so the score below is an
# in-sample (training-set) figure, not a generalisation estimate.
yp = reg_cv.predict(X)

# Evaluate R2 between the observed targets and the predictions.
kr_score = r2_score(y_true=y, y_pred=yp)
print("R2=", kr_score)

# Project helper from regression_misc; presumably draws observed vs. predicted
# values — see that module for details.
plot_y_yp(y, yp)


In [None]:
from sklearn.model_selection import cross_validate, cross_val_predict
from sklearn.metrics import make_scorer
# Cross-validated evaluation of the model with the tuned hyperparameters.
# One splitter serves both calls below: with an integer random_state every
# split() call on it yields the same shuffled folds.
# NOTE(review): these folds are generated with the same settings as the
# splitter used for the grid search, so this estimate is not independent of
# the hyperparameter selection — consider nested CV if an unbiased score is
# needed.
kf = KFold(n_splits=nfold, shuffle=True, random_state=RANDOM_STATE)

# Fresh, unfitted KernelRidge carrying the alpha/gamma chosen by the grid
# search (best_params_ holds exactly the keys of the searched grid).
reg = KernelRidge(kernel="rbf", **reg_cv.best_params_)

# Per-fold R2 on the held-out part; print its mean and standard deviation.
cv_results = cross_validate(reg, X, y, cv=kf, scoring=make_scorer(r2_score))
print(cv_results["test_score"].mean(), cv_results["test_score"].std())

# Out-of-fold predictions: each sample is predicted by the model trained on
# the folds that did not contain it. This overwrites the earlier `yp`.
yp = cross_val_predict(reg, X, y, cv=kf)
plot_y_yp(y, yp)