In [None]:
#!/usr/bin/env python

import time
import sys
sys.path.insert(0, '../../Utilities/')
from plotting import newfig, savefig
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
import operator
import itertools
from sklearn import metrics
from sklearn.metrics import *
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve
from sklearn import kernel_ridge
from sklearn.kernel_ridge import KernelRidge
from joblib import dump, load
import pickle
from sklearn.multioutput import MultiOutputRegressor
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold

# Run configuration: `n_jobs` is handed to GridSearchCV, `trial` is a label
# written into the tab-separated summary line.
n_jobs = 1
trial = 1

# Column layout of the data files (from the original notes):
#   0: x [m], 1: t [s], 2: T [K], ..., 9: Rci (relaxation source terms)
TEMPERATURE_COL = 2
RCI_COL = 9

# Alternative datasets, kept for reference:
#dataset = np.loadtxt("../data/datarelax.txt")
#dataset = np.loadtxt("../data/datasetVT.txt")
#dataset = np.loadtxt("../data/datasetVV.txt")
dataset = np.loadtxt("../data/datasetDR.txt")

# Single-column slices keep both arrays 2-D, which is what the scalers expect,
# so no explicit reshape is needed later on.
x = dataset[:, TEMPERATURE_COL:TEMPERATURE_COL + 1]
y = dataset[:, RCI_COL:RCI_COL + 1]

x_train, x_test, y_train, y_test = train_test_split(
    x, y,
    train_size=0.75,
    test_size=0.25,
    random_state=69,
    shuffle=True,
)

# Background on why scalers are fitted on the training split only:
# https://stackoverflow.com/questions/43675665/when-scale-the-data-why-the-train-dataset-use-fit-and-transform-but-the-te
# https://machinelearningmastery.com/how-to-improve-neural-network-stability-and-modeling-performance-with-data-scaling/

# Alternative feature scaler:
#sc_x = MinMaxScaler(feature_range=(0, 1))
sc_x = StandardScaler()
sc_y = StandardScaler()

# Fit each scaler on the training split, then apply the fitted transform to
# the test split so that no test statistics leak into the scaling.
x_train = sc_x.fit_transform(x_train)
x_test = sc_x.transform(x_test)

y_train = sc_y.fit_transform(y_train)
y_test = sc_y.transform(y_test)

for shape_label, split in (('Training Features Shape:', x_train),
                           ('Training Labels Shape:', y_train),
                           ('Testing Features Shape:', x_test),
                           ('Testing Labels Shape:', y_test)):
    print(shape_label, split.shape)

# Hyper-parameter grid for the kernel ridge model.  Each alpha appears once:
# 1e-1 and 0.1 are the same value, and listing both only repeats identical
# cross-validation fits without changing which parameters are selected.
# NOTE(review): alpha=0.0 disables the ridge penalty; confirm that this
# unregularised setting is intended.
hyper_params = [{'kernel': ('poly', 'rbf',),
                 'alpha': (0.1, 0.0,),
                 'gamma': (0.1, 1, 10, 100,),}]

# 10-fold cross-validated grid search, ranked by R2.
est = kernel_ridge.KernelRidge()
gs = GridSearchCV(est, cv=10, param_grid=hyper_params, verbose=2, n_jobs=n_jobs, scoring='r2')

# Time the whole search (including the final refit performed by GridSearchCV).
t0 = time.time()
gs.fit(x_train, y_train.ravel())
runtime = time.time() - t0
print("KR complexity and bandwidth selected and model fitted in %.6f s" % runtime)

# Evaluate the selected model in physical (unscaled) units.
#
# The search was fitted on a flattened target, so gs.predict() returns a 1-D
# array.  StandardScaler.inverse_transform expects a 2-D array (recent
# scikit-learn releases reject 1-D input), hence the reshape to one column.
# Predictions are computed once per split and reused for every metric.
y_train_true = sc_y.inverse_transform(y_train)
y_train_pred = sc_y.inverse_transform(gs.predict(x_train).reshape(-1, 1))
y_test_true = sc_y.inverse_transform(y_test)
y_test_pred = sc_y.inverse_transform(gs.predict(x_test).reshape(-1, 1))

train_score_mse = mean_squared_error(y_train_true, y_train_pred)
train_score_mae = mean_absolute_error(y_train_true, y_train_pred)
train_score_evs = explained_variance_score(y_train_true, y_train_pred)
train_score_me  = max_error(y_train_true, y_train_pred)

test_score_mse  = mean_squared_error(y_test_true, y_test_pred)
test_score_mae  = mean_absolute_error(y_test_true, y_test_pred)
test_score_evs  = explained_variance_score(y_test_true, y_test_pred)
test_score_me   = max_error(y_test_true, y_test_pred)
test_score_r2   = r2_score(y_test_true, y_test_pred)

print("The model performance for testing set")
print("--------------------------------------")
print('MAE is {}'.format(test_score_mae))
print('MSE is {}'.format(test_score_mse))
print('EVS is {}'.format(test_score_evs))
print('ME is {}'.format(test_score_me))
print('R2 score is {}'.format(test_score_r2))

# Best hyper-parameters as (name, value) pairs, sorted by name so the summary
# line has a stable layout.
sorted_grid_params = sorted(gs.best_params_.items(), key=operator.itemgetter(0))

# One tab-separated record: tag, trial label, parameters, train metrics,
# test metrics, search runtime.
summary_fields = ['regression',
                  trial,
                  str(sorted_grid_params).replace('\n', ','),
                  train_score_mse,
                  train_score_mae,
                  train_score_evs,
                  train_score_me,
                  test_score_mse,
                  test_score_mae,
                  test_score_evs,
                  test_score_me,
                  runtime]
out_text = '\t'.join(str(field) for field in summary_fields)
print(out_text)
sys.stdout.flush()

best_kernel = gs.best_params_['kernel']
best_alpha  = gs.best_params_['alpha']
best_gamma  = gs.best_params_['gamma']

# Start a fresh report file.  The context manager closes it even if a write
# fails, and the R2 score is written to the file (it was previously sent to
# stdout by mistake, leaving the report without it).
with open("output.txt", "w") as outF:
    print('best_kernel = ', best_kernel, file=outF)
    print('best_alpha = ', best_alpha, file=outF)
    print('best_gamma = ', best_gamma, file=outF)
    print('R2 score is {}'.format(test_score_r2), file=outF)

# Rebuild a standalone KernelRidge model with the selected hyper-parameters
# so that fit and predict can be timed separately from the grid search.
regr = KernelRidge(kernel=best_kernel, gamma=best_gamma, alpha=best_alpha)

t0 = time.time()
regr.fit(x_train, y_train.ravel())
regr_fit = time.time() - t0
print("Complexity and bandwidth selected and model fitted in %.6f s" % regr_fit)

t0 = time.time()
y_regr = regr.predict(x_test)
regr_predict = time.time() - t0
print("Prediction for %d inputs in %.6f s" % (x_test.shape[0], regr_predict))

# Error metrics on the scaled test targets (y_test and y_regr are both in
# the standardised space here).  Computed once and reused for file + stdout.
regr_mae = metrics.mean_absolute_error(y_test, y_regr)
regr_mse = metrics.mean_squared_error(y_test, y_regr)
regr_rmse = np.sqrt(regr_mse)

# Append timings and metrics to the report; the context manager guarantees
# the file is closed even if a write raises.
with open("output.txt", "a") as outF:
    print("KR complexity and bandwidth selected and model fitted in %.6f s" % regr_fit, file=outF)
    print("KR prediction for %d inputs in %.6f s" % (x_test.shape[0], regr_predict), file=outF)
    print('Mean Absolute Error (MAE):', regr_mae, file=outF)
    print('Mean Squared Error (MSE):', regr_mse, file=outF)
    print('Root Mean Squared Error (RMSE):', regr_rmse, file=outF)

print('Mean Absolute Error (MAE):', regr_mae)
print('Mean Squared Error (MSE):', regr_mse)
print('Root Mean Squared Error (RMSE):', regr_rmse)


# Map features, targets and predictions back to physical units for plotting.
x_test_dim = sc_x.inverse_transform(x_test)
y_test_dim = sc_y.inverse_transform(y_test)
# y_regr is 1-D (the model was fitted on a flattened target), while
# StandardScaler.inverse_transform expects a 2-D array: go through a single
# column and flatten the result again.
y_regr_dim = sc_y.inverse_transform(y_regr.reshape(-1, 1)).ravel()

# Reference data (black dots) against the kernel ridge prediction (red +).
plt.scatter(x_test_dim, y_test_dim, s=2, c='k', marker='o', label='Matlab')
plt.scatter(x_test_dim, y_regr_dim, s=2, c='r', marker='+', label='KernelRidge')
#plt.title('Relaxation term $R_{ci}$ regression')
plt.ylabel('$R_{ci}$ $[J/m^3/s]$')
plt.xlabel('T [K] ')
plt.legend()
plt.tight_layout()
#plt.savefig("regression_KR.eps", dpi=150, crop='false')
#plt.savefig("regression_KR.pdf", dpi=150, crop='false')
plt.show()

## Look at the results
#gs_ind = gs.best_estimator_.support_
##plt.scatter(x[gs_ind], y[gs_ind], c='r', s=50, label='KR',   zorder=2, edgecolors=(0, 0, 0))
#plt.scatter(x_test_dim, y_test_dim, c='k',       label='data', zorder=1, edgecolors=(0, 0, 0))
#plt.plot(x_test_dim, y_regr_dim, c='r', label='KR (fit: %.6fs, predict: %.6fs)' % (regr_fit, regr_predict))
##plt.plot(X_plot, y_kr, c='g', label='KRR (fit: %.3fs, predict: %.3fs)' % (regr_fit, regr_predict))
#plt.xlabel('data')
#plt.ylabel('target')
##plt.title('SVR versus Kernel Ridge')
#plt.legend()

In [None]:
from sklearn import svm                                                                                                             
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor, RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor                                                                                   
from sklearn.neighbors import RadiusNeighborsRegressor                                                                              
from sklearn.neighbors import NearestNeighbors

# Learning curves: cross-validated MSE versus training-set size for six
# regressors, all evaluated on the scaled training split.
plt.figure()

# NOTE(review): several parameter spellings below ('mse', 'mae', 'ls',
# max_features='auto') belong to older scikit-learn releases and are
# presumably rejected by newer ones -- confirm against the installed version.
svr = SVR(kernel='rbf', epsilon=0.01, C=100., gamma='scale', coef0=0.0)
kr = KernelRidge(kernel='rbf', gamma=100., alpha=0.0)
dt = DecisionTreeRegressor(criterion='mse', splitter='best', max_features='auto', random_state=69)
et = ExtraTreesRegressor(n_estimators=2000, min_weight_fraction_leaf=0.0, max_depth=None,
                         max_leaf_nodes=300, min_samples_split=10, min_samples_leaf=1)
gb = GradientBoostingRegressor(n_estimators=100, min_weight_fraction_leaf=0.0,
                               max_features='auto', warm_start=False, criterion='mae', max_depth=None,
                               loss='ls', min_samples_split=2, min_samples_leaf=5)
knn = KNeighborsRegressor(n_neighbors=9, algorithm='ball_tree', leaf_size=1, weights='distance', p=1)

# Settings shared by every learning_curve call: ten training fractions from
# 10% to 100%, 10-fold CV, negative MSE scoring, and fit/score timings.
lc_options = dict(train_sizes=np.linspace(0.1, 1, 10),
                  scoring="neg_mean_squared_error",
                  cv=10,
                  return_times=True)
y_train_flat = y_train.ravel()

(train_sizes, train_scores_svr, test_scores_svr,
 fit_times_svr, score_times_svr) = learning_curve(svr, x_train, y_train_flat, **lc_options)

(train_sizes, train_scores_kr, test_scores_kr,
 fit_times_kr, score_times_kr) = learning_curve(kr, x_train, y_train_flat, **lc_options)

(train_sizes, train_scores_dt, test_scores_dt,
 fit_times_dt, score_times_dt) = learning_curve(dt, x_train, y_train_flat, **lc_options)

(train_sizes, train_scores_et, test_scores_et,
 fit_times_et, score_times_et) = learning_curve(et, x_train, y_train_flat, **lc_options)

(train_sizes, train_scores_gb, test_scores_gb,
 fit_times_gb, score_times_gb) = learning_curve(gb, x_train, y_train_flat, **lc_options)

(train_sizes, train_scores_knn, test_scores_knn,
 fit_times_knn, score_times_knn) = learning_curve(knn, x_train, y_train_flat, **lc_options)

# Scores are negated MSE, so flip the sign to plot the fold-averaged MSE.
for cv_scores, colour, tag in ((test_scores_svr, "b", "SVR"),
                               (test_scores_kr, "g", "KR"),
                               (test_scores_dt, "k", "DT"),
                               (test_scores_et, "y", "ET"),
                               (test_scores_gb, "r", "GB"),
                               (test_scores_knn, "c", "kNN")):
    plt.plot(train_sizes, -cv_scores.mean(axis=1), 'o-', color=colour, label=tag)

plt.xlabel("Train size")
plt.ylabel("Mean Squared Error")
plt.title('Learning curves')
plt.legend(loc="best")
plt.show()

# Visualize training and prediction time
plt.figure()

In [None]:
# https://scikit-learn.org/stable/auto_examples/miscellaneous/plot_kernel_ridge_regression.html#sphx-glr-auto-examples-miscellaneous-plot-kernel-ridge-regression-py
from sklearn import svm                                                                                                             
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor

# Timing benchmark: fit each regressor on growing prefixes of the training
# split and time both the fit and the prediction on the full test split.

# Seven log-spaced training sizes from 10 to 10000.  The builtin `int` is
# used because the `np.int` alias is no longer available in recent NumPy.
sizes = np.logspace(1, 4, 7).astype(int)

# NOTE(review): several parameter spellings below ('mse', 'mae', 'ls',
# max_features='auto') belong to older scikit-learn releases and are
# presumably rejected by newer ones -- confirm against the installed version.
timed_estimators = {
    "KR":  KernelRidge(kernel='rbf', gamma=100., alpha=0.0),
    "SVR": SVR(kernel='rbf', epsilon=0.01, C=100., gamma='scale', coef0=0.0),
    "DT":  DecisionTreeRegressor(criterion='mse', splitter='best', max_features='auto', random_state=69),
    "ET":  ExtraTreesRegressor(n_estimators=2000, min_weight_fraction_leaf=0.0, max_depth=None,
                               max_leaf_nodes=300, min_samples_split=10, min_samples_leaf=1),
    "GB":  GradientBoostingRegressor(n_estimators=100, min_weight_fraction_leaf=0.0,
                                     max_features='auto', warm_start=False,
                                     criterion='mae', max_depth=None,
                                     loss='ls', min_samples_split=2,
                                     min_samples_leaf=5),
    "kNN": KNeighborsRegressor(n_neighbors=9, algorithm='ball_tree', leaf_size=1, weights='distance', p=1),
}

# One colour per estimator; solid line = training time, dashed = prediction.
curve_colors = {"SVR": "r", "KR": "g", "DT": "b", "ET": "m", "GB": "y", "kNN": "c"}

for name, estimator in timed_estimators.items():
    train_time = []
    test_time = []
    for train_test_size in sizes:
        # Time the estimator of this iteration (not the previously fitted
        # `regr` model).  A size beyond the number of available rows simply
        # selects all rows, by slice semantics.
        t0 = time.time()
        estimator.fit(x_train[:train_test_size, :], y_train[:train_test_size].ravel())
        train_time.append(time.time() - t0)

        t0 = time.time()
        estimator.predict(x_test)
        test_time.append(time.time() - t0)

    # Plot each series exactly once, with a label matching its content.
    plt.plot(sizes, train_time, 'o-', color=curve_colors[name], label="%s (train)" % name)
    plt.plot(sizes, test_time, 'o--', color=curve_colors[name], label="%s (test)" % name)

plt.xscale("log")
plt.yscale("log")
plt.xlabel("Train size")
plt.ylabel("Time (seconds)")
plt.title('Execution Time')
plt.legend(loc="best")

# save the model to disk
#dump(gs, 'model_KR.sav')