In [1]:
import torch
import joblib
from tqdm import tqdm
import pandas as pd
import numpy as np
from sklearn.linear_model import PoissonRegressor
from sklearn.svm import SVR, NuSVR
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from scipy.stats import spearmanr, pearsonr

In [2]:
# Load the serialized feature (X) and target (y) tensors in one pass.
# NOTE: torch.load unpickles the file contents, so only point it at trusted files.
x_train, y_train = (
    torch.load(tensor_path)
    for tensor_path in ('../data/X_tensor_WPC_cpu.pt', '../data/y_tensor_WPC.pt')
)

In [3]:
# Turn every tensor entry into a NumPy array (detached from autograd) so the
# scikit-learn estimators below can consume it. Note that `X_train` (NumPy) and
# `x_train` (torch) differ only by case.
X_train = [
    [feature.detach().numpy() for feature in sample]
    for sample in x_train
]

In [4]:
# 5-fold cross-validation splitter; shuffling with a fixed seed keeps the fold
# assignment identical across re-runs.
kf = KFold(
    random_state=42,
    shuffle=True,
    n_splits=5,
)

In [5]:
def get_split(input_x, input_y, indexes):
    """Select the entries of two parallel sequences at the given positions.

    Args:
        input_x: Indexable collection of feature rows.
        input_y: Indexable collection of targets, aligned with ``input_x``.
        indexes: Iterable of positions to pick (consumed exactly once).

    Returns:
        A tuple ``(out_x, out_y)`` of two lists holding the selected items of
        ``input_x`` and ``input_y`` in the order given by ``indexes``.
    """
    # Single pass over `indexes` so one-shot iterators behave like sequences.
    picked = [(input_x[pos], input_y[pos]) for pos in indexes]
    out_x = [pair[0] for pair in picked]
    out_y = [pair[1] for pair in picked]
    return out_x, out_y

In [6]:
def get_nusvr_model():
    """Build a fresh, unfitted NuSVR estimator with the notebook's fixed hyperparameters.

    Returns:
        A new ``NuSVR`` instance (RBF kernel, ``C=50``, ``gamma=1``,
        ``nu=0.42857142857142855``).
    """
    return NuSVR(
        kernel='rbf',
        C=50,
        gamma=1,  # type: ignore
        nu=0.42857142857142855,
        # Kept from the original configuration; per the scikit-learn docs
        # `degree` only affects the 'poly' kernel.
        degree=2,
    )

In [7]:
def get_svr_model():
    """Build a fresh, unfitted epsilon-SVR estimator with the notebook's fixed hyperparameters.

    Returns:
        A new ``SVR`` instance (RBF kernel, ``C=5``, ``gamma=1``, ``epsilon=1``).
    """
    return SVR(
        kernel='rbf',
        C=5,
        epsilon=1,
        gamma=1,  # type: ignore
        # Kept from the original configuration; per the scikit-learn docs
        # `degree` only affects the 'poly' kernel.
        degree=2,
    )

In [8]:
def get_pr_model(max_iter=10):
    """Build a fresh, unfitted PoissonRegressor.

    Args:
        max_iter: Iteration cap handed to the lbfgs solver. The default of 10
            reproduces the configuration the notebook's reported results were
            computed with; with that cap the solver stops at the iteration
            limit and scikit-learn emits a convergence warning on each fit
            (see the stderr output of the cross-validation cell). Pass a
            larger value to let the optimizer converge.

    Returns:
        A new ``PoissonRegressor`` instance (lbfgs solver, ``alpha=0.01``,
        intercept fitted).
    """
    return PoissonRegressor(
        solver='lbfgs',
        max_iter=max_iter,
        fit_intercept=True,
        alpha=0.01
    )

In [9]:
# Keys of the estimators compared in the cross-validation loop; each key is also
# the prefix of that model's metric columns in the results table.
models = [
    'nusvr',
    'svr',
    'pr',
]

In [10]:
# Map each model key to its factory. Looking the key up here means an unknown
# name in `models` fails loudly with KeyError instead of silently re-using the
# estimator left over from the previous iteration.
model_factories = {
    'nusvr': get_nusvr_model,
    'svr': get_svr_model,
    'pr': get_pr_model,
}

results = []
# Wrap the split generator itself and pass `total` so tqdm can draw a real
# progress bar; wrapping `enumerate(...)` hides the length and shows only a counter.
folds = tqdm(kf.split(X_train), total=kf.get_n_splits(), desc='CV folds')
for i, (train_index, test_index) in enumerate(folds):
    # One result row per fold; metric columns are named '<model>-<metric>'.
    result = {'Fold': i}
    xtrain, ytrain = get_split(X_train, y_train, train_index)
    xtest, ytest = get_split(X_train, y_train, test_index)
    for model_name in models:
        # A fresh estimator per fold and model, so no fitted state carries over.
        model = model_factories[model_name]()
        model.fit(xtrain, ytrain)
        ypred = model.predict(xtest)
        result[f'{model_name}-pearson'] = pearsonr(ytest, ypred)[0]
        result[f'{model_name}-spearman'] = spearmanr(ytest, ypred)[0]
        result[f'{model_name}-rmse'] = np.sqrt(mean_squared_error(ytest, ypred))
    results.append(result)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
STOP: TOTAL NO. of ITERATION

In [11]:
# One row per fold, one column per '<model>-<metric>' key collected in `results`.
df_results = pd.DataFrame(data=results)

In [12]:
# Summary row: the mean of every metric column across folds. The columns are
# taken from the frame itself, so the summary follows whatever models were
# evaluated instead of a hand-maintained list of column names.
metric_means = df_results.drop(columns='Fold').mean()
to_concat = {'Fold': 'mean', **metric_means.to_dict()}

In [13]:
df_conc = pd.DataFrame([to_concat])
# ignore_index=True renumbers the rows so the summary row gets its own label;
# without it the appended row keeps label 0 and duplicates the first fold's index.
df_results = pd.concat([df_results, df_conc], ignore_index=True)

In [14]:
# Show the per-fold metrics together with the appended 'mean' summary row.
df_results

Unnamed: 0,Fold,nusvr-pearson,nusvr-spearman,nusvr-rmse,svr-pearson,svr-spearman,svr-rmse,pr-pearson,pr-spearman,pr-rmse
0,0,0.707161,0.708093,15.736586,0.697673,0.698427,15.911348,0.715405,0.712972,15.541604
1,1,0.75675,0.753099,14.553804,0.751063,0.749812,14.616763,0.748705,0.748613,14.827727
2,2,0.752138,0.744089,15.653025,0.742938,0.732347,15.887618,0.749824,0.746721,15.685771
3,3,0.755734,0.735286,16.718692,0.750058,0.728245,17.053566,0.75509,0.739095,16.686121
4,4,0.753664,0.758015,13.979813,0.75061,0.755665,14.026948,0.745037,0.759563,14.267685
0,mean,0.745089,0.739717,15.328384,0.738468,0.732899,15.499248,0.742812,0.741393,15.401782


In [15]:
# Round for presentation: correlations to 3 decimals, RMSE to 2. The digits are
# chosen by the metric suffix of each column, so every '<model>-<metric>' column
# is covered by a single vectorized DataFrame.round call; columns without a
# known metric suffix (e.g. 'Fold') are left untouched.
digits_by_metric = {'pearson': 3, 'spearman': 3, 'rmse': 2}
decimals = {
    column: digits
    for column in df_results.columns
    for suffix, digits in digits_by_metric.items()
    if column.endswith(f'-{suffix}')
}
df_results = df_results.round(decimals)
df_results

Unnamed: 0,Fold,nusvr-pearson,nusvr-spearman,nusvr-rmse,svr-pearson,svr-spearman,svr-rmse,pr-pearson,pr-spearman,pr-rmse
0,0,0.707,0.708,15.74,0.698,0.698,15.91,0.715,0.713,15.54
1,1,0.757,0.753,14.55,0.751,0.75,14.62,0.749,0.749,14.83
2,2,0.752,0.744,15.65,0.743,0.732,15.89,0.75,0.747,15.69
3,3,0.756,0.735,16.72,0.75,0.728,17.05,0.755,0.739,16.69
4,4,0.754,0.758,13.98,0.751,0.756,14.03,0.745,0.76,14.27
0,mean,0.745,0.74,15.33,0.738,0.733,15.5,0.743,0.741,15.4
