In [None]:
from sklearn.preprocessing import scale
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.linear_model import Ridge, RidgeClassifier
from sklearn.svm import SVR, SVC
from sklearn.model_selection import KFold, StratifiedKFold, GridSearchCV

import pandas as pd
import numpy as np

In [None]:
def splitXY(dfXY):
    """Separate a combined training frame into features and label columns.

    Parameters
    ----------
    dfXY : pandas.DataFrame
        Frame that contains the columns 'ReactorType', 'CoolingTime',
        'Enrichment', 'Burnup' and 'OrigenReactor' alongside the feature
        columns. It is not modified.

    Returns
    -------
    tuple
        ``(dfX, r_dfY, c_dfY, e_dfY, b_dfY)``: the feature frame (all label
        columns removed, plus the 'total' column when it is present),
        followed by the 'ReactorType', 'CoolingTime', 'Enrichment' and
        'Burnup' columns as Series. 'OrigenReactor' is dropped from the
        features but not returned.
    """
    label_names = ['ReactorType', 'CoolingTime', 'Enrichment', 'Burnup', 'OrigenReactor']
    reactor, cooling, enrichment, burnup = label_names[:4]

    features = dfXY.drop(columns=label_names)
    if 'total' in features.columns:
        features = features.drop(columns='total')

    return (
        features,
        dfXY.loc[:, reactor],
        dfXY.loc[:, cooling],
        dfXY.loc[:, enrichment],
        dfXY.loc[:, burnup],
    )

# Number of cross-validation folds used by the hyperparameter searches.
CV = 5
# Seed for the random subsample below, so that Restart & Run All selects the
# same rows (and therefore reproduces the same tuning results) every time.
SAMPLE_SEED = 42

trainset = '../pkl_trainsets/2jul2018/22jul2018_trainset3_nucs_fissact_not-scaled.pkl'
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
trainXY = pd.read_pickle(trainset)
# Work on a random half of the rows; the unseeded original drew a different
# half on every run.
trainXY = trainXY.sample(frac=0.5, random_state=SAMPLE_SEED)
trainX, rY, cY, eY, bY = splitXY(trainXY)
# Standardize every feature column (scale() defaults to zero mean and unit
# variance per column).
trainX = scale(trainX)

In [None]:
# Search spaces for the hyperparameter searches: 20 candidate values per axis.
# linspace(...).astype(int) truncates toward zero, so neighbouring candidates
# can coincide when the requested range is narrow.
knn_grid = dict(n_neighbors=np.linspace(1, 20, num=20).astype(int))
dtr_grid = dict(
    max_depth=np.linspace(3, 100, num=20).astype(int),
    # Upper bound is the column count of the combined frame minus 6 --
    # presumably the five label columns plus 'total' that splitXY removes;
    # TODO confirm against the training set.
    max_features=np.linspace(5, len(trainXY.columns) - 6, num=20).astype(int),
)
svr_grid = dict(C=np.logspace(-2, 5, num=20), gamma=np.logspace(-7, 2, num=20))

In [None]:
# Exhaustive grid search (GridSearchCV) over the kNN, decision-tree and
# support-vector hyperparameters for each prediction target. The best values
# are appended to `param_file` and echoed to stdout.

# Seed for the shuffled CV folds and the decision trees so that repeated runs
# give the same folds and the same tuning result.
CV_SEED = 42
# Results are appended, so earlier runs stay in the file.
param_file = 'trainset3_hyperparameters.txt'

# Target code -> (ground-truth labels, name used in the report).
targets = {
    'b': (bY, 'burnup'),
    'c': (cY, 'cooling'),
    'e': (eY, 'enrichment'),
    'r': (rY, 'reactor'),
}

# Only the regression targets are searched here; 'r' (reactor type) is
# supported by the set-up below but is not part of this run.
for Y in ('b', 'c', 'e'):
    trainY, parameter = targets[Y]

    # Choose scorer, CV splitter and base estimators per target. Doing this
    # inside the loop keeps the classification settings from leaking into
    # later regression targets.
    if Y == 'r':
        score = 'accuracy'
        kfold = StratifiedKFold(n_splits=CV, shuffle=True, random_state=CV_SEED)
        knn_init = KNeighborsClassifier(weights='distance')
        dtr_init = DecisionTreeClassifier(class_weight='balanced', random_state=CV_SEED)
        # C=200 is only a starting value; the search sets C from svr_grid.
        svr_init = SVC(C=200, class_weight='balanced')
    else:
        score = 'explained_variance'
        kfold = KFold(n_splits=CV, shuffle=True, random_state=CV_SEED)
        knn_init = KNeighborsRegressor(weights='distance')
        dtr_init = DecisionTreeRegressor(random_state=CV_SEED)
        svr_init = SVR()

    # Settings shared by all three searches.
    search_kwargs = dict(scoring=score, n_jobs=-1, cv=kfold,
                         return_train_score=True)
    knn_opt = GridSearchCV(estimator=knn_init, param_grid=knn_grid, **search_kwargs)
    dtr_opt = GridSearchCV(estimator=dtr_init, param_grid=dtr_grid, **search_kwargs)
    svr_opt = GridSearchCV(estimator=svr_init, param_grid=svr_grid, **search_kwargs)

    knn_opt.fit(trainX, trainY)
    dtr_opt.fit(trainX, trainY)
    svr_opt.fit(trainX, trainY)

    # best params
    k = knn_opt.best_params_['n_neighbors']
    d = dtr_opt.best_params_['max_depth']
    f = dtr_opt.best_params_['max_features']
    g = svr_opt.best_params_['gamma']
    c = svr_opt.best_params_['C']

    # save info
    with open(param_file, 'a') as pf:
        pf.write('The following parameters are best from the grid search for the {} parameter prediction:\n'.format(parameter))
        pf.write('k for knn is {}\n'.format(k))
        pf.write('max depth for dtree is {}\n'.format(d))
        pf.write('max features for dtree is {}\n'.format(f))
        pf.write('gamma for svr is {}\n'.format(g))
        pf.write('C for svr is {}\n'.format(c))
    print(' k is {}\n max depth is {}\n max feat is {}\n gamma is {}\n C is {}\n'.format(k, d, f, g, c), flush=True)