In [69]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, cross_val_score

import matplotlib.pyplot as plt
datanames = ['svm', 'lda', 'logreg', 'nn_boston', 'nn_cancer', 'robotpush3', 'robotpush4']
x_dims = [3, 3, 4, 4, 4, 3, 4]
log_trans = True
log_trans_dims = {'svm': (0,1,2), 'lda': (1, 2), 'logreg':(2, 3)}

def print_compact(x):
    for _, v in enumerate(x):
        print("%g, " % v, end="")
    print()
for i, name in enumerate(datanames[0:]):
    name_on_grid = name + "_on_grid"
    print(name_on_grid)
    if name in log_trans_dims.keys():
        import_command = f"from {name_on_grid} import param_names, param_values, grid_configs"
        exec(import_command)
    else:
        grid_configs = np.loadtxt(name_on_grid + ".csv", delimiter=",")
        nan_idx = np.nonzero(np.isnan(grid_configs[:,-1])) 
        print("NA idx:", nan_idx)
        grid_configs = np.delete(grid_configs, nan_idx, axis=0)
        d = grid_configs.shape[1]
        param_names = ["NA"]*d

    data = np.array(grid_configs)
    print(data.shape)
    print("there are %d hyperparameters" % x_dims[i])
    # print each dim unique values 
    for j in range(x_dims[i]):
        param_name = param_names[j]
        unique_values = sorted(np.unique(data[:, j]))
#         reference_values = sorted(param_values[param_name])
#         assert reference_values == unique_values
        if name in log_trans_dims:
            print("parameter %d: %s" % (j, param_name))
            print_compact(unique_values)
#         else:
#             plt.hist(data[:,j], label=name + str(j), bins=20)
#             plt.show()
    
    x = data[:,:x_dims[i]]
    if log_trans and name in log_trans_dims:
        dims = log_trans_dims[name]
        x[:, dims] = np.log2(x[:, dims])
    y = data[:,x_dims[i]]
    y_min = min(y)
    print("min y = %f" % y_min)
    best_ind = np.nonzero(y == y_min)[0]
    print("best x:")

    for j in best_ind[:10]:
        print_compact(data[j,:x_dims[i]])

        
        
    xtr, xte, ytr, yte = train_test_split(x, y, test_size=0.2, random_state=0)
    lr = LinearRegression()
    lr.fit(xtr, ytr)
    r2 = lr.score(xte, yte)
    cv_r2 = cross_val_score(lr, x, y, cv=5)
    print("R2:", r2, "cv score: %.3f +/- %.3f" % (cv_r2.mean(), cv_r2.std()))
    
    rf = RandomForestRegressor()
    rf.fit(xtr, ytr)
    r2 = rf.score(xte, yte)
    cv_r2 = cross_val_score(rf, x, y, cv=5)
    print("R2:", r2, "cv score: %.3f +/- %.3f" % (cv_r2.mean(), cv_r2.std()))
    print()

svm_on_grid
(1400, 5)
there are 3 hyperparameters
parameter 0: C
0.1, 1, 5, 10, 25, 50, 75, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 10000, 1e+06, 
parameter 1: alpha
0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.5, 2, 3, 5, 
parameter 2: epsilon
0.0001, 0.001, 0.01, 0.1, 
min y = 0.241100
best x:
12.5507, -3.32193, -9.96578, 
R2: 0.04569377226013116 cv score: 0.167 +/- 0.144
R2: 0.9693967497307229 cv score: 0.978 +/- 0.011

lda_on_grid
(289, 5)
there are 3 hyperparameters
parameter 0: kappa
0.5, 0.6, 0.7, 0.8, 0.9, 1, 
parameter 1: tau
1, 4, 16, 64, 256, 1024, 
parameter 2: batch_size
1, 4, 16, 64, 256, 1024, 4096, 16384, 
min y = 1266.167382
best x:
0.5, 4, 14, 
R2: 0.5976106753250442 cv score: 0.548 +/- 0.046




R2: 0.9850731286617531 cv score: 0.967 +/- 0.017

logreg_on_grid
(9680, 7)
there are 4 hyperparameters
parameter 0: learning_rate
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
parameter 1: l2_reg
0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 
parameter 2: batch_size
20, 40, 80, 160, 320, 640, 1280, 2560, 
parameter 3: n_epochs
5, 10, 20, 40, 80, 160, 320, 640, 1280, 2560, 
min y = 0.068500
best x:
3, 0, 5.32193, 6.32193, 
3, 0, 5.32193, 7.32193, 
3, 0, 5.32193, 8.32193, 
3, 0, 5.32193, 9.32193, 
3, 0, 5.32193, 10.3219, 
3, 0, 5.32193, 11.3219, 
4, 0, 4.32193, 6.32193, 
4, 0, 4.32193, 7.32193, 
4, 0, 4.32193, 8.32193, 
4, 0, 4.32193, 9.32193, 
R2: 0.32985666827144255 cv score: -6.937 +/- 7.421




R2: 0.9990115314179514 cv score: 0.408 +/- 0.489

nn_boston_on_grid
NA idx: (array([], dtype=int64),)
(50000, 5)
there are 4 hyperparameters
min y = 6.521200
best x:
0.053367, 0.039708, 0.61862, 0.65813, 
R2: 0.2855497262275125 cv score: 0.281 +/- 0.002




R2: 0.9626510441369271 cv score: 0.959 +/- 0.005

nn_cancer_on_grid
NA idx: (array([], dtype=int64),)
(50000, 5)
there are 4 hyperparameters
min y = 0.040576
best x:
0.8434, 0.95442, 0.36952, 0.12665, 
0.84552, 0.88104, 0.065481, 0.42733, 
0.84065, 0.0050843, 0.70424, 0.99609, 
0.83952, 0.67591, 0.90233, 0.27946, 
0.84643, 0.080357, 0.97685, 0.70298, 
0.84216, 0.40303, 0.94374, 0.85515, 
0.84395, 0.44903, 0.74907, 0.5933, 
0.841, 0.57113, 0.10659, 0.017463, 
0.84788, 0.17065, 0.029864, 0.93756, 
0.84694, 0.53092, 0.33232, 0.28689, 
R2: 0.038655898737848804 cv score: 0.040 +/- 0.000




R2: 0.9973884604939771 cv score: 0.998 +/- 0.001

robotpush3_on_grid
NA idx: (array([ 1875,  1950,  1963,  5582,  8664, 16366, 16748]),)
(19993, 4)
there are 3 hyperparameters
min y = 0.074788
best x:
0.63739, 0.17381, 0.41972, 
R2: 0.7186362810190471 cv score: 0.714 +/- 0.001




R2: 0.9904542839527178 cv score: 0.991 +/- 0.000

robotpush4_on_grid
NA idx: (array([ 1114,  1977,  4246,  6041,  6314, 10863, 13369, 15269, 16000,
       16059, 17029, 17957, 18293]),)
(19987, 5)
there are 4 hyperparameters
min y = 0.076187
best x:
0.97806, 0.16691, 0.39027, 0.23409, 
R2: 0.24582289779354358 cv score: 0.247 +/- 0.004




R2: 0.43178002215344985 cv score: 0.439 +/- 0.008



In [77]:

datanames = ['svm', 'lda', 'logreg', 'nn_boston', 'nn_cancer', 'robotpush3', 'robotpush4']
x_dims = [3, 3, 4, 4, 4, 3, 4]
log_trans = True
log_trans_dims = {'svm': (0,1,2), 'lda': (1, 2), 'logreg':(2, 3)}

for i, name in enumerate(datanames[0:3]):
    name_on_grid = name + "_on_grid"
    print(name_on_grid)

    import_command = f"from {name_on_grid} import param_names, param_values, grid_configs"
    exec(import_command)


    data = np.array(grid_configs)
    print(data[:3,:])
    
    if log_trans and name in log_trans_dims:
        dims = log_trans_dims[name]
        data[:,:x_dims[i]][:, dims] = np.log2(data[:,:x_dims[i]][:, dims])
        print(data[:3,:])
    np.savetxt(name_on_grid + ".csv", data[:,:x_dims[i]+1], fmt="%g", delimiter=",")

svm_on_grid
[[6.0000e+02 5.0000e-01 1.0000e-02 2.7620e-01 1.9198e+02]
 [9.0000e+02 8.0000e-01 1.0000e-01 2.7420e-01 9.9976e+01]
 [2.0000e+03 5.0000e+00 1.0000e-01 2.7910e-01 1.1441e+02]]
[[  9.22881869  -1.          -6.64385619   0.2762     191.98      ]
 [  9.81378119  -0.32192809  -3.32192809   0.2742      99.976     ]
 [ 10.96578428   2.32192809  -3.32192809   0.2791     114.41      ]]
lda_on_grid
[[1.00000000e+00 4.00000000e+00 1.60000000e+01 2.01425535e+03
  3.63931900e+04]
 [9.00000000e-01 1.02400000e+03 4.09600000e+03 1.68054018e+03
  3.64195100e+04]
 [6.00000000e-01 1.02400000e+03 4.09600000e+03 1.32819130e+03
  2.42198500e+04]]
[[1.00000000e+00 2.00000000e+00 4.00000000e+00 2.01425535e+03
  3.63931900e+04]
 [9.00000000e-01 1.00000000e+01 1.20000000e+01 1.68054018e+03
  3.64195100e+04]
 [6.00000000e-01 1.00000000e+01 1.20000000e+01 1.32819130e+03
  2.42198500e+04]]
logreg_on_grid
[[ 0.        0.       20.        5.        0.0906    0.0954    3.990431]
 [ 0.        0.       20. 

In [74]:
np.log2(0)

  """Entry point for launching an IPython kernel.


-inf