In [1]:
import numpy as np

import data_io
import data_preprocessing
from implementations import *
import validation
import attribute_selection
import evaluators
import metrics

# Autoreload modules
%load_ext autoreload
%autoreload 2

In [2]:
from google.colab import drive
drive.mount('/content/drive')

DATA_FILE_PREFIX = '/content/drive/My Drive/mlproject1_higgs_data/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
y_train, x_train, _, cols = data_io.load_csv_data(f'{DATA_FILE_PREFIX}train.csv')
_, x_test, ids_test, cols_train = data_io.load_csv_data(f'{DATA_FILE_PREFIX}test.csv')

In [4]:
col_to_index_mapping = {col_name: index - 2 for index, col_name in enumerate(cols) if index >= 2}
y_train = (y_train + 1) // 2

In [5]:
import operator

def tune_lambda(y, x, grid, seed=42, history=True):
    w_init = np.zeros(x.shape[1])
    res = {}
    for lambda_ in grid:
        np.random.seed(seed)
        train_model = lambda y_, x_: make_predictor(reg_logistic_regression_sgd(
            y_, x_, lambda_, w_init, 5, 1000, 0.5,
        )[0])
        res[lambda_] = validation.cross_validation(y, x, train_model, 5)[0].mean()
        if history:
          print(f"{lambda_}: {res[lambda_]:.4f}")
    return max(res.items(), key=operator.itemgetter(1))

In [6]:
def make_predictor(w):
  def foo(features):
    return (features @ w > 0).astype(int)
  return foo

In [7]:
def train_model(y, x):
  w_init = np.zeros(x.shape[1])
  lambda_ = 1e-5
  return make_predictor(reg_logistic_regression_sgd(
      y, x, lambda_, w_init, 5, 1000, 0.5)[0])

In [8]:
def build_pairwise_plus(x, column_idx):
    """build pairwise multiplyed features x"""
    if x.ndim == 1:
        x = x[:, np.newaxis]
        
    columns = np.copy(x[:, column_idx])
    pairwise = []
    for i in range(columns.shape[1] - 1):
        for j in range(i + 1, columns.shape[1] - 1):
            pairwise.append(columns[:, i] + columns[:, j])
    pairwise = np.array(pairwise).T
    return np.concatenate([np.copy(x), pairwise], 1)

def transformation_pipeline_median_selected(x, col_to_index_mapping=col_to_index_mapping):
    tx = np.copy(x) # Recommended to copy x so it doesn't change
    tx[tx == -999.] = np.nan
    tx = data_preprocessing.apply_transformation(
        tx,
        [col_to_index_mapping[key] for key in col_to_index_mapping if 'PRI_jet_num' not in key],
        data_preprocessing.standardize_with_nans,
    )
    # standardize and normalize may change value of fields from default missing values, so it uses matrix calculated before applying transformations
    tx = data_preprocessing.median_missing_values(tx, np.isnan(tx)) 
    # onehot for categorical and drop one level
    tx, col_to_index_mapping_upd = data_preprocessing.one_hot_transformation(tx, 'PRI_jet_num', col_to_index_mapping)
    tx = tx[:, :-1]

    sins = np.sin(tx)
    coses = np.cos(tx)
    #polys = data_preprocessing.build_poly(tx, list(range(tx.shape[1])), [2])
    tx = np.concatenate((tx, sins, coses), axis=1)
    
    # add bias
    tx = data_preprocessing.prepend_bias_column(tx)
    first_selection_attr = [0, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 15, 16, 17, 20, 22, 25, 30, 31, 33, 34, 35, 36, 40, 41, 42, 44, 45, 47, 49, 50, 52, 57, 58, 59, 62, 63, 65, 66, 67, 68, 69, 72, 74, 75, 76, 79, 81, 82, 87, 88, 91, 94, 95, 96]
    tx = tx[:, first_selection_attr]
    tx = tx[:, [0, 1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 15, 18, 19, 20, 22, 23, 24, 25, 27, 31, 33, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54]]

    return tx

def transformation_pipeline_median_selected_pairwise(x, col_to_index_mapping=col_to_index_mapping):
    tx = np.copy(x) # Recommended to copy x so it doesn't change
    tx[tx == -999.] = np.nan
    tx = data_preprocessing.apply_transformation(
        tx,
        [col_to_index_mapping[key] for key in col_to_index_mapping if 'PRI_jet_num' not in key],
        data_preprocessing.standardize_with_nans,
    )
    # standardize and normalize may change value of fields from default missing values, so it uses matrix calculated before applying transformations
    tx = data_preprocessing.median_missing_values(tx, np.isnan(tx)) 
    # onehot for categorical and drop one level
    tx, col_to_index_mapping_upd = data_preprocessing.one_hot_transformation(tx, 'PRI_jet_num', col_to_index_mapping)
    tx = tx[:, :-1]

    sins = np.sin(tx)
    coses = np.cos(tx)
    #polys = data_preprocessing.build_poly(tx, list(range(tx.shape[1])), [2])
    tx = np.concatenate((tx, sins, coses), axis=1)
    
    # add bias
    tx = data_preprocessing.prepend_bias_column(tx)
    first_selection_attr = [0, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 15, 16, 17, 20, 22, 25, 30, 31, 33, 34, 35, 36, 40, 41, 42, 44, 45, 47, 49, 50, 52, 57, 58, 59, 62, 63, 65, 66, 67, 68, 69, 72, 74, 75, 76, 79, 81, 82, 87, 88, 91, 94, 95, 96]
    tx = tx[:, first_selection_attr]
    tx = tx[:, [1, 2, 3, 4, 6, 7, 8, 9, 12, 13, 15, 18, 19, 20, 22, 23, 24, 25, 27, 31, 33, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54]]

    d = tx.shape[1]

    tx = data_preprocessing.build_pairwise(tx, list(range(d)))

    #plus = build_pairwise_plus(tx, list(range(d)))

    #tx = np.concatenate((tx, mul), axis=1)

    tx = data_preprocessing.prepend_bias_column(tx)
    
    return tx

In [10]:
tx_train_2 = transformation_pipeline_median_selected_pairwise(x_train)
#top_80 = [0, 13, 30, 31, 36, 38, 39, 40, 42, 45, 52, 63, 75, 82, 84, 89, 90, 92, 101, 104, 108, 109, 128, 129, 136, 143, 166, 168, 213, 222, 224, 227, 245, 273, 290, 292, 299, 301, 303, 308, 313, 314, 351, 360, 362, 382, 427, 431, 441, 443, 444, 455, 459, 460, 464, 466, 467, 484, 491, 494, 509, 511, 557, 590, 596, 597, 663, 669, 670, 671, 693, 699, 742, 743, 745, 752, 777, 779, 780, 781]
#tx_train_3 = tx_train_2[:, top_80]
SELECTED = [0, 1, 8, 13, 19, 25, 28, 30, 36, 37, 38, 39, 40, 42, 43, 45, 52, 56, 63, 70, 75, 76, 78, 82, 84, 89, 90, 92, 94, 96, 98, 100, 101, 102, 104, 106, 108, 109, 111, 126, 128, 129, 130, 136, 139, 142, 143, 148, 160, 161, 165, 166, 168, 182, 213, 222, 224, 225, 227, 228, 229, 234, 240, 242, 245, 259, 261, 270, 272, 277, 290, 291, 292, 293, 295, 299, 301, 303, 308, 310, 313, 314, 316, 349, 350, 351, 354, 360, 361, 362, 364, 377, 378, 380, 382, 386, 388, 393, 394, 402, 404, 408, 409, 427, 431, 432, 433, 437, 439, 441, 442, 443, 444, 448, 455, 456, 457, 459, 460, 462, 464, 466, 467, 469, 470, 472, 473, 475, 480, 484, 491, 493, 494, 496, 497, 507, 509, 511, 513, 516, 520, 527, 533, 534, 535, 539, 547, 556, 557, 560, 567, 573, 590, 591, 596, 597, 601, 602, 603, 630, 631, 632, 634, 635, 640, 663, 665, 669, 670, 671, 672, 678, 682, 687, 688, 690, 693, 696, 697, 699, 704, 709, 716, 718, 723, 724, 726, 737, 742, 743, 745, 751, 752, 757, 768, 776, 777, 779, 780, 781]
tx_train_3 = tx_train_2[:, SELECTED]

In [13]:
train_model_3 = lambda y_, x_: make_predictor(reg_logistic_regression_sgd(
    y_, x_, 2.1544346900318868e-11, np.zeros(x_.shape[1]), 20, 1000, 0.5,
)[0])
_ = validation.cross_validation(y_train, tx_train_3, train_model_3, 10, verbose=True)

------ 10-fold cross validation results ------
    Accuracy: avg 0.8346, max 0.83808, min 0.8306, stddev 0.0026548
    Fbeta score: avg 0.75079, max 0.76022, min 0.73228, stddev 0.0070258


In [14]:
tune_lambda(y_train, tx_train_3, list(map(lambda x: 10 ** (-x / 3), range(9, 40))) + [0])

0.001: 0.8330
0.00046415888336127773: 0.8336
0.00021544346900318845: 0.8342
0.0001: 0.8346
4.641588833612782e-05: 0.8349
2.1544346900318823e-05: 0.8351
1e-05: 0.8352


KeyboardInterrupt: ignored

In [11]:
def create_submission(x_train, y_train, x_test, ids, train_model, file_name):
  predict = train_model(y_train, x_train)
  labels_train = predict(x_train)
  train_acc = metrics.accuracy(y_train, labels_train) * 100
  full_file_name = f'{file_name}.csv'
  if input(f'Train accuracy is {train_acc:.3} %. \n\
Do you want to continue and create submission `{full_file_name}`? [y/N]').strip().upper() == 'Y':
    labels = predict(x_test)
    labels = labels * 2 - 1
    data_io.create_csv_submission(ids, labels, full_file_name)

In [None]:
SELECTED = [0, 1, 8, 13, 19, 25, 28, 30, 36, 37, 38, 39, 40, 42, 43, 45, 52, 56, 63, 70, 75, 76, 78, 82, 84, 89, 90, 92, 94, 96, 98, 100, 101, 102, 104, 106, 108, 109, 111, 126, 128, 129, 130, 136, 139, 142, 143, 148, 160, 161, 165, 166, 168, 182, 213, 222, 224, 225, 227, 228, 229, 234, 240, 242, 245, 259, 261, 270, 272, 277, 290, 291, 292, 293, 295, 299, 301, 303, 308, 310, 313, 314, 316, 349, 350, 351, 354, 360, 361, 362, 364, 377, 378, 380, 382, 386, 388, 393, 394, 402, 404, 408, 409, 427, 431, 432, 433, 437, 439, 441, 442, 443, 444, 448, 455, 456, 457, 459, 460, 462, 464, 466, 467, 469, 470, 472, 473, 475, 480, 484, 491, 493, 494, 496, 497, 507, 509, 511, 513, 516, 520, 527, 533, 534, 535, 539, 547, 556, 557, 560, 567, 573, 590, 591, 596, 597, 601, 602, 603, 630, 631, 632, 634, 635, 640, 663, 665, 669, 670, 671, 672, 678, 682, 687, 688, 690, 693, 696, 697, 699, 704, 709, 716, 718, 723, 724, 726, 737, 742, 743, 745, 751, 752, 757, 768, 776, 777, 779, 780, 781]

tx_train_3 = tx_train_2[:, SELECTED]

lambda_, _ = tune_lambda(y_train, tx_train_3, list(map(lambda x: 10 ** (-x / 3), range(9, 40))) + [0])

tx_test = transformation_pipeline_median_selected_pairwise(x_test)
tx_test = tx_test[:, SELECTED]

#lambda_ = 1e-8
train_model = lambda y_, x_: make_predictor(reg_logistic_regression_sgd(
    y_, x_, lambda_, np.zeros(x_.shape[1]), 20, 1000, 0.5,
)[0])

create_submission(
    tx_train_3, y_train, 
    tx_test, 
    ids_test, train_model, '836accSub')

0.001: 0.8330
0.00046415888336127773: 0.8336
0.00021544346900318845: 0.8342
0.0001: 0.8346
4.641588833612782e-05: 0.8349
2.1544346900318823e-05: 0.8351
1e-05: 0.8352
4.641588833612782e-06: 0.8354
2.1544346900318822e-06: 0.8355
1e-06: 0.8357
4.641588833612782e-07: 0.8357
2.1544346900318822e-07: 0.8357
1e-07: 0.8357
4.641588833612782e-08: 0.8358
2.1544346900318822e-08: 0.8358
1e-08: 0.8358
4.641588833612773e-09: 0.8358
2.1544346900318866e-09: 0.8358
1e-09: 0.8358
4.6415888336127727e-10: 0.8358
2.1544346900318867e-10: 0.8358
1e-10: 0.8357
4.641588833612773e-11: 0.8358
2.1544346900318868e-11: 0.8358
1e-11: 0.8358
4.6415888336127725e-12: 0.8358
2.1544346900318868e-12: 0.8358
1e-12: 0.8358
4.641588833612772e-13: 0.8358
2.1544346900318868e-13: 0.8358
1e-13: 0.8358
0: 0.8358
Train accuracy is 83.5 %. 
Do you want to continue and create submission `836accSub.csv`? [y/N]y


NEW IDEA:





In [None]:
def get_top_abs_k_indexes(arr, k):
  indexes = np.abs(arr).argsort()[-k:]
  indexes.sort()
  return indexes

In [None]:
import sys
import numpy
import time
numpy.set_printoptions(threshold=sys.maxsize)

for i in range(20):
  lasso_lambda_ = 10 ** (-i / 3)
  time.sleep(1) # To clear RAM if needed
  lasso_w, _ = lasso_logistic_regression_sgd(y_train, tx_train_2, .1, np.zeros(tx_train_2.shape[1]), 5, 1000, .5)
  for k_subset in [20, 30, 40, 50, 80, 100, 150, 200]:
    time.sleep(1)
    top_k_w = get_top_abs_k_indexes(lasso_w, k_subset)

    features_subset = tx_train_2[:, top_k_w]

    print(f'Lambda {lasso_lambda_}, top_k {k_subset}')
    accs, _ = validation.cross_validation(y_train, features_subset, train_model, 10, verbose=True)
    mean_acc = np.mean(accs)
    with open(f'{DATA_FILE_PREFIX}lasso_data_no_abs.txt', 'a') as f:
      list_top_k = list(top_k_w)
      print(f'lambda {lasso_lambda_}, k {k_subset}: accuracy {mean_acc}, top features {list_top_k}', file=f)

  return 1/(1 + np.exp(-x))


Lambda 1.0, top_k 20
------ 10-fold cross validation results ------
    Accuracy: avg 0.62575, max 0.6362, min 0.61204, stddev 0.0076967
    Fbeta score: avg 0.42027, max 0.54746, min 0.23237, stddev 0.09181
Lambda 1.0, top_k 30
------ 10-fold cross validation results ------
    Accuracy: avg 0.69394, max 0.71152, min 0.68024, stddev 0.011197
    Fbeta score: avg 0.49158, max 0.61369, min 0.27446, stddev 0.12087
Lambda 1.0, top_k 40
------ 10-fold cross validation results ------
    Accuracy: avg 0.71472, max 0.73928, min 0.68548, stddev 0.014337
    Fbeta score: avg 0.56265, max 0.64976, min 0.32803, stddev 0.12231
Lambda 1.0, top_k 50
------ 10-fold cross validation results ------
    Accuracy: avg 0.70006, max 0.72932, min 0.66408, stddev 0.019329
    Fbeta score: avg 0.32408, max 0.62942, min 0.16413, stddev 0.13141
Lambda 1.0, top_k 80
------ 10-fold cross validation results ------
    Accuracy: avg 0.70761, max 0.72736, min 0.68304, stddev 0.011324
    Fbeta score: avg 0.33474, m

  loss = np.log1p(np.exp(-t * (tx @ weights)))


------ 10-fold cross validation results ------
    Accuracy: avg 0.72706, max 0.73628, min 0.722, stddev 0.0043651
    Fbeta score: avg 0.52364, max 0.55306, min 0.48859, stddev 0.023522
Lambda 0.4641588833612779, top_k 40
------ 10-fold cross validation results ------
    Accuracy: avg 0.75232, max 0.76164, min 0.74336, stddev 0.0060446
    Fbeta score: avg 0.60091, max 0.63941, min 0.53468, stddev 0.031121
Lambda 0.4641588833612779, top_k 50
------ 10-fold cross validation results ------
    Accuracy: avg 0.74219, max 0.757, min 0.69876, stddev 0.017514
    Fbeta score: avg 0.5642, max 0.64802, min 0.39913, stddev 0.069109
Lambda 0.4641588833612779, top_k 80
------ 10-fold cross validation results ------
    Accuracy: avg 0.75627, max 0.78444, min 0.73616, stddev 0.017005
    Fbeta score: avg 0.62857, max 0.67248, min 0.5416, stddev 0.039952
Lambda 0.4641588833612779, top_k 100
------ 10-fold cross validation results ------
    Accuracy: avg 0.70895, max 0.74632, min 0.68628, stddev 

KeyboardInterrupt: ignored

In [None]:
import sys
import numpy
import time
numpy.set_printoptions(threshold=sys.maxsize)

for i in range(-3, 30):
  lasso_lambda_ = 10 ** (-i / 5)
  time.sleep(1) # To clear RAM if needed
  lasso_w, _ = lasso_logistic_regression_sgd(y_train, tx_train_2, lasso_lambda_, np.zeros(tx_train_2.shape[1]), 5, 500, .3)
  for k_subset in [20, 30, 40, 50, 80, 100, 150, 200, 250, 300, 350, 400]:
    time.sleep(1)
    top_k_w = get_top_abs_k_indexes(lasso_w, k_subset)

    features_subset = tx_train_2[:, top_k_w]

    print(f'Lambda {lasso_lambda_}, top_k {k_subset}')
    best_ridge_lambda, mean_acc = tune_lambda(
        y_train, features_subset, [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9], history=False)
    print(f'Ridge lambda {best_ridge_lambda}, accuracy {mean_acc}')
    with open(f'{DATA_FILE_PREFIX}lasso_data_no_abs_granular_no_plus.txt', 'a') as f:
      list_top_k = list(top_k_w)
      print(f'lambda {lasso_lambda_}, k {k_subset}: accuracy {mean_acc}, best ridge lambda {best_ridge_lambda}, top features \n{list_top_k}\n\n\n', file=f)

  exp_x = np.exp(x)
  exp_x / (1 + exp_x))
  1 / (1 + np.exp(-x)),
  loss = np.log1p(np.exp(-t * (tx @ weights)))


Lambda 3.9810717055349722, top_k 20
Ridge lambda 1e-06, accuracy 0.724696
Lambda 3.9810717055349722, top_k 30
Ridge lambda 1e-07, accuracy 0.73758
Lambda 3.9810717055349722, top_k 40
Ridge lambda 1e-09, accuracy 0.745796
Lambda 3.9810717055349722, top_k 50
Ridge lambda 1e-09, accuracy 0.770696
Lambda 3.9810717055349722, top_k 80
Ridge lambda 1e-08, accuracy 0.81648
Lambda 3.9810717055349722, top_k 100
Ridge lambda 1e-06, accuracy 0.820852
Lambda 3.9810717055349722, top_k 150
Ridge lambda 1e-09, accuracy 0.8247959999999999
Lambda 3.9810717055349722, top_k 200
Ridge lambda 1e-09, accuracy 0.82854
Lambda 3.9810717055349722, top_k 250
Ridge lambda 1e-09, accuracy 0.830964
Lambda 3.9810717055349722, top_k 300
Ridge lambda 1e-09, accuracy 0.8314280000000001
Lambda 3.9810717055349722, top_k 350
Ridge lambda 1e-09, accuracy 0.831952
Lambda 3.9810717055349722, top_k 400
Ridge lambda 0.0001, accuracy 0.8309159999999999
Lambda 2.51188643150958, top_k 20
Ridge lambda 1e-06, accuracy 0.71654
Lambda

In [None]:
import sys
import numpy
import time
numpy.set_printoptions(threshold=sys.maxsize)

for i in range(23, 32):
  lasso_lambda_ = 10 ** (-i / 5)
  time.sleep(1) # To clear RAM if needed
  lasso_w, _ = lasso_logistic_regression_sgd(y_train, tx_train_2, lasso_lambda_, np.zeros(tx_train_2.shape[1]), 5, 500, .3)
  for k_subset in [20, 30, 40, 50, 80, 100, 150, 200, 250, 300, 350, 400]:
    time.sleep(1)
    top_k_w = get_top_abs_k_indexes(lasso_w, k_subset)

    features_subset = tx_train_2[:, top_k_w]

    print(f'Lambda {lasso_lambda_}, top_k {k_subset}')
    best_ridge_lambda, mean_acc = tune_lambda(
        y_train, features_subset, [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9], history=False)
    print(f'Ridge lambda {best_ridge_lambda}, accuracy {mean_acc}')
    with open(f'{DATA_FILE_PREFIX}lasso_data_no_abs_granular_no_plus.txt', 'a') as f:
      list_top_k = list(top_k_w)
      print(f'lambda {lasso_lambda_}, k {k_subset}: accuracy {mean_acc}, best ridge lambda {best_ridge_lambda}, top features \n{list_top_k}\n\n\n', file=f)

  exp_x = np.exp(x)
  exp_x / (1 + exp_x))


Lambda 2.5118864315095822e-05, top_k 20
Ridge lambda 1e-07, accuracy 0.736272
Lambda 2.5118864315095822e-05, top_k 30
Ridge lambda 1e-07, accuracy 0.762408
Lambda 2.5118864315095822e-05, top_k 40
Ridge lambda 1e-07, accuracy 0.7724200000000001
Lambda 2.5118864315095822e-05, top_k 50
Ridge lambda 1e-09, accuracy 0.778608
Lambda 2.5118864315095822e-05, top_k 80
Ridge lambda 1e-08, accuracy 0.806532
Lambda 2.5118864315095822e-05, top_k 100
Ridge lambda 1e-07, accuracy 0.812176
Lambda 2.5118864315095822e-05, top_k 150
Ridge lambda 1e-09, accuracy 0.8219879999999999
Lambda 2.5118864315095822e-05, top_k 200
Ridge lambda 1e-09, accuracy 0.82584
Lambda 2.5118864315095822e-05, top_k 250
Ridge lambda 1e-09, accuracy 0.8282320000000001
Lambda 2.5118864315095822e-05, top_k 300
Ridge lambda 1e-09, accuracy 0.8296240000000001
Lambda 2.5118864315095822e-05, top_k 350
Ridge lambda 1e-05, accuracy 0.8269719999999999
Lambda 2.5118864315095822e-05, top_k 400
Ridge lambda 1e-09, accuracy 0.82684
Lambda 1.

In [None]:
import sys
import numpy
import time
numpy.set_printoptions(threshold=sys.maxsize)

for i in range(10, 30):
  lasso_lambda_ = 10 ** (-i / 5)
  time.sleep(1) # To clear RAM if needed
  lasso_w, _ = lasso_logistic_regression_sgd(y_train, tx_train_4, lasso_lambda_, np.zeros(tx_train_4.shape[1]), 5, 500, .3)
  for k_subset in [50, 60, 70, 80, 100, 120, 150, 200]:
    time.sleep(1)
    top_k_w = get_top_abs_k_indexes(lasso_w, k_subset)

    features_subset = tx_train_4[:, top_k_w]

    print(f'Lambda {lasso_lambda_}, top_k {k_subset}')
    best_ridge_lambda, mean_acc = tune_lambda(
        y_train, features_subset, [1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9], history=False)
    print(f'Ridge lambda {best_ridge_lambda}, accuracy {mean_acc}')
    with open(f'{DATA_FILE_PREFIX}lasso_data_granular_top_80_sincos_rep.txt', 'a') as f:
      list_top_k = list(top_k_w)
      print(f'lambda {lasso_lambda_}, k {k_subset}: accuracy {mean_acc}, best ridge lambda {best_ridge_lambda}, top features \n{list_top_k}\n\n\n', file=f)

Lambda 0.01, top_k 50
Ridge lambda 1e-09, accuracy 0.82096
Lambda 0.01, top_k 60
Ridge lambda 1e-09, accuracy 0.822724
Lambda 0.01, top_k 70
Ridge lambda 1e-09, accuracy 0.8221440000000001
Lambda 0.01, top_k 80
Ridge lambda 1e-09, accuracy 0.823644
Lambda 0.01, top_k 100
Ridge lambda 1e-09, accuracy 0.821184
Lambda 0.01, top_k 120
Ridge lambda 1e-06, accuracy 0.7657480000000001
Lambda 0.01, top_k 150
Ridge lambda 1e-06, accuracy 0.8041079999999999
Lambda 0.01, top_k 200
Ridge lambda 1e-09, accuracy 0.7760239999999999
Lambda 0.00630957344480193, top_k 50
Ridge lambda 1e-09, accuracy 0.8256799999999999
Lambda 0.00630957344480193, top_k 60
Ridge lambda 1e-09, accuracy 0.827108
Lambda 0.00630957344480193, top_k 70
Ridge lambda 1e-09, accuracy 0.8264039999999999
Lambda 0.00630957344480193, top_k 80
Ridge lambda 1e-07, accuracy 0.825156
Lambda 0.00630957344480193, top_k 100
Ridge lambda 0.0001, accuracy 0.8080359999999999
Lambda 0.00630957344480193, top_k 120
Ridge lambda 1e-08, accuracy 0.7