In [1]:
import numpy as np
import os
import torch
import yaml
import json
import random 
import pandas as pd

from sklearn.externals import joblib
from sklearn.model_selection import ParameterGrid

In [2]:
# Seed the stdlib and NumPy global RNGs so a fresh "Restart & Run All" is
# reproducible. The NumPy seed is the one that matters for this notebook: it
# drives the np.random.shuffle call that subsamples the hyperparameter grid.
random.seed(234)
np.random.seed(5432)

In [3]:
# Root of the on-disk data tree; every other location is derived from it.
data_path = './data/'

# Inputs: the features folder (sub-folder '0') and the labels folder.
features_path = os.path.join(os.path.join(data_path, 'features'), str(0))
label_path = os.path.join(data_path, 'labels')

# Output: folder that receives the grid CSV and the per-config YAML files.
# It is created up front; an already existing folder is not an error.
config_path = os.path.join(data_path, *('config', 'grid', 'baseline'))
os.makedirs(name=config_path, exist_ok=True)

In [4]:
# Load the pickled feature and label dictionaries from disk.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only point these paths at files you trust.
# NOTE(review): `sklearn.externals.joblib` (imported at the top) was removed in
# scikit-learn 0.23; newer environments need a plain `import joblib` instead.
features_dict = joblib.load(os.path.join(features_path, 'features.pkl'))
label_dict = joblib.load(os.path.join(label_path, 'label_dict.pkl'))

# For every split present in the features file, keep only its 'features' entry.
data_dict = {
    split: split_content['features']
    for split, split_content in features_dict.items()
}

In [5]:
# Maximum number of configurations kept after the full grid is shuffled.
grid_size = 100

In [6]:
# Search space: each key maps to the list of candidate values for that setting.
# Single-element lists are effectively constants; only lr, batch_size,
# hidden_dim, num_hidden, drop_prob, normalize and gamma vary, which gives
# 3 * 4 * 3 * 3 * 4 * 2 * 3 = 2592 combinations in the full grid.
# 'input_dim' is read from data_dict['train'].shape[1] (presumably the number
# of feature columns -- confirm against the feature extraction code).
param_grid = {
    'input_dim' : [data_dict['train'].shape[1]],
    'lr' : [1e-4, 1e-5, 1e-6],
    'num_epochs' : [30],
    'batch_size' : [64, 128, 256, 512],
    'hidden_dim' : [128, 256, 512],
    'num_hidden' : [1, 2, 3],
    'output_dim' : [2],
    'drop_prob' : [0.0, 0.25, 0.5, 0.75],
    'normalize' : [True, False],
    'iters_per_epoch' : [100],
    'gamma' : [None, 0.99, 0.95],
    'resnet' : [False],
    'sparse' : [True],
    'sparse_mode' : ['binary']
}

# Random search over the grid: enumerate every combination, shuffle the list in
# place with the global NumPy RNG (so the selection depends on the seed set
# earlier and on no other np.random call having run in between; re-running this
# cell alone draws a different sample), then keep the first `grid_size` entries.
the_grid = list(ParameterGrid(param_grid))
np.random.shuffle(the_grid)
the_grid = the_grid[:grid_size]
# One row per sampled configuration; the 'id' column is the row position in
# `the_grid`, i.e. the same number used to name the per-config YAML files.
grid_df = pd.DataFrame(the_grid)
grid_df.to_csv(os.path.join(config_path, 'config.csv'), index_label = 'id')

In [7]:
def yaml_write(x, path):
    """Serialize ``x`` as YAML into the file at ``path``.

    The file is opened in text write mode, so any existing content is replaced.

    Args:
        x: Object to serialize with ``yaml.dump``.
        path: Destination file path.
    """
    with open(path, mode='w') as out_file:
        yaml.dump(x, stream=out_file)
        
def yaml_read(path):
    """Parse the YAML file at ``path`` and return the resulting Python object.

    ``yaml.safe_load`` is used instead of a bare ``yaml.load(fp)``: calling
    ``yaml.load`` without a ``Loader`` is deprecated since PyYAML 5.1 and raises
    ``TypeError`` on PyYAML 6, and the unsafe loader can construct arbitrary
    Python objects from the file. The safe loader handles plain YAML data
    (mappings, sequences, strings, numbers, booleans, null) and rejects
    Python-specific tags.

    Args:
        path: Path of the YAML file to read.

    Returns:
        The deserialized content of the file.
    """
    with open(path, 'r') as fp:
        return yaml.safe_load(fp)

In [8]:
# Write one YAML file per sampled configuration into config_path, named by the
# configuration's position in `the_grid` (0.yaml, 1.yaml, ...).
for i, config_dict in enumerate(the_grid):
    yaml_write(config_dict, os.path.join(config_path, '{}.yaml'.format(i)))