In [1]:
# Load packages
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import os
import scipy as scp
import scipy.stats as scps
from datetime import datetime

# Load my own functions
import dnnregressor_train_eval_keras as dnnk
import make_data_wfpt as mdw

In [2]:
# Load the simulated dataset from the data_storage folder under the current
# working directory (so the notebook has to be started from the repository root).
data_file = os.getcwd() + '/data_storage/data_11000000_from_simulation_mix_09_12_18_18_20_50.csv'
data = pd.read_csv(data_file)

In [3]:
# Some cleaning of the data: keep only the columns used downstream ...
columns_kept = ['v', 'a', 'w', 'rt', 'choice', 'nf_likelihood']
data = data[columns_kept]

# ... and only rows with 0.1 < w < 0.9 and a > 0.5
in_range = (data['w'] > 0.1) & (data['w'] < 0.9) & (data['a'] > 0.5)
data = data.loc[in_range]

# Small subset for quick experiments.
# NOTE: .loc slices by index *label* (both endpoints included), not by position,
# so after the filtering above this is generally fewer than 10000 rows.
mini_data = data.loc[1:10000]


# Split the cleaned data into train / test features and labels.
# p_train is presumably the fraction of rows used for training — verify in mdw.
train_f, train_l, test_f, test_l = mdw.train_test_split_rt_choice(
    data = data,
    write_to_file = False,
    from_file = False,
    p_train = 0.8,
    backend = 'keras'
)
# Choice probabilities
# train_f, train_l, test_f, test_l = mdw.train_test_from_file_choice_probabilities(n_samples = 2500000,
#                                                             f_signature = '_choice_probabilities_analytic_',
#                                                                                 backend = 'keras')

# rt_choice
# train_f, train_l, test_f, test_l = mdw.train_test_from_file_rt_choice(n_samples = 11000000,
#                                                                       f_signature = '_from_simulation_mix_',
#                                                                       backend = 'keras')

In [9]:
# Make dnnk class (cpm for choice probability model) and hand it the
# train / test splits created above
cpm = dnnk.dnn_trainer()
for split_name, split_values in (('train_features', train_f),
                                 ('train_labels', train_l),
                                 ('test_features', test_f),
                                 ('test_labels', test_l)):
    cpm.data[split_name] = split_values

In [10]:
# Make all parameters we can specify explicit.
# Model parameters: the bare expression on the last line makes the notebook
# display the current contents of cpm.model_params (layer sizes, activations,
# regularization strengths, optimizer, loss, metrics).
cpm.model_params

{'input_shape': 3,
 'output_shape': 1,
 'output_activation': 'sigmoid',
 'hidden_layers': [20, 20, 20, 20],
 'hidden_activations': ['relu', 'relu', 'relu', 'relu'],
 'l1_activation': [0.0, 0.0, 0.0, 0.0],
 'l2_activation': [0.0, 0.0, 0.0, 0.0],
 'l1_kernel': [0.0, 0.0, 0.0, 0.0],
 'l2_kernel': [0.0, 0.0, 0.0, 0.0],
 'optimizer': 'Nadam',
 'loss': 'mse',
 'metrics': ['mse']}

In [11]:
# Parameters governing training: display the current contents of
# cpm.train_params (callbacks and their patience settings, learning-rate
# limits, checkpoint options, number of epochs, batch size, model counter).
cpm.train_params

{'callback_funs': ['ReduceLROnPlateau', 'EarlyStopping', 'ModelCheckpoint'],
 'plateau_patience': 10,
 'min_delta': 0.0001,
 'early_stopping_patience': 15,
 'callback_monitor': 'loss',
 'min_learning_rate': 1e-07,
 'red_coef_learning_rate': 0.1,
 'ckpt_period': 10,
 'ckpt_save_best_only': True,
 'ckpt_save_weights_only': True,
 'max_train_epochs': 2000,
 'batch_size': 10000,
 'warm_start': False,
 'checkpoint': 'ckpt',
 'model_cnt': 0}

In [12]:
# Parameters concerning data storage: display the current contents of
# cpm.data_params (data type and signature, model directory and name,
# checkpoint name, timestamp, training data size).
cpm.data_params

{'data_type': 'choice_probabilities',
 'model_directory': '/home/afengler/git_repos/nn_likelihoods/keras_models',
 'checkpoint': 'ckpt',
 'model_name': 'dnnregressor',
 'data_type_signature': '_choice_probabilities_analytic_',
 'timestamp': '09_22_18_18_56_49',
 'training_data_size': 2500000}

In [13]:
# SPECIFYING META PARAMETERS THAT STAY CONSTANT DURING HYPERPARAMETER OPTIMIZATION

# Model params (overrides of the defaults held in cpm.model_params)
cpm.model_params.update({
    'output_activation': 'linear',
    'input_shape': 5,
    # Hyper
    'hidden_layers': [5, 5, 5, 5],
})
# Further hyperparameters that could be pinned here; all of them are keys of
# cpm.model_params (not cpm.train_params):
#cpm.model_params['hidden_activations']
#cpm.model_params['l1_kernel']
#cpm.model_params['l2_kernel'] = [0.5, 0.5, 0.5, 0.5]
#cpm.model_params['l2_activation'] = [0.5, 0.5, 0.5, 0.5]

# Training params
# Meta
cpm.train_params.update({
    'early_stopping_patience': 5,
    'plateau_patience': 3,
    'min_delta': 0.002,
    'ckpt_period': 1,
    'model_cnt': 0,
    'max_train_epochs': 120,
})

# Data params, including a fresh timestamp taken when this cell runs
cpm.data_params.update({
    'data_type': 'wfpt',
    'data_type_signature': '_choice_rt_',
    'training_data_size': 11000000,
    'timestamp': datetime.now().strftime('%m_%d_%y_%H_%M_%S'),
})

In [14]:
# Make model
# cpm.keras_model_generate(save_model = True)

In [15]:
# Train model
# cpm.run_training(save_history = True, 
#                  warm_start = False)

In [None]:
# Hyperparameter training loop:
# Every run samples a network configuration (layer sizes, regularization,
# batch size), writes it into cpm's parameter dictionaries, then generates and
# trains a model with that configuration.

# Runs:
num_runs = 1
cnt = 0  # run counter; stays 0 if num_runs is 0
max_layers = 5  # only used by the disabled random layer-count draw below
layer_sizes = [100, 100]
batch_sizes = [10000]
regularization_sizes = [0.05, 0.1, 0.2]

# Update model directory to make sure we collect all our models from this hyperparameter optimization run in the same place
cpm.data_params['model_directory'] =  '/home/afengler/git_repos/nn_likelihoods/keras_models/'
cpm.data_params['model_name'] = 'dnnregressor_wftp_hyp_opt'
cpm.train_params['model_cnt'] = 0

histories = []

for cnt in range(1, num_runs + 1):
    # Sample # layers (currently fixed; the random draw is disabled)
    num_layers =  4 # np.random.choice(np.arange(4, max_layers + 1, 1))

    # Regularization type and strength, shared by all layers of this run.
    # Both candidates are 'none' at the moment, so regularization is switched off.
    regularizer = np.random.choice(['none', 'none'])
    regularizer_size = np.random.choice(regularization_sizes)

    # Layer sizes, activations and per-layer regularization strengths
    layers = []
    activations = []
    regularizers_l1 = []
    regularizers_l2 = []

    for _ in range(num_layers):
        layers.append(np.random.choice(layer_sizes))
        activations.append('relu')
        # The three cases must be mutually exclusive (if / elif / else): with a
        # second plain `if`, the 'l1' case also ran the trailing `else` and
        # appended two entries per layer to both regularizer lists.
        if regularizer == 'l1':
            regularizers_l1.append(regularizer_size)
            regularizers_l2.append(0.0)
        elif regularizer == 'l2':
            regularizers_l1.append(0.0)
            regularizers_l2.append(regularizer_size)
        else:
            regularizers_l1.append(0.0)
            regularizers_l2.append(0.0)

    # Batch size
    batch_size = np.random.choice(batch_sizes)

    # Update relevant model parameters
    # (the same strengths are used for activation and kernel regularization)
    cpm.train_params['batch_size'] = batch_size
    print('batch_size: ', batch_size)
    cpm.model_params['hidden_layers'] = layers
    print('layers: ', layers)
    cpm.model_params['hidden_activations'] = activations
    print('hidden_activations:', activations)
    cpm.model_params['l1_activation'] = regularizers_l1
    print('l1_activatons: ', regularizers_l1)
    cpm.model_params['l2_activation'] = regularizers_l2
    print('l2_activations:', regularizers_l2)
    cpm.model_params['l1_kernel'] = regularizers_l1
    print('l1_kernel: ', regularizers_l1)
    cpm.model_params['l2_kernel'] = regularizers_l2
    print('l2_kernel: ', regularizers_l2)

    # Make new timestamp (disabled: all runs keep the timestamp set earlier)
    #cpm.data_params['timestamp'] = datetime.now().strftime('%m_%d_%y_%H_%M_%S')

    # Make model
    cpm.keras_model_generate(save_model = True)

    # Train model
    cpm.run_training(save_history = True,
                     warm_start = False) # Note that this increments model count automatically !

#     histories[-1]['model_cnt'] = cpm.train_params['model_cnt']
#     histories[-1]['num_layers'] = num_layers
#     histories[-1]['size_layers'] = str(layers)
#     histories[-1]['activations'] = str(activations)
#     histories[-1]['batch_size'] = batch_size

    # Progress: number of runs completed so far
    print(cnt)

# histories = pd.concat(histories)
# histories['optimizer'] = cpm.model_params['optimizer']
# histories['timestamp'] = datetime.now().strftime('%m_%d_%y_%H_%M_%S')
# histories.to_csv(cpm.data_params['model_directory'] + cpm.data_params['model_name'] + '_choice_rt_' +\
#                  cpm.data_params['timestamp'] + '/hyp_opt_histories.csv')

batch_size:  10000
layers:  [100, 100, 100, 100]
hidden_activations: ['relu', 'relu', 'relu', 'relu']
l1_activatons:  [0.0, 0.0, 0.0, 0.0]
l2_activations: [0.0, 0.0, 0.0, 0.0]
l1_kernel:  [0.0, 0.0, 0.0, 0.0]
l2_kernel:  [0.0, 0.0, 0.0, 0.0]
Train on 6070023 samples, validate on 1517984 samples
Epoch 1/120