In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import os
import torch
import copy
from torch.utils.data import Dataset, DataLoader

from sklearn.feature_extraction import DictVectorizer
from sklearn.externals import joblib
from sklearn.model_selection import ParameterGrid


from functools import reduce
from util import *

In [2]:
# Development aid (currently disabled): uncomment both lines to have IPython
# re-import edited modules before each cell runs, so changes to local helper
# code are picked up without restarting the kernel.
# %load_ext autoreload
# %autoreload 2

In [3]:
# Check once whether a CUDA device is available. The training cell reads this
# flag to move the networks onto the GPU and forwards it to train_model_double.
use_gpu = torch.cuda.is_available()
print(use_gpu)

True


In [4]:
# Location of the preprocessed data and the serialized dictionary inside it.
# The later cells index the dictionary by split ('train' / 'val').
data_path = '../data'
data_dict_file = os.path.join(data_path, 'data_dict.pkl')

# NOTE(review): joblib.load unpickles the file, so only load files you trust.
data_dict = joblib.load(data_dict_file)

In [5]:
# Per-split lookup tables: key = starting state id, value = id of the next state.
# If a state id occurs more than once, the last pairing wins.
transition_dict_train = {
    state_id: next_state_id
    for state_id, next_state_id in zip(data_dict['train']['state_id'],
                                       data_dict['train']['next_state_id'])
}
transition_dict_val = {
    state_id: next_state_id
    for state_id, next_state_id in zip(data_dict['val']['state_id'],
                                       data_dict['val']['next_state_id'])
}

In [6]:
# Hyperparameter search space. Each key maps to a list of candidate values and
# ParameterGrid enumerates every combination of them. Only 'hidden_dim' and
# 'num_hidden' have more than one candidate here, giving 4 * 5 = 20 configurations.
n_state_features = data_dict['train']['X'].shape[1]  # width of the training feature matrix

param_options = dict(
    state_dim = [n_state_features],
    action_dim = [25],
    gamma = [0.9],
    batch_size = [512],
    lr = [1e-4],
    num_epochs = [300],
    hidden_dim = [128, 256, 512, 1024],
    num_hidden = [1, 2, 3, 5, 10],
    drop_prob = [0.0],
    target_update = [10],
    option = ['linear'],
    use_scheduler = [False],
)

config_grid = ParameterGrid(param_options)

In [8]:
# Train all the models: for every configuration in `config_grid`, build a
# `dueling_net` and a matching target network, fit them with
# `train_model_double`, then save the best weights and a per-run performance
# CSV, both tagged with a string derived from the configuration.

# The datasets do not depend on the configuration, so they are built once
# rather than once per grid point.
train_dataset = RL_Dataset(data_dict['train']['X'],
                           data_dict['train']['action'],
                           data_dict['train']['reward'],
                           transition_dict_train)

val_dataset = RL_Dataset(data_dict['val']['X'],
                         data_dict['val']['action'],
                         data_dict['val']['reward'],
                         transition_dict_val)

dset_sizes = {'train' : len(train_dataset),
              'val' : len(val_dataset)
             }

# Output locations. Created up front so that a long training run cannot fail
# at save time because a directory is missing.
checkpoints_dir = '../checkpoints'
performance_dir = '../performance/'
os.makedirs(checkpoints_dir, exist_ok = True)
os.makedirs(performance_dir, exist_ok = True)


def weights_init(m):
    """Apply Xavier-uniform initialisation to the weights of nn.Linear modules.

    Intended for use with `Module.apply`; modules of any other type are left
    untouched.
    """
    if isinstance(m, nn.Linear):
        # The initialiser fills the tensor in place, so its return value is not needed.
        nn.init.xavier_uniform(m.weight.data)


for config in config_grid:

    # Only the loaders depend on the configuration (through the batch size).
    # NOTE(review): the validation loader is shuffled as in the original code;
    # shuffling is presumably unnecessary for evaluation - confirm before changing.
    train_loader = DataLoader(train_dataset,
                              config['batch_size'],
                              shuffle = True,
                              num_workers = 32
                             )

    val_loader = DataLoader(val_dataset,
                            config['batch_size'],
                            shuffle = True,
                            num_workers = 32
                           )

    loaders = {'train' : train_loader,
               'val' : val_loader
              }

    print(config)

    # The online and target networks share one architecture, so both are built
    # from the same keyword arguments.
    net_kwargs = {'D_in' : config['state_dim'],
                  'H' : config['hidden_dim'],
                  'D_out' : config['action_dim'],
                  'drop_prob' : config['drop_prob'],
                  'num_hidden' : config['num_hidden'],
                  'option' : config['option']
                 }
    model = dueling_net(**net_kwargs)
    target_model = dueling_net(**net_kwargs)

    # Only the online network's parameters are handed to the optimizer.
    optimizer = optim.Adam([{'params': model.parameters()}],
                            lr = config['lr'])

    if config['use_scheduler']:
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'min', verbose = True)
    else:
        scheduler = None

    model.apply(weights_init)
    target_model.apply(weights_init)

    if use_gpu:
        model = model.cuda()
        target_model = target_model.cuda()

    # Summed (not averaged) smooth-L1 loss over each batch.
    criterion = torch.nn.SmoothL1Loss(size_average = False)

    performance_dict, best_model, best_loss, time_elapsed = train_model_double(model = model,
                                                                                target_model = target_model,
                                                                                loaders = loaders,
                                                                                dset_sizes = dset_sizes,
                                                                                config = config,
                                                                                criterion = criterion,
                                                                                optimizer = optimizer,
                                                                                scheduler = scheduler,
                                                                                use_gpu = use_gpu)

    # Identifier for this run: "key_value" pairs joined by underscores, followed
    # by the fractional digits of the current time as a short suffix that keeps
    # repeated runs of the same configuration apart.
    # The previous reduce-based construction had no initial value and therefore
    # glued the first two pairs together without a separator
    # (e.g. "action_dim_25batch_size_512_...").
    run_stamp = str(time.time()).split('.')[1]
    config_pairs = [str(key) + '_' + str(value) for key, value in config.items()]
    config_str = '_'.join(config_pairs) + '_time_' + run_stamp

    checkpoints_path = os.path.join(checkpoints_dir, config_str + '.chk')
    torch.save(best_model.state_dict(), checkpoints_path)

    # Attach the configuration values to every performance row by joining on the run identifier.
    performance_df = dict_to_df(performance_dict)
    config_df = pd.DataFrame(config, index = [0])
    config_df['config_str'] = config_str
    performance_df['config_str'] = config_str
    performance_df = performance_df.set_index('config_str').join(config_df.set_index('config_str'))
    performance_df.to_csv(os.path.join(performance_dir, 'performance_' + config_str + '.csv'), index = True)

{'action_dim': 25, 'batch_size': 512, 'drop_prob': 0.0, 'gamma': 0.9, 'hidden_dim': 128, 'lr': 0.0001, 'num_epochs': 2, 'num_hidden': 1, 'option': 'linear', 'state_dim': 46, 'target_update': 10, 'use_scheduler': False}
----------
Epoch 0/1
----------
Updating Target Model
train Loss: 1.521419, Best Value: 1.003226, Empirical Value: 0.081081
val Loss: 1.636146, Best Value: 1.021016, Empirical Value: 0.163624
Best Value updated
----------
Epoch 1/1
----------
train Loss: 1.406097, Best Value: 1.380607, Empirical Value: 0.736426
val Loss: 1.312525, Best Value: 1.510454, Empirical Value: 0.952501
Best Value updated
Training complete in 0m 9s
Best value: 1.510454
                                                    phase  epoch  \
config_str                                                         
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train    

Updating Target Model
train Loss: 1.378587, Best Value: 1.199519, Empirical Value: 0.170209
val Loss: 1.662702, Best Value: 1.102701, Empirical Value: 0.172816
Best Value updated
----------
Epoch 1/1
----------
train Loss: 1.396727, Best Value: 1.446371, Empirical Value: 0.787909
val Loss: 1.307042, Best Value: 1.539765, Empirical Value: 0.987616
Best Value updated
Training complete in 0m 9s
Best value: 1.539765
                                                    phase  epoch  \
config_str                                                         
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.

Updating Target Model
train Loss: 1.348166, Best Value: 0.996973, Empirical Value: -0.039449
val Loss: 1.682047, Best Value: 0.907068, Empirical Value: -0.018717
Best Value updated
----------
Epoch 1/1
----------
train Loss: 1.394624, Best Value: 1.291660, Empirical Value: 0.623429
val Loss: 1.308294, Best Value: 1.383344, Empirical Value: 0.801477
Best Value updated
Training complete in 0m 10s
Best value: 1.383344
                                                    phase  epoch  \
config_str                                                         
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob

Updating Target Model
train Loss: 1.331381, Best Value: 1.002914, Empirical Value: 0.082842
val Loss: 1.565792, Best Value: 0.922425, Empirical Value: 0.139243
Best Value updated
----------
Epoch 1/1
----------
train Loss: 1.361032, Best Value: 1.212823, Empirical Value: 0.678904
val Loss: 1.294461, Best Value: 1.277138, Empirical Value: 0.820651
Best Value updated
Training complete in 0m 11s
Best value: 1.277138
                                                    phase  epoch  \
config_str                                                         
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0

Updating Target Model
train Loss: 1.321341, Best Value: 1.008316, Empirical Value: 0.052754
val Loss: 1.574569, Best Value: 0.837199, Empirical Value: 0.048362
Best Value updated
----------
Epoch 1/1
----------
train Loss: 1.352196, Best Value: 1.186072, Empirical Value: 0.619846
val Loss: 1.289743, Best Value: 1.199349, Empirical Value: 0.708820
Best Value updated
Training complete in 0m 15s
Best value: 1.199349
                                                    phase  epoch  \
config_str                                                         
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0

Updating Target Model
train Loss: 1.421361, Best Value: 1.057190, Empirical Value: -0.001302
val Loss: 1.711784, Best Value: 0.990078, Empirical Value: 0.019082
Best Value updated
----------
Epoch 1/1
----------
train Loss: 1.388257, Best Value: 1.416968, Empirical Value: 0.768637
val Loss: 1.312264, Best Value: 1.484228, Empirical Value: 0.922442
Best Value updated
Training complete in 0m 8s
Best value: 1.484228
                                                    phase  epoch  \
config_str                                                         
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0

Updating Target Model
train Loss: 1.347880, Best Value: 1.031291, Empirical Value: 0.090390
val Loss: 1.589062, Best Value: 0.895898, Empirical Value: 0.093743
Best Value updated
----------
Epoch 1/1
----------
train Loss: 1.363681, Best Value: 1.305377, Empirical Value: 0.747411
val Loss: 1.301391, Best Value: 1.304932, Empirical Value: 0.823255
Best Value updated
Training complete in 0m 8s
Best value: 1.304932
                                                    phase  epoch  \
config_str                                                         
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      0   
action_dim_25batch_size_512_drop_prob_0.0_gamma...  train      1   
action_dim_25batch_size_512_drop_prob_0.

Updating Target Model
train Loss: 1.336652, Best Value: 0.988724, Empirical Value: -0.013314


Process Process-932:
Process Process-930:
Process Process-933:
Process Process-931:
Process Process-929:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/stephenpfohl/miniconda3/envs/pytorch-gpu/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/stephenpfohl/miniconda3/envs/pytorch-gpu/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/stephenpfohl/miniconda3/envs/pytorch-gpu/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/stephenpfohl/miniconda3/envs/pytorch-gpu/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/stephenpfohl/miniconda3/envs/pytorch-gpu/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/stephenpfohl/minicond

KeyboardInterrupt: 