In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import os
import torch
import copy
from torch.utils.data import Dataset, DataLoader

from sklearn.feature_extraction import DictVectorizer
from sklearn.externals import joblib
from sklearn.model_selection import ParameterGrid


from functools import reduce
from util import *

In [2]:
# %load_ext autoreload
# %autoreload 2

In [3]:
# Probe for CUDA once up front. The training cell reads this flag to decide
# whether the networks are moved to the GPU and forwards it to
# train_model_double.
use_gpu = torch.cuda.is_available()
print(use_gpu)

False


In [4]:
# Tag for this experiment; it names the sub-directory under ../checkpoints and
# ../performance that the training cell writes into.
exp_name = 'exp2'

# Pre-processed data, stored as a joblib pickle. Later cells index it by split
# ('train' / 'val') and then by field ('X', 'action', 'reward', 'state_id',
# 'next_state_id').
# NOTE: joblib.load unpickles the file, so only load files you trust.
data_path = '../data'
data_dict_file = os.path.join(data_path, 'data_dict.pkl')
data_dict = joblib.load(data_dict_file)

In [5]:
# Successor lookup for each split: key = starting state id, value = id of the
# state that follows it. If a state id occurs more than once, the last pairing
# encountered is the one that is kept.
transition_dict_train = {
    state: successor
    for state, successor in zip(data_dict['train']['state_id'],
                                data_dict['train']['next_state_id'])
}
transition_dict_val = {
    state: successor
    for state, successor in zip(data_dict['val']['state_id'],
                                data_dict['val']['next_state_id'])
}

In [6]:
# Tuning grid: every key maps to the list of values to try, and ParameterGrid
# enumerates all combinations. Only 'hidden_dim' (4 values) and 'num_hidden'
# (5 values) vary, so the grid holds 4 x 5 = 20 configurations.
param_options = {
    'state_dim': [data_dict['train']['X'].shape[1]],  # column count of the training matrix -> D_in
    'action_dim': [25],                               # passed to dueling_net as D_out
    'gamma': [0.9],                                   # forwarded to train_model_double via config
    'batch_size': [512],                              # DataLoader batch size
    'lr': [1e-4],                                     # Adam learning rate
    'num_epochs': [300],                              # forwarded to train_model_double via config
    'hidden_dim': [128, 256, 512, 1024],              # passed to dueling_net as H
    'num_hidden': [1, 2, 3, 5, 10],                   # passed to dueling_net as num_hidden
    'drop_prob': [0.0],                               # passed to dueling_net as drop_prob
    'target_update': [10],                            # forwarded to train_model_double via config
    'option': ['linear'],                             # passed to dueling_net as option
    'use_scheduler': [False],                         # True -> attach a ReduceLROnPlateau scheduler
}

config_grid = ParameterGrid(param_options)

In [7]:
# Train all the models.
#
# For every configuration in `config_grid` this cell builds an online network
# and a target network (both `dueling_net`), trains them with
# `train_model_double`, then writes
#   * the best model's state_dict to ../checkpoints/<exp_name>/<config_str>.chk
#   * the per-epoch performance table, joined with the configuration, to
#     ../performance/<exp_name>/performance_<config_str>.csv

# Worker processes spawned by *each* DataLoader (train and val).
NUM_WORKERS = 32


def weights_init(m):
    """Xavier-uniform initialise the weight tensor of every nn.Linear module.

    Intended for use with ``Module.apply``; modules of any other type are left
    untouched. The initialiser fills ``m.weight.data`` in place.
    """
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform(m.weight.data)


# --- Loop-invariant setup ----------------------------------------------------
# The datasets do not depend on the configuration, so build them once instead
# of once per grid point.
train_dataset = RL_Dataset(data_dict['train']['X'],
                           data_dict['train']['action'],
                           data_dict['train']['reward'],
                           transition_dict_train)
val_dataset = RL_Dataset(data_dict['val']['X'],
                         data_dict['val']['action'],
                         data_dict['val']['reward'],
                         transition_dict_val)
dset_sizes = {'train': len(train_dataset),
              'val': len(val_dataset)}

# size_average=False makes the loss a sum over elements rather than a mean.
# (Newer PyTorch releases spell this reduction='sum'.)
criterion = torch.nn.SmoothL1Loss(size_average = False)

# Create the output directories up front so that a missing folder cannot make
# the save step fail after a complete training run.
checkpoint_dir = os.path.join('..', 'checkpoints', exp_name)
performance_dir = os.path.join('..', 'performance', exp_name)
os.makedirs(checkpoint_dir, exist_ok=True)
os.makedirs(performance_dir, exist_ok=True)

# --- One training run per configuration --------------------------------------
for config in config_grid:
    print(config)

    # Only the loaders depend on the configuration (through the batch size).
    # NOTE(review): shuffling the validation loader looks unnecessary; kept
    # as-is -- confirm train_model_double does not rely on it before changing.
    train_loader = DataLoader(train_dataset,
                              config['batch_size'],
                              shuffle=True,
                              num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset,
                            config['batch_size'],
                            shuffle=True,
                            num_workers=NUM_WORKERS)
    loaders = {'train': train_loader,
               'val': val_loader}

    # The online and target networks share one architecture.
    net_kwargs = dict(D_in=config['state_dim'],
                      H=config['hidden_dim'],
                      D_out=config['action_dim'],
                      drop_prob=config['drop_prob'],
                      num_hidden=config['num_hidden'],
                      option=config['option'])
    model = dueling_net(**net_kwargs)
    target_model = dueling_net(**net_kwargs)

    model.apply(weights_init)
    target_model.apply(weights_init)

    # Move the networks to the GPU *before* building the optimizer, as the
    # torch.optim documentation recommends.
    if use_gpu:
        model = model.cuda()
        target_model = target_model.cuda()

    # Only the online network is optimised.
    optimizer = optim.Adam(model.parameters(), lr=config['lr'])

    if config['use_scheduler']:
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', verbose=True)
    else:
        scheduler = None

    performance_dict, best_model, best_loss, time_elapsed = train_model_double(
        model=model,
        target_model=target_model,
        loaders=loaders,
        dset_sizes=dset_sizes,
        config=config,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        use_gpu=use_gpu)

    # Run identifier: every key/value pair joined with '_' plus a time-derived
    # suffix. (The previous reduce()-based version fused the first two pairs
    # together without a separator, e.g. 'action_dim_25batch_size_512_...'.)
    # The suffix is the fractional part of the current timestamp, which keeps
    # repeated runs of one configuration from overwriting each other.
    config_tokens = [str(key) + '_' + str(value) for key, value in config.items()]
    config_str = '_'.join(config_tokens) + '_time_' + str(time.time()).split('.')[1]

    checkpoints_path = os.path.join(checkpoint_dir, config_str + '.chk')
    torch.save(best_model.state_dict(), checkpoints_path)

    # Attach the configuration columns to every row of the performance table by
    # joining the two frames on the shared run identifier.
    performance_df = dict_to_df(performance_dict)
    config_df = pd.DataFrame(config, index=[0])
    config_df['config_str'] = config_str
    performance_df['config_str'] = config_str
    performance_df = performance_df.set_index('config_str').join(config_df.set_index('config_str'))
    performance_df.to_csv(os.path.join(performance_dir, 'performance_' + config_str + '.csv'),
                          index=True)

{'action_dim': 25, 'batch_size': 512, 'drop_prob': 0.0, 'gamma': 0.9, 'hidden_dim': 128, 'lr': 0.0001, 'num_epochs': 2, 'num_hidden': 1, 'option': 'linear', 'state_dim': 46, 'target_update': 10, 'use_scheduler': False}
----------
Epoch 0/1
----------
Updating Target Model
train Loss: 1.481192, Best Value: 1.041985, Empirical Value: 0.039548
val Loss: 1.622102, Best Value: 1.052119, Empirical Value: 0.118492
Best Value updated
----------
Epoch 1/1
----------
train Loss: 1.357917, Best Value: 1.397990, Empirical Value: 0.714122
val Loss: 1.263643, Best Value: 1.531106, Empirical Value: 0.951043
Best Value updated
Training complete in 0m 11s
Best value: 1.531106
{'action_dim': 25, 'batch_size': 512, 'drop_prob': 0.0, 'gamma': 0.9, 'hidden_dim': 128, 'lr': 0.0001, 'num_epochs': 2, 'num_hidden': 2, 'option': 'linear', 'state_dim': 46, 'target_update': 10, 'use_scheduler': False}
----------
Epoch 0/1
----------
Updating Target Model
train Loss: 1.389026, Best Value: 1.162770, Empirical Value

Process Process-286:
Process Process-288:
Process Process-285:
Process Process-283:
Process Process-287:
Process Process-284:
Process Process-279:
Process Process-282:
Process Process-281:
Process Process-280:
Process Process-278:
Process Process-277:
Process Process-276:
Process Process-275:
Process Process-274:
Process Process-273:
Process Process-272:
Process Process-271:
Process Process-270:
Process Process-269:
Process Process-267:
Process Process-268:
Process Process-266:
Process Process-265:
Process Process-264:
Process Process-263:
Process Process-262:
Process Process-261:
Process Process-260:
Process Process-259:
Process Process-257:
Process Process-258:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/s

  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
KeyboardInterrupt
Traceback (most recent call last):
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/queues.py", line

  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
Traceback (most recent call last):
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
KeyboardInterrupt
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/Users/stephenpfohl/miniconda3/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
K

KeyboardInterrupt: 