In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import os
import torch
from torch.utils.data import Dataset, DataLoader

from sklearn.feature_extraction import DictVectorizer
from sklearn.externals import joblib

from util import *

In [2]:
# %load_ext autoreload
# %autoreload 2

In [3]:
# Ask PyTorch once whether a CUDA device is usable. The flag is printed here,
# used later to decide whether the models are moved with .cuda(), and passed
# on to train_model_double.
use_gpu = torch.cuda.is_available()
print(use_gpu)

False


In [4]:
# Directory holding data_dict.pkl, relative to the current working directory.
data_path = '../data'

# NOTE(review): joblib.load unpickles the file, so it must only ever be pointed
# at a trusted data_dict.pkl.
data_dict_file = os.path.join(data_path, 'data_dict.pkl')
data_dict = joblib.load(data_dict_file)

In [5]:
# Per split, map each state_id (key) to the next_state_id stored at the same
# position (value). If a state_id occurs more than once, the last pairing wins.
transition_dict_train = {
    state: following
    for state, following in zip(data_dict['train']['state_id'],
                                data_dict['train']['next_state_id'])
}
transition_dict_val = {
    state: following
    for state, following in zip(data_dict['val']['state_id'],
                                data_dict['val']['next_state_id'])
}

In [6]:
# Settings read by the cells below (loaders, networks, optimizer) and handed
# to train_model_double as a whole.
config = dict(
    state_dim = data_dict['train']['X'].shape[1],  # number of columns of the training matrix X
    action_dim = 25,
    gamma = 0.9,
    batch_size = 512,
    lr = 1e-4,
    num_epochs = 100,
    hidden_dim = 24,
    num_hidden = 5,
    drop_prob = 0.0,
    target_update = 10,
    option = 'linear',
    use_scheduler = False,
)

In [7]:
# Display the (rows, columns) shape of the training matrix X; the column count
# is what config['state_dim'] is taken from.
data_dict['train']['X'].shape

(153569, 46)

In [8]:
def _make_dataset(split, transitions):
    """Wrap one split of data_dict in an RL_Dataset.

    The split's 'X', 'action' and 'reward' entries, followed by the transition
    mapping, are passed positionally to RL_Dataset.
    """
    return RL_Dataset(split['X'], split['action'], split['reward'], transitions)


# One dataset per split, each paired with its own transition dictionary.
train_dataset = _make_dataset(data_dict['train'], transition_dict_train)
val_dataset = _make_dataset(data_dict['val'], transition_dict_val)

In [None]:
# Create a dataloader
# Cap the number of worker processes at the CPUs actually present: a fixed 32
# workers oversubscribes smaller machines (PyTorch warns when num_workers
# exceeds its suggested maximum). os.cpu_count() may return None, hence the
# fallback to a single worker. On machines with >= 32 CPUs nothing changes.
loader_workers = min(32, os.cpu_count() or 1)

# collate_fn is left at the DataLoader default (a custom `my_collate` was
# previously commented out here).
train_loader = DataLoader(train_dataset,
                          batch_size = config['batch_size'],
                          shuffle = True,
                          num_workers = loader_workers)

# NOTE(review): shuffling is kept for the validation loader exactly as before.
val_loader = DataLoader(val_dataset,
                        batch_size = config['batch_size'],
                        shuffle = True,
                        num_workers = loader_workers)

In [None]:
# Group the loaders and the dataset lengths under matching 'train' / 'val'
# keys; both dictionaries are passed to train_model_double further down.
loaders = dict(train = train_loader, val = val_loader)
dset_sizes = dict(train = len(train_dataset), val = len(val_dataset))


# Both networks are constructed from the same settings, so collect them once.
net_kwargs = dict(D_in = config['state_dim'],
                  H = config['hidden_dim'],
                  D_out = config['action_dim'],
                  drop_prob = config['drop_prob'],
                  num_hidden = config['num_hidden'],
                  option = config['option'])

# Two separately constructed dueling_net instances: the one whose parameters
# are given to the optimizer, and the one passed as target_model.
model = dueling_net(**net_kwargs)
target_model = dueling_net(**net_kwargs)

# Adam over a single parameter group holding all of `model`'s parameters
# (target_model's parameters are not given to the optimizer).
param_groups = [{'params': model.parameters()}]
optimizer = optim.Adam(param_groups, lr = config['lr'])

# ReduceLROnPlateau is created only when enabled in config; otherwise the
# scheduler is None.
scheduler = (
    lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'min', verbose = True)
    if config['use_scheduler']
    else None
)

def weights_init(m):
    """Xavier-uniform initialise the weight of a ``torch.nn.Linear`` module in place.

    Meant to be used as ``module.apply(weights_init)``, which calls it on every
    sub-module. Modules that are not ``torch.nn.Linear`` are left untouched,
    and biases are not modified.

    Args:
        m: the module currently visited by ``apply``.

    Returns:
        None. The weight tensor is overwritten in place.
    """
    if isinstance(m, torch.nn.Linear):
        # xavier_uniform_ is the in-place replacement for the deprecated
        # xavier_uniform. It fills the tensor directly and keeps its dtype;
        # the former trailing .float() only built a copy that was discarded.
        torch.nn.init.xavier_uniform_(m.weight.data)

# Run weights_init over every sub-module of both networks.
for _net in (model, target_model):
    _net.apply(weights_init)

# Move both networks to the GPU when CUDA is available.
if use_gpu:
    model = model.cuda()
    target_model = target_model.cuda()

# Smooth L1 loss summed over all elements (not averaged).
# reduction = 'sum' is the supported spelling of the deprecated
# size_average = False and produces the same summed value.
criterion = torch.nn.SmoothL1Loss(reduction = 'sum')

# Everything train_model_double needs, passed by keyword.
train_kwargs = dict(model = model,
                    target_model = target_model,
                    loaders = loaders,
                    dset_sizes = dset_sizes,
                    config = config,
                    criterion = criterion,
                    optimizer = optimizer,
                    scheduler = scheduler,
                    use_gpu = use_gpu)

# The call returns four values, unpacked in this order.
train_result = train_model_double(**train_kwargs)
performance_dict, best_model, best_loss, time_elapsed = train_result
# loss_dict[name] = best_loss

----------
Epoch 0/99
----------
Updating Target Model
train Loss: 1.410401, Best Value: 0.820800, Empirical Value: 0.066326
val Loss: 1.569660, Best Value: 0.792363, Empirical Value: 0.086020
Best Value updated
----------
Epoch 1/99
----------
train Loss: 1.419766, Best Value: 0.968574, Empirical Value: 0.380415
val Loss: 1.353072, Best Value: 1.068829, Empirical Value: 0.570541
Best Value updated
----------
Epoch 2/99
----------
train Loss: 1.341967, Best Value: 1.101373, Empirical Value: 0.648538
val Loss: 1.335076, Best Value: 1.096483, Empirical Value: 0.686933
Best Value updated
----------
Epoch 3/99
----------
train Loss: 1.333891, Best Value: 1.092328, Empirical Value: 0.700453
val Loss: 1.330344, Best Value: 1.061778, Empirical Value: 0.691975
----------
Epoch 4/99
----------
train Loss: 1.331324, Best Value: 1.055440, Empirical Value: 0.695545
val Loss: 1.330550, Best Value: 0.977940, Empirical Value: 0.657962
----------
Epoch 5/99
----------
train Loss: 1.330095, Best Value:

train Loss: 1.301164, Best Value: 1.518359, Empirical Value: 1.348870
val Loss: 1.300374, Best Value: 1.502448, Empirical Value: 1.333195
----------
Epoch 46/99
----------
train Loss: 1.301140, Best Value: 1.518078, Empirical Value: 1.348939
val Loss: 1.300259, Best Value: 1.509144, Empirical Value: 1.339249
----------
Epoch 47/99
----------
train Loss: 1.301095, Best Value: 1.518449, Empirical Value: 1.348129
val Loss: 1.300324, Best Value: 1.504757, Empirical Value: 1.338632
----------
Epoch 48/99
----------
train Loss: 1.300928, Best Value: 1.518932, Empirical Value: 1.349358
val Loss: 1.300203, Best Value: 1.507633, Empirical Value: 1.338697
----------
Epoch 49/99
----------
train Loss: 1.300999, Best Value: 1.517788, Empirical Value: 1.347596
val Loss: 1.300104, Best Value: 1.505857, Empirical Value: 1.336965
----------
Epoch 50/99
----------
Updating Target Model
train Loss: 1.300968, Best Value: 1.519144, Empirical Value: 1.349097
val Loss: 1.304703, Best Value: 1.502587, Empiri