In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import os
import torch
from torch.utils.data import Dataset, DataLoader

from sklearn.feature_extraction import DictVectorizer
from sklearn.externals import joblib

from util import *

In [2]:
# %load_ext autoreload
# %autoreload 2

In [3]:
# Probe once for a usable CUDA device. The flag is consulted again when the
# networks are moved to the GPU and is forwarded to train_model_double.
use_gpu = torch.cuda.is_available()
print(use_gpu)

False


In [4]:
# Directory holding the preprocessed data; the relative path is resolved
# against the kernel's current working directory.
data_path = '../data'
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code — only load a data_dict.pkl that comes from a trusted source.
# The loaded object is indexed in this notebook first by split ('train' / 'val')
# and then by field ('X', 'action', 'reward', 'state_id', 'next_state_id').
data_dict = joblib.load(os.path.join(data_path, 'data_dict.pkl'))

In [5]:
# One lookup table per split: each state_id is mapped to the next_state_id
# stored at the same position (if a state_id repeats, the last pairing wins).
transition_dict_train, transition_dict_val = (
    dict(zip(data_dict[split]['state_id'], data_dict[split]['next_state_id']))
    for split in ('train', 'val')
)

In [6]:
# Hyper-parameters for this run. Some entries are read by the cells of this
# notebook; the whole dict is also handed to train_model_double.
config = dict(
    # Width of the training feature matrix; passed to dueling_net as D_in.
    state_dim=data_dict['train']['X'].shape[1],
    # Passed to dueling_net as D_out.
    action_dim=25,
    # Not read in this notebook — presumably the discount factor used by
    # train_model_double; TODO confirm against util.
    gamma=0.9,
    # Mini-batch size given to both DataLoaders.
    batch_size=512,
    # Learning rate given to the Adam optimizer.
    lr=1e-4,
    # Not read in this notebook — presumably the epoch count used by
    # train_model_double; TODO confirm against util.
    num_epochs=100,
    # Passed to dueling_net as H.
    hidden_dim=24,
    # Passed to dueling_net as num_hidden.
    num_hidden=5,
    # Passed to dueling_net as drop_prob.
    drop_prob=0.0,
    # Not read in this notebook — presumably how often (in epochs) the target
    # network is refreshed by train_model_double; TODO confirm against util.
    target_update=10,
    # Passed to dueling_net as option.
    option='linear',
    # When true, a ReduceLROnPlateau scheduler is created for the optimizer.
    use_scheduler=False,
)

In [7]:
# Sanity check: show the feature-matrix shape of the training split and then
# of the validation split, one per line.
print('\n'.join(str(data_dict[split]['X'].shape) for split in ('train', 'val')))


(153569, 46)
(19540, 46)


In [8]:
# Build one RL_Dataset per split from that split's features, actions and
# rewards plus the matching state -> next-state lookup.
train_dataset, val_dataset = (
    RL_Dataset(data_dict[split]['X'],
               data_dict[split]['action'],
               data_dict[split]['reward'],
               transitions)
    for split, transitions in (('train', transition_dict_train),
                               ('val', transition_dict_val))
)

In [9]:
# Wrap each dataset in a DataLoader that yields mini-batches of
# config['batch_size'] samples, loaded by 32 worker processes.

# Training batches are re-shuffled on every pass over the data.
train_loader = DataLoader(
    train_dataset,
    batch_size=config['batch_size'],
    shuffle=True,
    num_workers=32,
)

# Shuffling is only useful for the split that drives gradient updates. The
# validation loader iterates in dataset order so that every evaluation pass
# is computed over the same batches.
val_loader = DataLoader(
    val_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=32,
)

In [10]:
# Key the loaders and the per-split sample counts by split name; both
# mappings are handed to train_model_double.
loaders = dict(train=train_loader, val=val_loader)
dset_sizes = dict(train=len(train_dataset), val=len(val_dataset))


# Create two separate dueling_net instances from identical hyper-parameters:
# `model` (the one registered with the optimizer) first, then `target_model`.
model, target_model = (
    dueling_net(D_in=config['state_dim'],
                H=config['hidden_dim'],
                D_out=config['action_dim'],
                drop_prob=config['drop_prob'],
                num_hidden=config['num_hidden'],
                option=config['option'])
    for _ in range(2)
)

# Adam is given only `model`'s parameters (as a single parameter group);
# `target_model` is not registered with the optimizer.
optimizer = optim.Adam([{'params': model.parameters()}], lr=config['lr'])

# Optional plateau-based learning-rate schedule, enabled through
# config['use_scheduler']; otherwise no scheduler is used.
scheduler = (
    lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', verbose=True)
    if config['use_scheduler']
    else None
)

def weights_init(m):
    """Re-initialise the weights of an ``nn.Linear`` module in place.

    Intended to be passed to ``Module.apply``. The weight matrix of a linear
    layer is overwritten with Xavier/Glorot-uniform values; biases and all
    other module types are left untouched.

    Args:
        m: The module to inspect.

    Returns:
        None. ``m.weight`` is modified in place when ``m`` is ``nn.Linear``.
    """
    if isinstance(m, nn.Linear):
        # Prefer the in-place `xavier_uniform_`; the un-suffixed name is a
        # deprecated alias in newer PyTorch and the only one in older releases.
        xavier_init = getattr(nn.init, 'xavier_uniform_', None) or nn.init.xavier_uniform
        # The initialiser mutates the tensor it receives, so nothing is
        # assigned back (the original trailing `.float()` result was discarded).
        xavier_init(m.weight.data)

# Run weights_init over both networks; Module.apply visits every sub-module.
model.apply(weights_init)
target_model.apply(weights_init)

# When CUDA is available, move both networks to the GPU. Module.cuda() returns
# the module itself, so rebinding both names keeps the two lines symmetric.
if use_gpu:
    model, target_model = model.cuda(), target_model.cuda()

# Smooth L1 loss; size_average=False requests the summed rather than the
# averaged loss.
# NOTE(review): newer PyTorch spells this reduction='sum' — confirm the
# installed version before switching.
criterion = torch.nn.SmoothL1Loss(size_average=False)

# Launch training. train_model_double returns four values, unpacked here in
# the order it returns them.
(performance_dict,
 best_model,
 best_loss,
 time_elapsed) = train_model_double(
    model=model,
    target_model=target_model,
    loaders=loaders,
    dset_sizes=dset_sizes,
    config=config,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    use_gpu=use_gpu,
)
# loss_dict[name] = best_loss

----------
Epoch 0/99
----------
Updating Target Model
train Loss: 1.423674, Best Value: 0.635803, Empirical Value: -0.194977
val Loss: 1.591676, Best Value: 0.607954, Empirical Value: -0.159891
Best Value updated
----------
Epoch 1/99
----------
train Loss: 1.444079, Best Value: 0.804370, Empirical Value: 0.150163
val Loss: 1.328656, Best Value: 0.925831, Empirical Value: 0.350447
Best Value updated
----------
Epoch 2/99
----------
train Loss: 1.339452, Best Value: 0.961703, Empirical Value: 0.447062
val Loss: 1.297089, Best Value: 0.983865, Empirical Value: 0.503225
Best Value updated
----------
Epoch 3/99
----------
train Loss: 1.324837, Best Value: 0.963877, Empirical Value: 0.522545
val Loss: 1.290932, Best Value: 0.935136, Empirical Value: 0.521515
----------
Epoch 4/99
----------
train Loss: 1.320989, Best Value: 0.919744, Empirical Value: 0.527066
val Loss: 1.288822, Best Value: 0.898995, Empirical Value: 0.520884
----------
Epoch 5/99
----------
train Loss: 1.319315, Best Valu

train Loss: 1.294007, Best Value: 1.210035, Empirical Value: 1.074724
val Loss: 1.262998, Best Value: 1.206201, Empirical Value: 1.071410
----------
Epoch 47/99
----------
train Loss: 1.293907, Best Value: 1.210846, Empirical Value: 1.074545
val Loss: 1.263109, Best Value: 1.213298, Empirical Value: 1.079916
Best Value updated
----------
Epoch 48/99
----------
train Loss: 1.293905, Best Value: 1.211601, Empirical Value: 1.075010
val Loss: 1.263042, Best Value: 1.205980, Empirical Value: 1.070533
----------
Epoch 49/99
----------
train Loss: 1.293855, Best Value: 1.210561, Empirical Value: 1.074047
val Loss: 1.262981, Best Value: 1.206815, Empirical Value: 1.071115
----------
Epoch 50/99
----------
Updating Target Model
train Loss: 1.293832, Best Value: 1.211124, Empirical Value: 1.073921
val Loss: 1.267802, Best Value: 1.208561, Empirical Value: 1.072865
----------
Epoch 51/99
----------
train Loss: 1.294606, Best Value: 1.279833, Empirical Value: 1.156834
val Loss: 1.263331, Best Valu

val Loss: 1.264865, Best Value: 1.500114, Empirical Value: 1.388313
Best Value updated
----------
Epoch 92/99
----------
train Loss: 1.295191, Best Value: 1.513744, Empirical Value: 1.400615
val Loss: 1.264747, Best Value: 1.511303, Empirical Value: 1.395649
Best Value updated
----------
Epoch 93/99
----------
train Loss: 1.295154, Best Value: 1.516197, Empirical Value: 1.399927
val Loss: 1.264709, Best Value: 1.502284, Empirical Value: 1.388970
----------
Epoch 94/99
----------
train Loss: 1.295000, Best Value: 1.515946, Empirical Value: 1.400446
val Loss: 1.264646, Best Value: 1.518137, Empirical Value: 1.401708
Best Value updated
----------
Epoch 95/99
----------
train Loss: 1.295175, Best Value: 1.517067, Empirical Value: 1.400177
val Loss: 1.264710, Best Value: 1.507870, Empirical Value: 1.390401
----------
Epoch 96/99
----------
train Loss: 1.294982, Best Value: 1.517962, Empirical Value: 1.399781
val Loss: 1.264658, Best Value: 1.508170, Empirical Value: 1.391200
----------
Epoc