In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import os
import torch
from torch.utils.data import Dataset, DataLoader

from sklearn.feature_extraction import DictVectorizer
from sklearn.externals import joblib

from util import *

In [2]:
# %load_ext autoreload
# %autoreload 2

In [3]:
# Probe once for a usable CUDA device. Later cells branch on this flag to move
# the networks to the GPU and also pass it on to the training routine.
use_gpu = torch.cuda.is_available()
print(use_gpu)

False


In [4]:
# Load the pre-built dataset bundle. Later cells read data_dict['train'] with
# the keys 'X', 'action', 'reward', 'state_id' and 'next_state_id'.
# NOTE(review): joblib files are pickles -- only load files from a trusted source.
data_path = '../data'
data_dict_path = os.path.join(data_path, 'data_dict.pkl')
data_dict = joblib.load(data_dict_path)

In [6]:
# Lookup table for the training split: state id -> id of the state that follows it.
# If a state id occurs more than once, the last pairing wins.
transition_dict = {
    state_id: next_state_id
    for state_id, next_state_id in zip(data_dict['train']['state_id'],
                                       data_dict['train']['next_state_id'])
}

In [7]:
# Settings shared by the cells below; the whole dict is also handed to
# train_model_double. ('embed_dim': 32 was already disabled and stays out.)
config = dict(
    state_dim=data_dict['train']['X'].shape[1],  # size of axis 1 of the training matrix X; used as the network's D_in
    action_dim=25,         # used as the network's D_out
    gamma=0.9,             # presumably the discount factor -- confirm in train_model_double
    batch_size=64,         # batch size given to the DataLoader
    lr=1e-4,               # learning rate given to Adam
    num_epochs=30,         # the training log counts epochs 0..29
    hidden_dim=128,        # passed to dueling_net as H
    num_hidden=1,          # passed to dueling_net as num_hidden
    drop_prob=0.0,         # passed to dueling_net as drop_prob
    target_update=10,      # the log shows "Updating Target Model" at epochs 0 and 10
    option='linear',       # passed to dueling_net as option
    use_scheduler=False,   # when True a ReduceLROnPlateau scheduler is created
)

In [9]:
# Wrap the training split so it can be fed to the DataLoader in the next cell.
# RL_Dataset is not defined in this notebook (presumably it comes from util);
# the arguments passed here are, in order: X, action, reward and the
# state -> next-state lookup. TODO confirm against its definition.
train_dataset = RL_Dataset(data_dict['train']['X'], 
                           data_dict['train']['action'],
                           data_dict['train']['reward'],
                           transition_dict)

In [11]:
# Batch iterator over the training set, reshuffled each epoch. Default
# collation is used (a custom `my_collate` was previously disabled here).
# NOTE(review): num_workers is hard-coded to 32 worker processes; consider
# tying it to the CPU count of the machine that runs the notebook.
train_loader = DataLoader(
    train_dataset,
    batch_size=config['batch_size'],
    shuffle=True,
    num_workers=32,
)

In [12]:
# Per-phase containers passed to train_model_double; only a 'train' phase exists here.
loaders = dict(train=train_loader)
dset_sizes = dict(train=len(train_dataset))

# The online and target networks share one architecture, so the constructor
# arguments are written once. The online network is still built first.
_net_kwargs = dict(
    D_in=config['state_dim'],
    H=config['hidden_dim'],
    D_out=config['action_dim'],
    drop_prob=config['drop_prob'],
    num_hidden=config['num_hidden'],
    option=config['option'],
)
model = dueling_net(**_net_kwargs)
target_model = dueling_net(**_net_kwargs)

# Only the online network's parameters are handed to the optimiser.
optimizer = optim.Adam([{'params': model.parameters()}], lr=config['lr'])

# Optional plateau-based learning-rate schedule, switched by the config flag.
scheduler = (
    lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', verbose=True)
    if config['use_scheduler']
    else None
)

def weights_init(m):
    """Xavier-uniform initialise the weight matrix of an ``nn.Linear`` module.

    Intended to be passed to ``Module.apply``, which calls it once for every
    sub-module. Modules that are not ``nn.Linear`` are left untouched, and
    biases are not modified.

    Args:
        m: the module currently being visited.

    Returns:
        None. The weight tensor is filled in place.
    """
    if not isinstance(m, nn.Linear):
        return
    # Prefer the in-place initialiser. The un-suffixed name is deprecated in
    # newer PyTorch releases but is the only spelling older releases provide.
    xavier = getattr(nn.init, 'xavier_uniform_', None) or nn.init.xavier_uniform
    # The initialiser fills the tensor in place. The original chained
    # ``.float()`` on the result and discarded it, which had no effect.
    xavier(m.weight.data)

# Re-initialise the Linear layers of both networks. The online network goes
# first, then the target, so random numbers are drawn in the same order.
model.apply(weights_init)
target_model.apply(weights_init)

# Move both networks to the GPU when one was detected.
if use_gpu:
    model = model.cuda()
    target_model.cuda()

# Smooth-L1 (Huber) loss, summed instead of averaged over the batch.
# NOTE(review): newer PyTorch spells this reduction='sum'; size_average is deprecated there.
criterion = torch.nn.SmoothL1Loss(size_average=False)

# Run training. NOTE(review): the log prints "Best Value updated", so the third
# return value may be a best value, not a loss -- confirm in train_model_double.
performance_dict, best_model, best_loss, time_elapsed = train_model_double(
    model=model,
    target_model=target_model,
    loaders=loaders,
    dset_sizes=dset_sizes,
    config=config,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    use_gpu=use_gpu,
)
# loss_dict[name] = best_loss

----------
Epoch 0/29
----------
Updating Target Model
train Loss: 1.407744, Value: 0.028392
Best Value updated
----------
Epoch 1/29
----------
train Loss: 1.349242, Value: 0.818628
Best Value updated
----------
Epoch 2/29
----------
train Loss: 1.337873, Value: 0.827665
Best Value updated
----------
Epoch 3/29
----------
train Loss: 1.336673, Value: 0.796755
----------
Epoch 4/29
----------
train Loss: 1.336560, Value: 0.767895
----------
Epoch 5/29
----------
train Loss: 1.336960, Value: 0.740834
----------
Epoch 6/29
----------
train Loss: 1.336101, Value: 0.724048
----------
Epoch 7/29
----------
train Loss: 1.335390, Value: 0.706925
----------
Epoch 8/29
----------
train Loss: 1.335299, Value: 0.689504
----------
Epoch 9/29
----------
train Loss: 1.334525, Value: 0.680598
----------
Epoch 10/29
----------
Updating Target Model
train Loss: 1.333473, Value: 0.670580
----------
Epoch 11/29
----------
train Loss: 1.343720, Value: 1.191778
Best Value updated
----------
Epoch 12/29
---