# Pointer Networks

In [None]:
import sys
sys.path.insert(0, '../')

import math, operator
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import pickle

import os, time, copy
import json
import models.train_predict_utils as ut
%load_ext autoreload
%autoreload 2

In [None]:
from torch.utils.tensorboard import SummaryWriter
%load_ext tensorboard

In [None]:
from models.batch_env_rl import BatchEnvRL

from models.neural_net import Agent
from models.run_episode import RunEpisode

from models.features_utils import ScalerGlob, DynamicFeatures
from generator.op.generator_utils import get_generated_seeds

In [None]:
import torch, torch.nn as nn
import torch.autograd as autograd
from torch.distributions import Categorical
from torch.utils.checkpoint import checkpoint

from torch import optim
from torch import dot
import torch.nn.functional as F

In [None]:
# Show the installed PyTorch version for the record of this run.
print(torch.__version__)

In [None]:
# Fix the NumPy and PyTorch (CPU and CUDA) random seeds for reproducibility.
random_seed = 25029
np.random.seed(random_seed)

torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

## Train

In [None]:
def trainEpochs(run_episode, ds, model_opt, scheduler, args, run_vr='0'):
    """Train for ``args.epochs`` epochs with periodic logging, validation and checkpoints.

    Every ``args.nprint`` epochs the running training averages are printed and
    logged to TensorBoard, validation is run for each entry of ``val_ranges``,
    and the accumulated history (including the current step) is written to a
    CSV file in ``args.save_hist_dir``. Every ``args.nsave`` epochs the state
    dicts of ``run_episode`` and ``model_opt`` are saved in
    ``args.save_weights_dir``.

    Args:
        run_episode: object forwarded to ``ut.train_model`` and
            ``ut.run_validation``; must provide ``state_dict()``.
        ds: object providing ``data_scaler``; also forwarded whole to
            ``ut.run_validation``.
        model_opt: optimizer; must provide ``state_dict()``.
        scheduler: learning-rate scheduler forwarded to ``ut.train_model``.
        args: configuration namespace. This function reads ``epochs``,
            ``nprint``, ``nsave``, ``save_hist_dir``, ``save_weights_dir``,
            ``agent_name``, ``noise_on`` and ``nb_name``.
        run_vr: run identifier embedded in the output file names.

    Returns:
        pandas.DataFrame: one row per logging step with the training averages
        and the per-size validation reward/penalty.
    """
    # Bounds forwarded to ut.run_validation, keyed by number of nodes.
    # NOTE(review): presumably first/last validation instance ids - confirm.
    val_ranges = {
        20: (1, 250),
        50: (251, 500),
        100: (501, 750),
        200: (751, 1000),
    }

    # Running sums since the last logging step.
    reward_total = 0
    tloss_total = 0
    rwds_total = 0
    pen_total = 0
    step = 0
    train_hist = []

    gen_seeds = get_generated_seeds()
    # Keep only seeds up to 4000.
    # NOTE(review): the original comment says "for all n_nodes", but only the
    # entry for 20 nodes is filtered here - confirm this is intended.
    gen_seeds[20] = np.array([s for s in gen_seeds.get(20) if s <= 4000])

    # Create the output folders up front so a missing directory does not
    # abort the run at the first save, long after training has started.
    os.makedirs(args.save_hist_dir, exist_ok=True)
    os.makedirs(args.save_weights_dir, exist_ok=True)

    name_fields = dict(agent_name=args.agent_name,
                       noise=str(int(args.noise_on)),
                       notebook_name=args.nb_name,
                       run_vr=run_vr)
    # The history file name does not depend on the epoch: build it once.
    hist_path = '{path}/train_hist_{agent_name}_noise_{noise}_{notebook_name}_r{run_vr}.csv'.format(
        path=args.save_hist_dir, **name_fields)

    writer = SummaryWriter()
    try:
        with tqdm(range(args.epochs), leave=False, desc='1th loop') as tepoch:
            for epoch in tepoch:
                epoch_no = epoch + 1  # 1-based epoch used for logs and file names

                avreward, tloss, avg_rwds, avg_pen = ut.train_model(
                    run_episode, ds.data_scaler, model_opt, scheduler, args, gen_seeds)

                reward_total += avreward
                tloss_total += tloss
                rwds_total += avg_rwds
                pen_total += avg_pen
                step += 1

                if epoch_no % args.nprint == 0:
                    taverage_loss = tloss_total / step
                    avreward_total = reward_total / step
                    avg_rwds_total = rwds_total / step
                    avg_pen_total = pen_total / step

                    print('epoch: {}, Av. loss: {:.3f}, Av. final reward: {:.3f}'.format(str(epoch_no), taverage_loss, avreward_total))
                    print('epoch: {}, Av. rwd: {:.3f}, Av. pen: {:.3f}'.format(str(epoch_no), avg_rwds_total, avg_pen_total))
                    tepoch.set_postfix(loss=taverage_loss, reward=avg_rwds_total, penalty=avg_pen_total, final=avreward_total)
                    time.sleep(0.1)

                    # Start a fresh averaging window.
                    tloss_total = 0
                    reward_total = 0
                    rwds_total = 0
                    pen_total = 0
                    step = 0

                    step_dict = {
                        'epoch': epoch_no,
                        'tr_rwd': avg_rwds_total,
                        'tr_pen': avg_pen_total,
                        'tr_loss': taverage_loss,
                    }
                    # Training scalars use the same 1-based step as the
                    # validation scalars below so the curves line up.
                    writer.add_scalar('tr_rwd', avg_rwds_total, epoch_no)
                    writer.add_scalar('tr_pen', avg_pen_total, epoch_no)
                    writer.add_scalar('tr_total', avg_rwds_total + avg_pen_total, epoch_no)
                    writer.add_scalar('tr_loss', taverage_loss, epoch_no)

                    av_rws_total = 0
                    av_pens_total = 0
                    for n_nodes_val, (range_lo, range_hi) in val_ranges.items():
                        av_rwds, av_pens = ut.run_validation(run_episode,
                                                             range_lo,
                                                             range_hi,
                                                             ds, args, which_set='test')
                        print (f'validation {n_nodes_val} nodes - reward: {av_rwds:.2f}, penalty: {av_pens:.2f}, final: {(av_rwds+av_pens):.2f}')

                        writer.add_scalar(f'rwds_val_{n_nodes_val}', av_rwds, epoch_no)
                        writer.add_scalar(f'pens_val_{n_nodes_val}', av_pens, epoch_no)
                        writer.add_scalar(f'total_val_{n_nodes_val}', av_rwds + av_pens, epoch_no)

                        step_dict[f'val_rwd_{n_nodes_val}'] = av_rwds
                        step_dict[f'val_pen_{n_nodes_val}'] = av_pens
                        av_rws_total += av_rwds
                        av_pens_total += av_pens

                    n_val_sets = len(val_ranges)
                    writer.add_scalar('rwds_val_all', av_rws_total / n_val_sets, epoch_no)
                    writer.add_scalar('pens_val_all', av_pens_total / n_val_sets, epoch_no)
                    writer.add_scalar('total_val_all',
                                      av_rws_total / n_val_sets + av_pens_total / n_val_sets,
                                      epoch_no)
                    print(' ')

                    # Append first, then write: the CSV on disk must include
                    # the step that was just computed (with its validation
                    # metrics) instead of lagging one logging step behind.
                    train_hist.append(step_dict)
                    pd.DataFrame(train_hist).to_csv(hist_path, index=False)

                if epoch_no % args.nsave == 0:
                    weights_path = '{path}/model_{agent_name}_noise_{noise}_{notebook_name}_epoch_{epoch}_r{run_vr}.pkl'\
                                                .format(path=args.save_weights_dir,
                                                        epoch=epoch_no,
                                                        **name_fields)

                    torch.save({
                            'epoch': epoch_no,
                            'model_state_dict': run_episode.state_dict(),
                            'optimizer_state_dict': model_opt.state_dict()}, weights_path)
    finally:
        # Close the TensorBoard writer even when training is interrupted.
        writer.close()
    return pd.DataFrame(train_hist)

## Set up

In [None]:
# ====================================================
# Config
# ====================================================
class args:
    """Static configuration namespace for this notebook.

    The class is used as a plain attribute container (it is never
    instantiated here) and is passed whole to ``Agent``, ``RunEpisode``,
    ``trainEpochs`` and the ``ut`` helpers.
    """
    # --- Output locations -------------------------------------------------
    save_weights_dir = '../weights'      # checkpoints written by trainEpochs
    save_hist_dir = '../training_hist'   # training-history CSV files
    save_sub = '../submissions'
    # --- Training schedule ------------------------------------------------
    epochs = 15000
    n_nodes_list = range(10, 210)
    save_with_tr = True
    nb_name = 'nb7p0p8p3rg'     # embedded in output file names
    agent_name = 'agent001'     # embedded in output file names
    nsave = 1000                # save a checkpoint every `nsave` epochs
    ndfeatures = 34             # second argument of Agent(...)
    lr = 1e-4                   # optimizer learning rate
    min_lr = 1e-5               # eta_min of the cosine-annealing scheduler
    batch_size = 32
    weight_decay = 1e-5
    max_grad_norm = 2
    beta = 0.0 # for moving Av
    gamma = 0.01 # for entropy 
    # Model parameters
    rnn_hidden = 256  # dimension of decoder 
    encoder_dim = 256
    pre_lnorm = False
    has_glimpse = False
    use_lookahead = True        # fourth argument of RunEpisode(...)
    dropout = 0.1
    n_layers = 3
    n_heads = 8
    ff_dim = 512
    # Use the GPU only when one is actually present, so the notebook also
    # runs on CPU-only machines instead of failing on `.to(device)`.
    # Set to False to force CPU.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    use_checkpoint = True
    nprint = 250                # log and validate every `nprint` epochs
    n_sims = 6
    accumulation_steps = n_sims 
    from_file = True
    noise_on = True             # also embedded (as 0/1) in output file names
    feature_list = ['x_coordinate',
                    'y_coordinate',
                    'tw_low',
                    'tw_high',
                    'prize',
                    'tmax',
                    'tw_delta',
                    'prize_tw_delta_ratio',
                    'tw_high_tmax_delta',
                    'tw_low_tmax_delta',
                    'prize_max_return_time_ratio']

    # First argument of Agent(...); kept in sync with feature_list.
    nfeatures = len(feature_list)

## RunEpisode testing

In [None]:
# Build the Agent from the configured feature counts and hidden size, then move it to the configured device.
model = Agent(args.nfeatures, args.ndfeatures, args.rnn_hidden, args).to(args.device)

In [None]:
# Wrap the model in a RunEpisode; this is the object that gets trained, validated and checkpointed below.
run_episode = RunEpisode(model, args, DynamicFeatures, args.use_lookahead)

In [None]:
# Scaler object; its `data_scaler` attribute is handed to ut.train_model inside trainEpochs.
ds = ScalerGlob()

In [None]:
# AdamW over all parameters of run_episode, with the configured learning rate.
# NOTE(review): args.weight_decay is defined in the config but is not passed
# here, so AdamW's own default weight decay applies - confirm this is intended.
model_opt = optim.AdamW(run_episode.parameters(), lr=args.lr)
# Cosine annealing with a period of args.epochs and a floor of args.min_lr.
# The scheduler is stepped outside this cell (it is forwarded to ut.train_model).
scheduler = optim.lr_scheduler.CosineAnnealingLR(model_opt, T_max= args.epochs, 
                                                        eta_min=args.min_lr)

In [None]:
# Run the full training loop (default run version '0'); returns the logged history as a DataFrame.
train_hist = trainEpochs(run_episode, ds, model_opt, scheduler, args)

In [None]:
# Reload the checkpoint written at the last training epoch of run version r0.
# The epoch comes from the config so it cannot drift from the training length.
ckpt_path = '{path}/model_{agent_name}_noise_{noise}_{notebook_name}_epoch_{epoch}_r0.pkl'.format(
    path=args.save_weights_dir,
    agent_name=args.agent_name,
    noise=str(int(args.noise_on)),
    notebook_name=args.nb_name,
    epoch=args.epochs)
# map_location restores the tensors onto the configured device, so a
# checkpoint saved on GPU can also be loaded on another device.
# The variable is named `ckpt` so it does not shadow the `checkpoint`
# function imported from torch.utils.checkpoint at the top of the notebook.
ckpt = torch.load(ckpt_path, map_location=args.device)
run_episode.load_state_dict(ckpt['model_state_dict'])
model_opt.load_state_dict(ckpt['optimizer_state_dict'])

In [None]:
# Validate the loaded model with bounds 1..1000 (the union of the per-size
# ranges used during training) and print final (reward + penalty), reward, penalty.
av_rwds, av_pens = ut.run_validation(run_episode, 1, 1000, ds, args, which_set='test')
print(av_rwds+av_pens, av_rwds, av_pens)

In [None]:
# Call ut.create_submission on the 'test' set with n_tours=100 and print
# final (reward + penalty), reward, penalty.
# NOTE(review): output location is presumably args.save_sub - confirm in ut.create_submission.
av_rwds, av_pens = ut.create_submission(run_episode, ds, args, n_tours=100, with_as=False, which_set='test')
print(av_rwds+av_pens, av_rwds, av_pens)