In [3]:
import os
import sys
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
import nevergrad as ng

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from python.models import TCN_Davies
from python.data import Data, collate_fn
from python.optimizations import (weight_norm, VariationalDropout, 
    VariationalHidDropout, WeightDrop, embedded_dropout)
from python.utils import *

In [4]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Report the runtime environment so the notebook output records it.
print('Torch version: {}'.format(torch.__version__))
print('Device: %s' % (device))
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")

Torch version: 1.1.0
Device: cpu


In [5]:
# Load the full table; the first CSV column is used as the DataFrame index.
data = pd.read_csv(
    '../data/data.csv',
    index_col=0,
)

In [25]:
# Fix the NumPy seed so the train/validation split is reproducible.
np.random.seed(1111)

# NOTE(review): the [0:5] slice keeps only the first five rows of data set 1,
# which looks like a debugging leftover — confirm before a real run.
data = data[data['data_set'] == 1][0:5]

batch_size = 2

train_valid_ratio = 0.9

# Sample *positional* row numbers. Both subsets are selected with `.iloc` and
# the validation rows are the complement over `np.arange(len(data))`, so the
# training draw has to come from the same 0..len(data)-1 range. Drawing from
# the values of the `idx` column is only correct when those values happen to
# coincide with the row positions of the filtered frame.
train_indices = np.sort(np.random.choice(len(data), int(len(data)*train_valid_ratio), replace=False))
valid_indices = np.setdiff1d(np.arange(len(data)), train_indices)

train_set = Data(data.iloc[train_indices])
valid_set = Data(data.iloc[valid_indices])

train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True, 
                         drop_last=True, collate_fn=collate_fn)
# The whole validation set is served as one batch.
valid_loader = DataLoader(dataset=valid_set, batch_size=len(valid_set), collate_fn=collate_fn)

# Opening in 'w' mode truncates the log from any previous run. Nothing is
# written through this handle, so close it right away instead of leaving it
# open. The name stays bound to the (closed) file object so later code that
# refers to `file` keeps working; closing an already-closed file is a no-op.
file = open('output.log','w') 
file.close()

## Objective functions

In [26]:
def validation_loss(nhid, levels, kernel_size, dropout, epochs=500):
    """Train a fresh ``TCN_Davies`` and return its best validation loss.

    The model is trained with Adam (lr=1e-3) on the module-level
    ``train_loader`` and evaluated after every epoch on the single batch
    served by the module-level ``valid_loader``. Progress is appended to
    'output.log'. Training stops early once more than 30 epochs have passed
    since the lowest validation loss seen so far.

    Args:
        nhid, levels, kernel_size, dropout: forwarded unchanged to
            ``TCN_Davies``.
        epochs: maximum number of training epochs (must be >= 1).

    Returns:
        float: the smallest validation loss observed over all epochs. This is
        the value reported in the log summary; the loss of the last epoch is
        not used because, after early stopping, it is by construction not the
        best one.

    Raises:
        ValueError: if ``epochs`` is smaller than 1 (there would be no
            validation loss to report).
    """
    if epochs < 1:
        raise ValueError('epochs must be at least 1, got {}'.format(epochs))

    clip = 0.2      # maximum gradient norm; a value <= 0 disables clipping
    patience = 30   # epochs to keep going after the best validation loss
    valid_loss_vec = []

    # One handle for the whole call, closed even if training raises.
    # flush=True keeps the log readable while a long run is in progress.
    with open('output.log', 'a') as log:
        print('call validation_loss(nhid={}, levels={}, kernel_size={}, dropout={})'.format(
            nhid, levels, kernel_size, dropout), file=log, flush=True)

        model = TCN_Davies(nhid, levels, kernel_size, dropout).to(device)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)

        optimizer = optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-08)
        # Class 1 is weighted 70x relative to class 0
        # (presumably to offset class imbalance — TODO confirm).
        loss_function = nn.NLLLoss(weight=torch.tensor([1., 70.], dtype=torch.double).to(device))

        for epoch in range(epochs):
            print('Epoch: {:4d}'.format(epoch), end='   ', file=log, flush=True)

            # Training mode: dropout-style layers are active.
            model.train()
            for feature, label in train_loader:
                model.zero_grad()  # clear out accumulated gradients
                out = model(feature.to(device))
                loss = loss_function(out, label.to(device))
                loss.backward()
                if clip > 0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
                optimizer.step()

            # Evaluation mode: torch.no_grad() only turns off gradient
            # tracking, it does not switch dropout-style layers off.
            model.eval()
            with torch.no_grad():
                valid_feature, valid_label = next(iter(valid_loader))
                out = model(valid_feature.to(device))
                valid_loss = loss_function(out, valid_label.to(device)).item()
            valid_loss_vec.append(valid_loss)

            print('Validation Loss: {:.3}'.format(valid_loss), file=log, flush=True)

            # Early stopping
            if epoch > np.argmin(valid_loss_vec) + patience:
                print('early stopping', file=log, flush=True)
                break

        min_idx = np.argmin(valid_loss_vec)
        min_loss = np.min(valid_loss_vec)

        print('Minimum at {} of {}: {:.3}\n'.format(min_idx, len(valid_loss_vec), min_loss),
              file=log, flush=True)

    # Close the module-level `file` handle as before (no-op if already closed).
    file.close()
    return float(min_loss)

In [31]:
def rate_of_convergence(batch_size, learning_rate, timeout=10, epochs=1000):
    """Return the best validation loss reached within a time budget.

    A ``TCN_Davies`` with a fixed architecture (nhid=16, levels=10,
    kernel_size=5, dropout=0.1) is trained with Adam on the module-level
    ``train_set`` using the given batch size and learning rate. After each
    epoch the model is evaluated on the single batch served by the
    module-level ``valid_loader``. Progress is appended to 'output.log'.

    Args:
        batch_size: training batch size (int; it is logged with ``{:d}``).
        learning_rate: Adam learning rate.
        timeout: wall-clock budget in minutes; checked once per epoch, so the
            epoch in progress is always finished.
        epochs: maximum number of training epochs (must be >= 1).

    Returns:
        The smallest validation loss observed before the timeout or the
        epoch limit was hit.

    Raises:
        ValueError: if ``epochs`` is smaller than 1 (there would be no
            validation loss to report).
    """
    if epochs < 1:
        raise ValueError('epochs must be at least 1, got {}'.format(epochs))

    # One handle for the whole call, closed even if training raises.
    # flush=True keeps the log readable while a long run is in progress.
    with open('output.log', 'a') as log:
        print('call rate_of_convergence(batch_size={:d}, learning_rate={:.0e})'.format(
            batch_size, learning_rate), file=log, flush=True)

        start = time.time()

        valid_loss_vec = []
        clip = 0.2  # maximum gradient norm; a value <= 0 disables clipping

        # Fixed architecture: only the optimisation settings vary here.
        nhid = 16
        levels = 10
        kernel_size = 5
        dropout = 0.1

        model = TCN_Davies(nhid, levels, kernel_size, dropout).to(device)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)

        # With drop_last=True a training set smaller than batch_size yields
        # zero batches, i.e. the model would never be updated and every
        # candidate would be scored on its random initialisation. Only drop
        # the trailing partial batch when at least one full batch exists.
        loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True,
                            drop_last=len(train_set) >= batch_size, collate_fn=collate_fn)

        optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08)
        # Class 1 is weighted 70x relative to class 0
        # (presumably to offset class imbalance — TODO confirm).
        loss_function = nn.NLLLoss(weight=torch.tensor([1., 70.], dtype=torch.double).to(device))

        for epoch in range(epochs):
            print('Epoch: {:4d}'.format(epoch), end='   ', file=log, flush=True)

            # Training mode: dropout-style layers are active.
            model.train()
            for feature, label in loader:
                model.zero_grad()  # clear out accumulated gradients
                out = model(feature.to(device))
                loss = loss_function(out, label.to(device))
                loss.backward()
                if clip > 0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
                optimizer.step()

            # Evaluation mode: torch.no_grad() only turns off gradient
            # tracking, it does not switch dropout-style layers off.
            model.eval()
            with torch.no_grad():
                valid_feature, valid_label = next(iter(valid_loader))
                out = model(valid_feature.to(device))
                valid_loss = loss_function(out, valid_label.to(device)).item()
            valid_loss_vec.append(valid_loss)

            print('Validation Loss: {:.3}'.format(valid_loss), file=log, flush=True)

            # timeout (given in minutes)
            if (time.time() - start)/60 > timeout:
                break

        min_idx = np.argmin(valid_loss_vec)
        min_loss = np.min(valid_loss_vec)

        print('Minimum at {} of {}: {:.3}\n'.format(min_idx, len(valid_loss_vec), min_loss),
              file=log, flush=True)

    # Close the module-level `file` handle as before (no-op if already closed).
    file.close()
    return min_loss

In [33]:
# argument transformation
# Candidate values for the arguments of validation_loss (architecture search).
nhid = ng.var.OrderedDiscrete([70, 80, 90, 100])
levels =  ng.var.OrderedDiscrete([6, 7, 8, 9])
kernel_size = ng.var.OrderedDiscrete([4, 5, 6, 7, 8])
dropout = ng.var.OrderedDiscrete([0.0 , 0.05, 0.1, 0.15, 0.2, 0.3])

# Candidate values for the arguments of rate_of_convergence.
batch_size =  ng.var.OrderedDiscrete([50, 100, 150, 200])
# OrderedDiscrete assumes the list is ordered, so the learning rates are
# listed in increasing order (same four values as before).
learning_rate = ng.var.OrderedDiscrete([1e-3, 5e-3, 1e-2, 5e-2])

# Number of objective evaluations; each rate_of_convergence call may run for
# up to its `timeout` (10 minutes by default).
budget = 10 

instrum = ng.Instrumentation(nhid, levels, kernel_size, dropout)
instrum2 = ng.Instrumentation(batch_size, learning_rate)

# Only the (batch_size, learning_rate) search is run here; `instrum` is the
# matching instrumentation for validation_loss.
optimizer = ng.optimizers.OnePlusOne(instrumentation=instrum2, budget=budget)

recommendation = optimizer.optimize(rate_of_convergence)

In [36]:
# Show the candidate returned by optimizer.optimize(rate_of_convergence).
print(recommendation)