In [1]:
import numpy as np
import pandas as pd
from copy import deepcopy

class DataLoader:
    """Cuts (context, forecast) window pairs out of pickled train/test series.

    A pair anchored at index ``i`` uses ``series[i-n:i]`` as context and
    ``series[i:i+n]`` as forecast, so every valid anchor lies in
    ``[n, len(series)-n]``.

    Attributes:
        trainset, testset: ``.values`` of the unpickled objects. Each window of
            ``n`` rows is reshaped to ``[1, n]``, so the series must hold exactly
            one value per row (assumed shape ``(m,)`` or ``(m, 1)`` - TODO confirm).
        m, m_test: number of rows in the train / test series.
        n: window length used for both context and forecast.
        rolling: if True anchors advance by 1, otherwise by ``n``.
        small_subset: if True (and ``rolling``), only 2000 anchors are drawn per epoch.
        sample_indices: shuffled training anchors for the current epoch.
        sample_idx: cursor into ``sample_indices``.
        epoch_end: NOTE the inverted meaning - True while the current epoch still
            has batches left, set to False by ``sample`` when it hands out the last
            batch. Callers loop ``while dataset.epoch_end`` and reset it to True.
    """

    def __init__(self, train_f="wind_power_data/wind_power_train.pickle", test_f = "wind_power_data/wind_power_test.pickle", n=96, rolling=True, small_subset=False, directory='./'):
        # SECURITY: unpickling executes arbitrary code; only load trusted files.
        self.trainset = pd.read_pickle(directory+'/'+train_f).values
        self.testset = pd.read_pickle(directory+'/'+test_f).values
        self.m = self.trainset.shape[0]
        self.m_test = self.testset.shape[0]
        self.n = n
        self.rolling = rolling
        self.small_subset = small_subset
        self.sample_indices = self._draw_sample_indices()
        self.num_samples = self.sample_indices.shape[0]
        self.sample_idx = 0
        self.epoch_end = True

    def _draw_sample_indices(self):
        """Return a fresh random permutation of training anchors for one epoch."""
        if self.rolling:
            candidates = list(range(self.n, self.m-self.n+1))
            count = 2000 if self.small_subset else self.m-2*self.n
        else:
            candidates = list(range(self.n, self.m-self.n+1, self.n))
            count = int((self.m-self.n)/self.n)
        return np.random.choice(candidates, count, replace=False)

    def _windows(self, series, starts):
        """Stack ``series[s:s+n]`` for every start ``s`` into a ``(len(starts), n)`` array."""
        return np.vstack([np.reshape(series[s:s+self.n], [1, self.n]) for s in starts])

    def sample(self, batch_size=24):
        """Return the next training batch.

        Returns:
            (context, forecast): ``context`` has shape ``(batch, n, 1)`` and
            ``forecast`` has shape ``(batch, n)``. The last batch of an epoch may
            be smaller than ``batch_size``; handing it out sets ``epoch_end`` to
            False and reshuffles the anchors for the next epoch.
        """
        if self.sample_idx+batch_size >= self.num_samples:
            self.epoch_end = False
            indices = self.sample_indices[self.sample_idx:]
            self.sample_idx = 0
            self.sample_indices = self._draw_sample_indices()
        else:
            indices = self.sample_indices[self.sample_idx:self.sample_idx+batch_size]
            self.sample_idx += batch_size

        context = self._windows(self.trainset, [i-self.n for i in indices])
        context = context[:, :, None]
        forecast = self._windows(self.trainset, indices)

        return context, forecast

    def test_samples(self, num_contexts=15):
        """Draw ``num_contexts`` distinct, non-overlapping pairs from the test series.

        Returns:
            (context, forecast) with shapes ``(num_contexts, n, 1)`` and
            ``(num_contexts, n)``.
        """
        indices = np.random.choice(list(range(self.n, self.m_test-self.n+1, self.n)), num_contexts, replace=False)
        context = self._windows(self.testset, [i-self.n for i in indices])
        forecast = self._windows(self.testset, indices)

        context = context[:, :, None]

        return context, forecast

    def test_data(self):
        """Return every rolling (context, forecast) pair of the test series.

        Returns:
            (context, forecast) with shapes ``(N, n, 1)`` and ``(N, n)`` where
            ``N = m_test - 2*n + 1``; row ``k`` of ``forecast`` directly follows
            row ``k`` of ``context`` in the series.

        Raises:
            ValueError: if the test series is shorter than ``2*n`` rows.
        """
        # Anchors stop at m_test-n so every forecast window is a full n rows long.
        anchors = range(self.n, self.m_test-self.n+1)
        if len(anchors) == 0:
            raise ValueError("test set has %d rows; at least %d are needed" % (self.m_test, 2*self.n))
        context = self._windows(self.testset, [i-self.n for i in anchors])
        context = context[:, :, None]
        forecast = self._windows(self.testset, anchors)
        return context, forecast
        



In [3]:
import torch
from DataLoader import DataLoader
import numpy as np
import os
from waveglow_model import WaveGlow, WaveGlowLoss
import matplotlib.pyplot as plt
import argparse

# parser = argparse.ArgumentParser(description='argument parser')
# parser.add_argument('--epochs', dest='epochs', type=int, default=100)
# parser.add_argument('--rolling', dest='rolling', type=int, default=1)
# parser.add_argument('--small_subset', dest='small_subset', type=int, default=0)
# parser.add_argument('--use_gpu', dest='use_gpu', type=int, default=1)
# parser.add_argument('--checkpointing', dest='checkpointing', type=int, default=1)
# parser.add_argument('--generate_per_epoch', dest='generate_per_epoch', type=int, default=1)
# parser.add_argument('--generate_final', dest='generate_final', type=int, default=1)
# parser.add_argument('--batch_size', dest='batch_size', type=int, default=12)
# parser.add_argument('--learning_rate', dest='learning_rate', type=float, default=1e-4)
# parser.add_argument('--n_context_channels', dest='n_context_channels', type=int, default=96)
# parser.add_argument('--n_flows', dest='n_flows', type=int, default=6)
# parser.add_argument('--n_group', dest='n_group', type=int, default=24)
# parser.add_argument('--n_early_every', dest='n_early_every', type=int, default=3)
# parser.add_argument('--n_early_size', dest='n_early_size', type=int, default=6)
# parser.add_argument('--n_layers', dest='n_layers', type=int, default=4)
# parser.add_argument('--dilation_list', dest='dilation_list', type=str, default='1 1 2 2')
# parser.add_argument('--n_channels', dest='n_channels', type=int, default=96)
# parser.add_argument('--kernel_size', dest='kernel_size', type=int, default=3)
# args = parser.parse_args()

# args.rolling = True if args.rolling else False
# args.small_subset = True if args.small_subset else False
# args.use_gpu = True if args.use_gpu else False
# args.checkpointing = True if args.checkpointing else False
# args.dilation_list = [int(i) for i in args.dilation_list.split(' ')]

def mse_loss(context, forecast, model, use_gpu, generations_per_sample=20):
    """Average mean-squared error between sampled forecasts and the true forecast.

    ``model.generate(context)`` is called ``generations_per_sample`` times; each
    generated batch is compared element-wise with ``forecast`` and the mean
    squared errors are averaged over all generations.

    Args:
        context: array-like conditioning windows, converted to a float32 tensor.
        forecast: ground-truth values (NumPy array or tensor) with the same shape
            as the output of ``model.generate``.
        model: object exposing ``generate(context)`` that returns a tensor.
        use_gpu: if True the context is moved to CUDA before generation.
        generations_per_sample: number of stochastic generations to average.

    Returns:
        float: the averaged MSE.

    Raises:
        ValueError: if ``generations_per_sample`` is not positive.
    """
    if generations_per_sample <= 0:
        raise ValueError("generations_per_sample must be positive")

    # Convert once, outside the loop, instead of re-wrapping on every generation.
    context = torch.as_tensor(np.asarray(context), dtype=torch.float32)
    if use_gpu:
        context = context.cuda()

    if torch.is_tensor(forecast):
        forecast = forecast.detach().cpu().numpy()
    forecast = np.asarray(forecast)

    total = 0.0
    with torch.no_grad():
        for _ in range(generations_per_sample):
            # Always bring the generation back to host memory so it can be
            # compared with the NumPy target regardless of the device used.
            gen_forecast = model.generate(context).detach().cpu().numpy()
            total += float(np.square(gen_forecast-forecast).mean())

    mse = total / generations_per_sample
    print("Test MSE Loss: %.4f" % mse)
    return mse

def load_checkpoint(checkpoint_path, model, optimizer):
    """Restore ``model`` and ``optimizer`` in place from a checkpoint file.

    The file is expected to hold a dict with the keys ``'iteration'``,
    ``'optimizer'`` (an optimizer state dict) and ``'model'`` (a pickled module
    whose ``state_dict()`` is copied into ``model``). Tensors are mapped to CPU
    while loading.

    Returns:
        (model, optimizer, iteration) - the two objects passed in, now restored,
        plus the iteration counter stored in the checkpoint.

    Raises:
        AssertionError: if ``checkpoint_path`` is not an existing file.
    """
    assert os.path.isfile(checkpoint_path)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    stored = torch.load(checkpoint_path, map_location='cpu')
    step = stored['iteration']
    optimizer.load_state_dict(stored['optimizer'])
    saved_module = stored['model']
    model.load_state_dict(saved_module.state_dict())
    print("Loaded checkpoint '%s' (iteration %d)" % (checkpoint_path, step))
    return model, optimizer, step


def save_checkpoint(model, optimizer, learning_rate, iteration, filepath, use_gpu=True):
    """Write the model, optimizer state and bookkeeping values to ``filepath``.

    The stored dict has the keys ``'model'`` (the module object itself, which is
    what ``load_checkpoint`` reads back via ``.state_dict()``), ``'iteration'``,
    ``'optimizer'`` (state dict) and ``'learning_rate'``.

    Args:
        model: module to store.
        optimizer: optimizer whose ``state_dict()`` is stored.
        learning_rate: value recorded alongside the weights.
        iteration: iteration counter recorded alongside the weights.
        filepath: destination file; missing parent directories are created.
        use_gpu: unused, kept for call-site compatibility.
    """
    print("Saving model and optimizer state at iteration %d to %s" % (iteration, filepath))

    # torch.save does not create directories, so make sure the target exists.
    parent = os.path.dirname(filepath)
    if parent:
        os.makedirs(parent, exist_ok=True)

    torch.save({'model': model,
                'iteration': iteration,
                'optimizer': optimizer.state_dict(),
                'learning_rate': learning_rate}, filepath)


# n_context_channels=96, n_flows=6, n_group=24, n_early_every=3, n_early_size=8, n_layers=2, dilation_list=[1,2], n_channels=96, kernel_size=3, use_gpu=True
def training_procedure(dataset=None, num_gpus=0, output_directory='./train', epochs=1000, learning_rate=1e-4, batch_size=12, checkpointing=True, checkpoint_path="./checkpoints", seed=2019, params = [96, 6, 24, 3, 8, 2, [1,2], 96, 3], use_gpu=True, gen_tests=False, mname='model'):
    """Train a WaveGlow model on ``dataset`` and evaluate it on the test split.

    Args:
        dataset: object exposing ``sample(batch_size)``, ``test_data()``, ``n`` and
            the ``epoch_end`` flag (True while an epoch still has batches left).
        num_gpus: unused.
        output_directory: root under which ``<mname>/`` checkpoints are written.
        epochs: number of passes over the training anchors.
        learning_rate: Adam learning rate.
        batch_size: batch size requested from ``dataset.sample``.
        checkpointing: if True, a checkpoint is saved after every epoch.
        checkpoint_path: directory created when ``checkpointing`` is True; the
            checkpoints themselves go to ``output_directory/mname``.
        seed: seed for the torch (and CUDA) random generators.
        params: positional WaveGlow arguments, in order: n_context_channels,
            n_flows, n_group, n_early_every, n_early_size, n_layers,
            dilation_list, n_channels, kernel_size. ``use_gpu`` is appended.
        use_gpu: run the model and the batches on CUDA.
        gen_tests: if True, ``generate_tests`` is called after every epoch.
        mname: model name used for directories and log lines.

    Returns:
        (test_loss, model): WaveGlow loss on the full test set and the trained
        model, which is left in eval mode.

    Side effects:
        Writes checkpoints, prints progress and saves ``total_loss_graph.png``
        to the current working directory.
    """
    print("#############")
    # Copy before extending: appending to `params` directly would grow the shared
    # default list (and the caller's list) on every call.
    model_args = list(params) + [use_gpu]
    torch.manual_seed(seed)
    if use_gpu:
        torch.cuda.manual_seed(seed)

    if checkpointing and checkpoint_path:
        os.makedirs(checkpoint_path, exist_ok=True)
    # Every checkpoint below is written into this directory.
    os.makedirs("%s/%s" % (output_directory, mname), exist_ok=True)

    criterion = WaveGlowLoss()
    model = WaveGlow(*model_args)
    if use_gpu:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    def to_tensor(array):
        """Convert a NumPy batch to a float tensor on the training device."""
        tensor = torch.FloatTensor(array)
        return tensor.cuda() if use_gpu else tensor

    model_banner = "Model waveglow_ncontextchannels-%d_nflows-%d_ngroup-%d-nearlyevery-%d-nearlysize-%d-nlayers-%d_dilations-%s_nchannels_%d-kernelsize-%d-lr-%.5f_seed-%d" % (params[0], params[1], params[2], params[3], params[4], params[5], str(params[6]), params[7], params[8], learning_rate, seed)

    model.train()
    loss_iteration = []
    # Defined up front so the final checkpoint still works when epochs == 0.
    epoch = -1
    iteration = 0
    # A dataset reused after an interrupted run may still carry a False flag.
    dataset.epoch_end = True
    for epoch in range(epochs):
        iteration = 0
        print("Epoch: %d/%d" % (epoch+1, epochs))
        avg_loss = []
        while dataset.epoch_end:
            context, forecast = dataset.sample(batch_size)
            forecast = to_tensor(forecast)
            context = to_tensor(context)

            z, log_s_list, log_det_w_list, early_out_shapes = model(forecast, context)

            loss = criterion((z, log_s_list, log_det_w_list))
            reduced_loss = loss.item()
            loss_iteration.append(reduced_loss)
            optimizer.zero_grad()
            loss.backward()
            avg_loss.append(reduced_loss)
            optimizer.step()
            print(model_banner)
            print("On iteration %d with loss %.4f" % (iteration, reduced_loss))
            iteration += 1

        if gen_tests: generate_tests(dataset, model, 5, dataset.n, use_gpu, str(epoch+1), mname=mname)
        epoch_loss = sum(avg_loss)/len(avg_loss) if avg_loss else float('nan')
        if checkpointing:
            checkpoint_path = "%s/%s/epoch-%d_loss-%.4f" % (output_directory, mname, epoch, epoch_loss)
            save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path, use_gpu)

        dataset.epoch_end = True

    # ---- final evaluation on the held-out series ----
    model.eval()
    test_context, test_forecast = dataset.test_data()
    test_context = np.asarray(test_context)
    if test_context.ndim == 2:
        # The model is trained on (batch, n, 1) contexts; add the trailing axis.
        test_context = test_context[:, :, None]
    with torch.no_grad():
        z, log_s_list, log_det_w_list, early_out_shapes = model(to_tensor(test_forecast), to_tensor(test_context))
        test_loss = criterion((z, log_s_list, log_det_w_list)).item()
    # The file name below needs a scalar; np.mean collapses a per-sample array.
    test_mse = float(np.mean(np.asarray(mse_loss(test_context, test_forecast, model, use_gpu))))

    checkpoint_path = "%s/%s/finalmodel_epoch-%d_testloss-%.4f_testmse_%.4f" % (output_directory, mname, epoch, test_loss, test_mse)
    save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path, use_gpu)

    print("Test loss for this model is %.5f" % test_loss)
    plt.figure()
    plt.plot(range(len(loss_iteration)), np.log10(np.array(loss_iteration)+1.0))
    plt.xlabel('iteration')
    plt.ylabel('log10 of loss')
    plt.savefig('total_loss_graph.png')
    plt.close()
    return test_loss, model

def generate_tests(dataset, model, num_contexts=15, n=96, use_gpu=True, epoch='final', batch_size=24, output_directory='generated', mname='model'):
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)
        
    context, forecast = dataset.test_samples(num_contexts=num_contexts)
    if use_gpu:
        context = torch.cuda.FloatTensor(context)
    else:
        context = torch.FloatTensor(context)

    if use_gpu:
        gen_forecast = model.generate(context).cpu()
    else:
        gen_forecast = model.generate(context)

    for i in range(num_contexts, ):
        plt.figure()
        plt.plot(range(n), gen_forecast[i, :], label='generated')
        plt.plot(range(n), forecast[i, :], label='original')
        plt.legend()
        plt.xlabel('time (t)')
        plt.savefig('$s/%s/forecast_generated_%d_epoch-%s.png' % (output_directory, mname, i, epoch))
        plt.close()



def training_generator(config, epochs=100, batch_size=24, seed=2019, generate_per_epoch=True, checkpointing=False, use_gpu=False, n_channels=96, n_context_channels=96, rolling=True, dataset=None, generate_final=True):
    """Build WaveGlow hyper-parameters from ``config``, train, and return the test loss.

    Args:
        config: mapping with the keys ``n_flows``, ``n_group``, ``n_early_every``,
            ``n_early_size``, ``n_layers``, ``dilation_rate``, ``kernel_size`` and
            ``learning_rate``; an optional ``dataset`` entry takes precedence over
            the ``dataset`` argument. ``dilation_rate`` r selects the fraction
            ``0.25*r + 0.25`` of layers that use dilation 1; the rest use 2.
        epochs, batch_size, seed, checkpointing, use_gpu: forwarded to
            ``training_procedure``.
        generate_per_epoch: forwarded as ``gen_tests``.
        n_channels, n_context_channels: WaveGlow channel sizes.
        rolling: only used when a default ``DataLoader`` has to be created.
        dataset: data source used when ``config`` carries none.
        generate_final: if True, ``generate_tests`` is run on the trained model.

    Returns:
        The test loss reported by ``training_procedure``.
    """
    multiple = [1, 2]

    n_layers = int(config["n_layers"])
    # Derive the second count from the first so the list always has exactly
    # n_layers entries, even when n_layers is not a multiple of 4.
    n_first = int((config["dilation_rate"]*.25 + .25)*n_layers)
    n_first = max(0, min(n_layers, n_first))
    dilation_list = [multiple[0]]*n_first + [multiple[1]]*(n_layers - n_first)

    params = [n_context_channels,
                config["n_flows"],
                config["n_group"],
                config["n_early_every"],
                config["n_early_size"],
                config["n_layers"],
                dilation_list,
                n_channels,
                config["kernel_size"]]

    if "dataset" in config:
        dataset = config["dataset"]
    if dataset is None:
        dataset = DataLoader(rolling=rolling, small_subset=False)

    output_directory = './train'

    mname = 'waveglow_ncontextchannels-%d_nflows-%d_ngroup-%d-nearlyevery-%d-nearlysize-%d-nlayers-%d_dilations-%s_nchannels_%d-kernelsize-%d-lr-%.5f_seed-%d' % (params[0], params[1], params[2], params[3], params[4], params[5], str(params[6]), params[7], params[8], config["learning_rate"], seed)
    # exist_ok makes re-running the same configuration safe.
    os.makedirs(output_directory+'/'+mname, exist_ok=True)

    test_loss, final_model = training_procedure(epochs=epochs,
                            dataset=dataset,
                            use_gpu=use_gpu,
                            checkpointing=checkpointing,
                            gen_tests=generate_per_epoch,
                            batch_size=batch_size,
                            learning_rate=config["learning_rate"],
                            seed=seed,
                            params=params,
                            mname=mname,
                            output_directory=output_directory)
    if generate_final:
        generate_tests(dataset, final_model, use_gpu=use_gpu, mname=mname)

    return test_loss
        
        


In [5]:
# Single training run with a fixed configuration on rolling windows.
dataset = DataLoader(rolling=True, small_subset=False)
config = {
    'n_flows': 6,
    'n_group': 24,
    'n_early_every': 3,
    'n_early_size': 8,
    'n_layers': 4,
    'dilation_rate': 2,
    'multiple': [0, 1],
    'kernel_size': 3,
    'learning_rate': 0.001,
    'dataset': dataset,
}
training_generator(config=config)

#############
Channels:  24
Channels:  24
Channels:  24
Channels:  16
Channels:  16
Channels:  16
Epoch: 1/100
Model waveglow_ncontextchannels-96_nflows-6_ngroup-24-nearlyevery-3-nearlysize-8-nlayers-4_dilations-[1, 1, 1, 2]_nchannels_96-kernelsize-3-lr-0.00100_seed-2019
On iteration 0 with loss 45.5237
Model waveglow_ncontextchannels-96_nflows-6_ngroup-24-nearlyevery-3-nearlysize-8-nlayers-4_dilations-[1, 1, 1, 2]_nchannels_96-kernelsize-3-lr-0.00100_seed-2019
On iteration 1 with loss 33.1554
Model waveglow_ncontextchannels-96_nflows-6_ngroup-24-nearlyevery-3-nearlysize-8-nlayers-4_dilations-[1, 1, 1, 2]_nchannels_96-kernelsize-3-lr-0.00100_seed-2019
On iteration 2 with loss 19.4754
Model waveglow_ncontextchannels-96_nflows-6_ngroup-24-nearlyevery-3-nearlysize-8-nlayers-4_dilations-[1, 1, 1, 2]_nchannels_96-kernelsize-3-lr-0.00100_seed-2019
On iteration 3 with loss 11.4075
Model waveglow_ncontextchannels-96_nflows-6_ngroup-24-nearlyevery-3-nearlysize-8-nlayers-4_dilations-[1, 1, 1, 2]

Model waveglow_ncontextchannels-96_nflows-6_ngroup-24-nearlyevery-3-nearlysize-8-nlayers-4_dilations-[1, 1, 1, 2]_nchannels_96-kernelsize-3-lr-0.00100_seed-2019
On iteration 42 with loss 1.6423
Model waveglow_ncontextchannels-96_nflows-6_ngroup-24-nearlyevery-3-nearlysize-8-nlayers-4_dilations-[1, 1, 1, 2]_nchannels_96-kernelsize-3-lr-0.00100_seed-2019
On iteration 43 with loss 1.4205
Model waveglow_ncontextchannels-96_nflows-6_ngroup-24-nearlyevery-3-nearlysize-8-nlayers-4_dilations-[1, 1, 1, 2]_nchannels_96-kernelsize-3-lr-0.00100_seed-2019
On iteration 44 with loss 1.3565
Model waveglow_ncontextchannels-96_nflows-6_ngroup-24-nearlyevery-3-nearlysize-8-nlayers-4_dilations-[1, 1, 1, 2]_nchannels_96-kernelsize-3-lr-0.00100_seed-2019
On iteration 45 with loss 1.4141
Model waveglow_ncontextchannels-96_nflows-6_ngroup-24-nearlyevery-3-nearlysize-8-nlayers-4_dilations-[1, 1, 1, 2]_nchannels_96-kernelsize-3-lr-0.00100_seed-2019
On iteration 46 with loss 1.2656
Model waveglow_ncontextchannel

KeyboardInterrupt: 

In [None]:
import ray, os
from ray.tune import run
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.suggest.bayesopt import BayesOptSearch
from hyperopt import hp


    # n_context_channels=96, n_flows=6, n_group=24, 
    #n_early_every=3, n_early_size=8, 
    #n_layers=2, dilation_list=[1,2], n_channels=96, 
    #kernel_size=3, use_gpu=True
    # params = [96, 6, 24, 3, 8, 2, [1,2], 96, 3]

# Hyper-parameter search over training_generator using hyperopt via Ray Tune.
# The dataset is loaded once here and handed to every trial through the space.
dataset = DataLoader(rolling=True, small_subset=False)
# Several entries below are np.arange ranges containing a single value
# (n_flows=6, n_group=24, n_early_every=3, n_layers=20), i.e. they are fixed.
space = {'n_flows': hp.choice('n_flows', np.arange(6, 7, dtype=int)),
        'n_group': hp.choice('n_group', np.arange(24, 25, dtype=int)),
#         'n_early_every': hp.choice('n_early_every', np.arange(1, 3, dtype=int)),
         'n_early_every': hp.choice('n_early_every', np.arange(3, 4, dtype=int)),
        'n_early_size': hp.choice('n_early_size', np.arange(2, 8, dtype=int)),
        'n_layers': hp.choice('n_layers', np.arange(20, 24, 4, dtype=int)),
#         'dilation_rate': hp.choice('dilation_rate', [0, 1, 2, 3]),
         'dilation_rate': hp.choice('dilation_rate', [0, 1]),
        # 'multiple' is sampled but training_generator does not read it.
        'multiple': hp.choice('multiple', [0, 1]),
        'kernel_size': hp.choice('kernel_size', [1, 3]),
        # NOTE(review): this yields the integers 1..19, which training_generator
        # passes straight through as the optimizer learning rate - presumably a
        # scale factor (e.g. times 1e-4) was intended; confirm.
        'learning_rate': hp.choice('learning_rate', np.arange(1, 20, dtype=int)),
        'dataset': dataset}

# NOTE(review): no metric name is given here and training_generator returns its
# loss rather than reporting it - verify the search actually receives a score.
algo = HyperOptSearch(space, 
                      max_concurrent=1, 
                      mode="min")

# scheduler = AsyncHyperBandScheduler(metric="test_loss", mode="min")
analysis = run(training_generator, search_alg=algo, num_samples=1)

2019-11-28 12:18:05,888	INFO resource_spec.py:205 -- Starting Ray with 4.93 GiB memory available for workers and up to 2.48 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/4.93 GiB heap, 0.0/1.71 GiB objects
Memory usage on this node: 7.8/16.0 GiB

