In [1]:
import learn2learn as l2l
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pickle
import sys


sys.path.insert(1, "..")

from ts_dataset import TSDataset
from base_models import LSTMModel, FCN
from metrics import torch_mae as mae
from pytorchtools import EarlyStopping

# Active experiment configuration. The commented-out line holds the values the
# original cell assigned first and then immediately overrode.
# dataset_name, window_size, input_dim = "HR", 32, 13
dataset_name, window_size, input_dim = "POLLUTION", 5, 14
task_size, batch_size, output_dim = 50, 64, 1
##test
import copy


def to_torch(numpy_tensor, device=None):
    """Convert array-like data to a float32 tensor on the compute device.

    Args:
        numpy_tensor: array-like data accepted by ``torch.tensor``.
        device: target device. Defaults to the current CUDA device when CUDA
            is available and to the CPU otherwise, so the helper no longer
            crashes on hosts without a GPU.

    Returns:
        A new ``torch.float32`` tensor holding a copy of the input.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return torch.tensor(numpy_tensor).float().to(device)


def test2(maml, model_name, test_data_ML, adaptation_steps, learning_rate, with_early_stopping = False, horizon = 10, encoder = None):
    """Adapt the meta-learned head on sampled tasks and return the mean query MAE.

    For every sampled task a fresh clone of ``maml`` is adapted on that task's
    support windows with ``learner.adapt`` and then scored on the ``horizon``
    tasks that follow it.

    Args:
        maml: learn2learn MAML wrapper around the prediction head.
        model_name: ``"FCN"`` swaps the last two axes of every input window
            before encoding; any other value leaves the inputs unchanged.
        test_data_ML: task dataset exposing ``x`` / ``y`` arrays and integer
            indexing that yields ``(x_support, y_support)``.
        adaptation_steps: number of ``learner.adapt`` calls per task.
        learning_rate: not used by this function; kept so existing calls work.
        with_early_stopping: when true, the adapted head is checkpointed via
            ``EarlyStopping`` after every step (scored on the query windows)
            and the restored checkpoint is the one that gets evaluated.
        horizon: number of subsequent tasks that form the query set.
        encoder: feature extractor applied before the head. Defaults to the
            notebook-level ``model``, which the function previously used
            implicitly.

    Returns:
        Mean query MAE over the evaluated tasks.

    Raises:
        ValueError: if the dataset is too short to evaluate a single task.
    """
    if encoder is None:
        encoder = model

    total_tasks_test = len(test_data_ML)
    input_dim = test_data_ML.x.shape[-1]
    window_size = test_data_ML.x.shape[-2]
    output_dim = test_data_ML.y.shape[-1]

    # Visit about 100 evenly spaced tasks. The stride must be at least 1:
    # with fewer than 100 tasks the integer division yields 0 and range() raises.
    stride = max(1, total_tasks_test // 100)

    accum_error = 0.0
    count = 0

    for task in range(0, total_tasks_test - horizon - 1, stride):

        # Each task starts from the unmodified meta-learned weights.
        learner = maml.clone()

        x_spt, y_spt = test_data_ML[task]
        x_qry = test_data_ML.x[(task+1):(task+1+horizon)].reshape(-1, window_size, input_dim)
        y_qry = test_data_ML.y[(task+1):(task+1+horizon)].reshape(-1, output_dim)

        if model_name == "FCN":
            x_qry = np.transpose(x_qry, [0,2,1])
            x_spt = np.transpose(x_spt, [0,2,1])

        x_spt, y_spt = to_torch(x_spt), to_torch(y_spt)
        x_qry = to_torch(x_qry)
        y_qry = to_torch(y_qry)

        if with_early_stopping:
            early_stopping = EarlyStopping(patience=2, model_file="temp/temp_file.pt", verbose=True)
            # Plain copy of the wrapped head, used only to checkpoint adapted
            # weights. The previous code rebuilt an LSTMModel/FCN here, whose
            # state dict cannot receive the head's parameters.
            snapshot = copy.deepcopy(maml.module)

        for _ in range(adaptation_steps):

            pred = learner(encoder(x_spt))
            error = mae(pred, y_spt)
            learner.adapt(error)

            if with_early_stopping:
                with torch.no_grad():
                    snapshot.load_state_dict(learner.module.state_dict())
                    qry_error = mae(snapshot(encoder(x_qry)), y_qry)
                early_stopping(qry_error, snapshot)

                if early_stopping.early_stop:
                    print("Early stopping")
                    break

        with torch.no_grad():
            if with_early_stopping and adaptation_steps > 0:
                # Score the best checkpoint, not the last adapted learner.
                snapshot.load_state_dict(torch.load("temp/temp_file.pt"))
                pred = snapshot(encoder(x_qry))
            else:
                pred = learner(encoder(x_qry))
            error = mae(pred, y_qry)

        accum_error += error.data
        count += 1

    if count == 0:
        raise ValueError("test_data_ML has too few tasks for horizon=%d" % horizon)

    return accum_error/count

def test(maml, model_name, test_data_ML, adaptation_steps, learning_rate, with_early_stopping = False, horizon = 10, encoder = None):
    """Fine-tune a copy of the meta-learned module with SGD and return the mean query MAE.

    For every sampled task, a deep copy of ``maml.module`` is trained for
    ``adaptation_steps`` SGD steps on the task's support windows and then
    scored on the ``horizon`` tasks that follow it.

    Args:
        maml: learn2learn MAML wrapper; only ``maml.module`` is read.
        model_name: ``"FCN"`` swaps the last two axes of every input window
            before encoding; any other value leaves the inputs unchanged.
        test_data_ML: task dataset exposing ``x`` / ``y`` arrays and integer
            indexing that yields ``(x_support, y_support)``.
        adaptation_steps: number of SGD steps per task.
        learning_rate: SGD learning rate used for the per-task fine-tuning.
        with_early_stopping: when true, checkpoint through ``EarlyStopping``
            on the query error and evaluate the restored checkpoint.
        horizon: number of subsequent tasks that form the query set.
        encoder: feature extractor applied before the fine-tuned module.
            Defaults to the notebook-level ``model``, which the function
            previously used implicitly.

    Returns:
        Mean query MAE over the evaluated tasks.

    Raises:
        ValueError: if the dataset is too short to evaluate a single task.
    """
    if encoder is None:
        encoder = model

    total_tasks_test = len(test_data_ML)
    input_dim = test_data_ML.x.shape[-1]
    window_size = test_data_ML.x.shape[-2]
    output_dim = test_data_ML.y.shape[-1]

    # Visit about 100 evenly spaced tasks. The stride must be at least 1:
    # with fewer than 100 tasks the integer division yields 0 and range() raises.
    stride = max(1, total_tasks_test // 100)

    accum_error = 0.0
    count = 0

    for task in range(0, total_tasks_test - horizon - 1, stride):

        # Copy whatever module MAML wraps instead of rebuilding a network from
        # `model_name`: a rebuilt LSTMModel/FCN only accepts the state dict
        # when MAML wraps exactly that architecture.
        model2 = copy.deepcopy(maml.module)
        model2.cuda()
        opt2 = optim.SGD(model2.parameters(), lr=learning_rate)

        x_spt, y_spt = test_data_ML[task]
        x_qry = test_data_ML.x[(task+1):(task+1+horizon)].reshape(-1, window_size, input_dim)
        y_qry = test_data_ML.y[(task+1):(task+1+horizon)].reshape(-1, output_dim)

        if model_name == "FCN":
            x_qry = np.transpose(x_qry, [0,2,1])
            x_spt = np.transpose(x_spt, [0,2,1])

        x_spt, y_spt = to_torch(x_spt), to_torch(y_spt)
        x_qry = to_torch(x_qry)
        y_qry = to_torch(y_qry)

        if with_early_stopping:
            early_stopping = EarlyStopping(patience=2, model_file="temp/temp_file.pt", verbose=True)

        for _ in range(adaptation_steps):

            opt2.zero_grad()

            pred = model2(encoder(x_spt))
            error = mae(pred, y_spt)
            error.backward()
            opt2.step()

            if with_early_stopping:
                with torch.no_grad():
                    qry_error = mae(model2(encoder(x_qry)), y_qry)
                early_stopping(qry_error, model2)

                if early_stopping.early_stop:
                    print("Early stopping")
                    break

        if with_early_stopping and adaptation_steps > 0:
            model2.load_state_dict(torch.load("temp/temp_file.pt"))

        with torch.no_grad():
            pred = model2(encoder(x_qry))
            error = mae(pred, y_qry)

        accum_error += error.data
        count += 1

    if count == 0:
        raise ValueError("test_data_ML has too few tasks for horizon=%d" % horizon)

    return accum_error/count
    

    
class LSTMModel_MRA(nn.Module):
    """LSTM encoder that maps a window to a Gaussian latent vector.

    The last layer's final hidden state is projected to a latent mean and a
    latent log-variance. ``forward`` returns a reparameterised sample in
    training mode and the mean in eval mode, and stores the KL term of the
    most recent call for ``get_kld``.
    """

    def __init__(self, batch_size, seq_len, input_dim, n_layers, hidden_dim, output_dim, lin_hidden_dim = 100):
        super().__init__()

        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True)
        # Created (and therefore part of the state dict) but not used by forward().
        self.linear = nn.Linear(hidden_dim, output_dim)

        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.n_layers = n_layers
        self.input_dim = input_dim

        # Projections from the final hidden state to the latent distribution.
        self.hidden_to_mean = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.hidden_to_logvar = nn.Linear(self.hidden_dim, self.hidden_dim)

    def init_hidden(self):
        """Return an all-zero ``(h0, c0)`` pair; forward() does not call this."""
        state_shape = (self.n_layers, self.batch_size, self.hidden_dim)
        return (torch.zeros(state_shape), torch.zeros(state_shape))

    def forward(self, x):
        """Encode ``x`` and return the latent sample (training) or mean (eval)."""
        _, (h_n, _) = self.lstm(x)
        top_state = h_n[-1].view(len(x), -1)

        mu = self.hidden_to_mean(top_state)
        logvar = self.hidden_to_logvar(top_state)

        # KL divergence to a standard normal, averaged over every element.
        self.kld = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())

        if not self.training:
            return mu

        # Reparameterisation: mean + sigma * noise.
        sigma = torch.exp(0.5 * logvar)
        noise = torch.randn_like(sigma)
        return noise.mul(sigma).add_(mu)

    def get_kld(self):
        """Return the KL term stored by the most recent forward() call."""
        return self.kld



In [2]:
def _load_pickle(split, variant):
    """Load ``../../Data/<split>-<dataset>-W<window>-T<task>-<variant>.pickle``.

    The file is opened in a ``with`` block so the handle is closed after
    loading; the previous bare ``open(...)`` calls never closed theirs.
    """
    path = ("../../Data/" + split + "-" + dataset_name + "-W" + str(window_size)
            + "-T" + str(task_size) + "-" + variant + ".pickle")
    # NOTE: pickle.load executes arbitrary code from the file; only load trusted data.
    with open(path, "rb") as handle:
        return pickle.load(handle)


train_data = _load_pickle("TRAIN", "NOML")
train_data_ML = _load_pickle("TRAIN", "ML")
validation_data = _load_pickle("VAL", "NOML")
validation_data_ML = _load_pickle("VAL", "ML")
test_data = _load_pickle("TEST", "NOML")
test_data_ML = _load_pickle("TEST", "ML")

In [4]:
# Plain LSTM model. A later cell rebinds `model` to an LSTMModel_MRA, so this
# instance only matters if that cell is skipped.
hidden_dim = 120
model = LSTMModel(
    batch_size=batch_size,
    seq_len=window_size,
    input_dim=input_dim,
    n_layers=2,
    hidden_dim=hidden_dim,
    output_dim=1,
)

In [5]:
# FCN encoder; a window of 5 steps gets shorter kernels than the default [8, 5, 3].
kernels = [4, 2, 1] if window_size == 5 else [8, 5, 3]
encoder = FCN(
    time_steps=window_size,
    channels=[input_dim, 128, 128, 128],
    kernels=kernels,
)

In [6]:
# Variational LSTM encoder used as the shared feature extractor for meta-training.
model = LSTMModel_MRA(
    batch_size=batch_size,
    seq_len=window_size,
    input_dim=input_dim,
    n_layers=2,
    hidden_dim=hidden_dim,
    output_dim=1,
)

In [7]:
# `learning_rate` is handed to MAML as `lr`; `meta_learning_rate` drives Adam.
learning_rate, meta_learning_rate = 0.01, 0.0005

# Linear prediction head on top of the encoder's latent vector, wrapped by MAML.
model2 = nn.Linear(hidden_dim, 1)
maml = l2l.algorithms.MAML(model2, lr=learning_rate, first_order=False)

# A single Adam optimizer updates both the head and the encoder.
opt = optim.Adam(
    [*maml.parameters(), *model.parameters()],
    lr=meta_learning_rate,
)

In [8]:
total_num_tasks = len(train_data_ML)
print("Num tasks for trianing:", total_num_tasks)

# Meta-training hyper-parameters; `num_tasks` and `num_iterations` are
# reassigned by the training cell further down.
num_tasks, horizon = 20, 10
num_iterations, adaptation_steps = 50, 1

Num tasks for trianing: 2056


In [10]:
#torch.cuda.empty_cache()
# Selects the input layout used by the training and evaluation code: "FCN"
# swaps the last two axes of every window, any other value leaves them as loaded.
model_name = "LSTM"

In [11]:
# Meta-training loop. `to_torch` comes from the first cell; the identical
# redefinition that used to live here was removed.
model.cuda()
model2.cuda()
num_tasks = total_num_tasks
num_iterations = 2000
eval_every = 1  # run validation/test evaluation every `eval_every` iterations

# Everything `opt` was built to update: the MAML-wrapped head and the encoder.
meta_params = list(maml.parameters()) + list(model.parameters())

for iteration in range(num_iterations):

    model.train()
    print("Training flag:", model.training)
    opt.zero_grad()
    print(iteration)

    for _ in range(num_tasks):

        learner = maml.clone()
        task = np.random.randint(0, total_num_tasks - horizon)
        # NOTE(review): drawn but never used; the query set is always the task
        # right after `task`. Kept so the random stream is unchanged; confirm intent.
        task_qry = np.random.randint(1, horizon + 1)
        x_spt, y_spt = train_data_ML[task]
        x_qry, y_qry = train_data_ML[task + 1]

        x_qry = x_qry.reshape(-1, window_size, input_dim)
        y_qry = y_qry.reshape(-1, output_dim)

        if model_name == "FCN":
            x_qry = np.transpose(x_qry, [0,2,1])
            x_spt = np.transpose(x_spt, [0,2,1])

        x_spt, y_spt = to_torch(x_spt), to_torch(y_spt)
        x_qry = to_torch(x_qry)
        y_qry = to_torch(y_qry)

        # Fast adaptation of the cloned head on the support set.
        for _ in range(adaptation_steps):
            pred = learner(model(x_spt))
            error = mae(pred, y_spt)
            learner.adapt(error)

        # Query loss plus the encoder's KL term; gradients accumulate across tasks.
        pred = learner(model(x_qry))
        evaluation_error = mae(pred, y_qry) + model.get_kld()
        evaluation_error.backward()

    # Average the accumulated gradients over the tasks. The original scaled
    # only `maml.parameters()`, leaving the encoder's gradients summed although
    # the same optimizer updates both. Parameters that never received a
    # gradient (e.g. layers unused in forward) are skipped.
    with torch.no_grad():
        for p in meta_params:
            if p.grad is not None:
                p.grad.mul_(1.0 / num_tasks)
    opt.step()

    if iteration % eval_every == 0:
        model.eval()
        print("Training flag:", model.training)
        val_error  = test2( maml, model_name, validation_data_ML, adaptation_steps, learning_rate)
        test_error  = test2( maml, model_name, test_data_ML, adaptation_steps, learning_rate)
        # Report only when fresh values were computed.
        print(val_error)
        print(test_error)

Training flag: True
0
Training flag: False
tensor(0.0327, device='cuda:0')
tensor(0.0450, device='cuda:0')
Training flag: True
1
Training flag: False
tensor(0.0372, device='cuda:0')
tensor(0.0468, device='cuda:0')
Training flag: True
2
Training flag: False
tensor(0.0387, device='cuda:0')
tensor(0.0474, device='cuda:0')
Training flag: True
3
Training flag: False
tensor(0.0378, device='cuda:0')
tensor(0.0468, device='cuda:0')
Training flag: True
4
Training flag: False
tensor(0.0360, device='cuda:0')
tensor(0.0457, device='cuda:0')
Training flag: True
5
Training flag: False
tensor(0.0338, device='cuda:0')
tensor(0.0443, device='cuda:0')
Training flag: True
6
Training flag: False
tensor(0.0322, device='cuda:0')
tensor(0.0434, device='cuda:0')
Training flag: True
7
Training flag: False
tensor(0.0313, device='cuda:0')
tensor(0.0428, device='cuda:0')
Training flag: True
8
Training flag: False
tensor(0.0307, device='cuda:0')
tensor(0.0423, device='cuda:0')
Training flag: True
9
Training flag: 

Training flag: False
tensor(0.0324, device='cuda:0')
tensor(0.0425, device='cuda:0')
Training flag: True
77
Training flag: False
tensor(0.0321, device='cuda:0')
tensor(0.0423, device='cuda:0')
Training flag: True
78
Training flag: False
tensor(0.0317, device='cuda:0')
tensor(0.0421, device='cuda:0')
Training flag: True
79
Training flag: False
tensor(0.0314, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
80
Training flag: False
tensor(0.0315, device='cuda:0')
tensor(0.0420, device='cuda:0')
Training flag: True
81
Training flag: False
tensor(0.0317, device='cuda:0')
tensor(0.0422, device='cuda:0')
Training flag: True
82
Training flag: False
tensor(0.0320, device='cuda:0')
tensor(0.0424, device='cuda:0')
Training flag: True
83
Training flag: False
tensor(0.0322, device='cuda:0')
tensor(0.0426, device='cuda:0')
Training flag: True
84
Training flag: False
tensor(0.0325, device='cuda:0')
tensor(0.0427, device='cuda:0')
Training flag: True
85
Training flag: False
tensor(

Training flag: False
tensor(0.0295, device='cuda:0')
tensor(0.0412, device='cuda:0')
Training flag: True
153
Training flag: False
tensor(0.0294, device='cuda:0')
tensor(0.0411, device='cuda:0')
Training flag: True
154
Training flag: False
tensor(0.0293, device='cuda:0')
tensor(0.0411, device='cuda:0')
Training flag: True
155
Training flag: False
tensor(0.0292, device='cuda:0')
tensor(0.0411, device='cuda:0')
Training flag: True
156
Training flag: False
tensor(0.0291, device='cuda:0')
tensor(0.0411, device='cuda:0')
Training flag: True
157
Training flag: False
tensor(0.0290, device='cuda:0')
tensor(0.0410, device='cuda:0')
Training flag: True
158
Training flag: False
tensor(0.0289, device='cuda:0')
tensor(0.0411, device='cuda:0')
Training flag: True
159
Training flag: False
tensor(0.0289, device='cuda:0')
tensor(0.0411, device='cuda:0')
Training flag: True
160
Training flag: False
tensor(0.0289, device='cuda:0')
tensor(0.0411, device='cuda:0')
Training flag: True
161
Training flag: Fals

tensor(0.0296, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
228
Training flag: False
tensor(0.0296, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
229
Training flag: False
tensor(0.0296, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
230
Training flag: False
tensor(0.0296, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
231
Training flag: False
tensor(0.0296, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
232
Training flag: False
tensor(0.0296, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
233
Training flag: False
tensor(0.0295, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
234
Training flag: False
tensor(0.0295, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
235
Training flag: False
tensor(0.0295, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
236
Training flag: False
tensor(0.0295, devi

Training flag: False
tensor(0.0297, device='cuda:0')
tensor(0.0418, device='cuda:0')
Training flag: True
304
Training flag: False
tensor(0.0297, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
305
Training flag: False
tensor(0.0298, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
306
Training flag: False
tensor(0.0298, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
307
Training flag: False
tensor(0.0297, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
308
Training flag: False
tensor(0.0297, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
309
Training flag: False
tensor(0.0297, device='cuda:0')
tensor(0.0419, device='cuda:0')
Training flag: True
310
Training flag: False
tensor(0.0296, device='cuda:0')
tensor(0.0418, device='cuda:0')
Training flag: True
311
Training flag: False
tensor(0.0296, device='cuda:0')
tensor(0.0418, device='cuda:0')
Training flag: True
312
Training flag: Fals

KeyboardInterrupt: 

In [26]:
# Re-run the validation evaluation with the current meta-learned weights.
val_error  = test2( maml, model_name, validation_data_ML, adaptation_steps, learning_rate)

In [None]:
# Evaluate on the test split with 5 SGD adaptation steps at lr=0.001, with
# early stopping on the query error enabled.
test( maml, "LSTM", test_data_ML,5, 0.001, with_early_stopping = True)


In [None]:

def step(model, data_iter, len_dataloader, optimizer = None, loss = mae, is_train=False, threshold = False):
    """Run one pass over ``len_dataloader`` batches and return the mean loss.

    Args:
        model: module to train or evaluate.
        data_iter: iterator yielding ``(x, y)`` batches.
        len_dataloader: number of batches to draw from ``data_iter``.
        optimizer: optimizer stepped after every batch; required when
            ``is_train`` is true.
        loss: callable invoked as ``loss(y, y_pred)`` that returns a scalar tensor.
        is_train: when true, put the model in train mode and update it.
        threshold: when true, clamp predictions to ``[0, 1]`` before the loss.

    Returns:
        The loss averaged over all samples seen, as a Python float.

    Raises:
        ValueError: if ``is_train`` is true without an optimizer, or if no
            samples were processed.
    """
    if is_train and optimizer is None:
        raise ValueError("an optimizer is required when is_train=True")

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    if is_train:
        model.train()
    else:
        model.eval()

    accum_err = 0.0
    accum_size = 0

    for _ in range(len_dataloader):

        # next() is the iterator protocol; `.next()` is not available on
        # every iterator (including newer DataLoader iterators).
        x, y = next(data_iter)

        if is_train:
            model.zero_grad()

        # as_tensor accepts arrays and tensors without the copy-construct
        # warning that torch.tensor() emits for tensor inputs.
        x = torch.as_tensor(x).float().to(device)
        y = torch.as_tensor(y).float().to(device)

        y_pred = model(x)

        if threshold:
            y_pred = torch.clamp(y_pred, 0, 1)

        err = loss(y, y_pred)

        if is_train:
            err.backward()
            optimizer.step()

        # .item() detaches the value so the running sum does not keep every
        # batch's autograd graph alive.
        batch_len = x.shape[0]
        accum_err += err.item() * batch_len
        accum_size += batch_len

    if accum_size == 0:
        raise ValueError("no samples were processed (empty data loader)")

    return float(accum_err/accum_size)
        


def train(model, train_loader, val_loader, early_stopping, learning_rate = 0.001, epochs = 500, add_weight_decay = False, monitor_stopping = True):
    """Train ``model`` with Adam, LR-on-plateau scheduling and optional early stopping.

    Args:
        model: module to optimise.
        train_loader: iterable of training batches; must support ``len``.
        val_loader: iterable of validation batches; must support ``len``.
        early_stopping: ``EarlyStopping`` instance; its ``patience`` also sets
            the scheduler patience (``patience // 4``).
        learning_rate: Adam learning rate.
        epochs: maximum number of epochs.
        add_weight_decay: when true, use Adam with ``weight_decay=1e-1``.
        monitor_stopping: when true, call ``early_stopping`` every epoch and
            stop once it signals.

    Returns:
        Number of epochs actually run (0 when ``epochs`` is 0).
    """
    # `~add_weight_decay` is a bitwise NOT on a bool (-1 or -2, both truthy),
    # so the weight-decay branch could never be selected; test the flag directly.
    weight_decay = 1e-1 if add_weight_decay else 0.0
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience = early_stopping.patience//4, verbose=True)

    epochs_run = 0

    for epoch in range(epochs):
        epochs_run = epoch + 1

        mean_err = step(model, iter(train_loader), len(train_loader), optimizer, is_train=True)

        with torch.no_grad():
            mean_err_val = step(model, iter(val_loader), len(val_loader))

        print ('epoch: %d, \n TRAINING -> mean_err: %f' % (epoch, mean_err))
        print ('epoch: %d, \n VAL -> mean_err: %f' % (epoch, mean_err_val))

        scheduler.step(mean_err_val)

        if monitor_stopping:
            early_stopping(mean_err_val, model)

            if early_stopping.early_stop:
                print("Early stopping")
                break

        print('done')

    return epochs_run
from torch.utils.data import Dataset, DataLoader
from pytorchtools import EarlyStopping

# DataLoader settings shared by the training and validation loaders.
params = dict(batch_size=batch_size, shuffle=True, num_workers=0)

learning_rate, epochs = 0.01, 20
model.cuda()

train_loader, val_loader = (
    DataLoader(split, **params) for split in (train_data, validation_data)
)
early_stopping = EarlyStopping(patience=20, model_file="test.pt", verbose=True)

# Last expression of the cell: the number of epochs run is displayed as output.
train(model, train_loader, val_loader, early_stopping, learning_rate, epochs)

In [None]:
# NOTE(review): in the cells above `val_error` is bound to the value returned
# by `test2(...)`, so calling it like a function would fail unless a function
# of that name is defined elsewhere. Confirm, or replace with the intended
# evaluation call.
val_error(test_data_ML, 5, 0.01)


In [None]:
import copy
# Manual single-task adaptation check.
# NOTE(review): if `maml` still wraps the `nn.Linear` head built earlier in
# this notebook, its state dict will not match an LSTMModel and the
# load_state_dict call below will fail. Confirm which module `maml` wraps
# before running this cell.
model2 = LSTMModel( batch_size=batch_size, seq_len = window_size, input_dim = input_dim, n_layers = 2, hidden_dim = 120, output_dim =1).cuda()
model2.load_state_dict(copy.deepcopy(maml.module.state_dict()))
# Optimise the copy being adapted. The original passed `model.parameters()`,
# so the update steps below never changed `model2`.
opt2 = optim.Adam(model2.parameters(), lr=0.01)
task=0
x_spt, y_spt = test_data_ML[task]
x_qry, y_qry = test_data_ML[(task+1):(task+1+horizon)]

x_spt, y_spt = to_torch(x_spt), to_torch(y_spt)
x_qry = to_torch(x_qry.reshape(-1, window_size, input_dim))
y_qry = to_torch(y_qry.reshape(-1, output_dim))

# The loop variable is not called `step`, so the `step()` helper defined in an
# earlier cell is not overwritten at module level.
for adapt_step in range(adaptation_steps):

    pred = model2(x_spt)
    error = mae(pred, y_spt)
    print(error.data)
    opt2.zero_grad()
    error.backward()
    opt2.step()

pred = model2(x_qry)
error = mae(pred, y_qry)
print(error.data)

In [2]:
# Candidate values; the next cell picks one of them at random.
grid = [0., 0.25, 0.5, 0.75]

In [6]:
# Pick a random entry; the upper bound follows the list length instead of a
# hard-coded 4, so the cell stays correct if `grid` changes size.
grid[np.random.randint(0, len(grid))]

0.75