In [1]:
import numpy as np
import torch
import models
import utils
import time

# X, Y = utils.generate_demonstrations(time_len=200, params=np.array([[0.6,-0.1],[0.5,-0.23],[0.4,-0.43],[-0.6,0.1],[-0.5,0.23],[-0.4,0.43]]), title='Training')
# v_X, v_Y = utils.generate_demonstrations(time_len=200, params=np.array([[0.55,-0.155],[0.45,-0.32],[-0.45,0.32],[-0.55,0.155]]), title='Validation')
# np.save('training_X',X)
# np.save('training_Y',Y)
# np.save('validation_X',v_X)
# np.save('validation_Y',v_Y)

X=np.load("training_X.npy")
Y=np.load("training_Y.npy")
X_val=np.load("validation_X.npy")
Y_val=np.load("validation_Y.npy")
obs_max = 5 
d_N = X.shape[0] 
d_x , d_y = (X.shape[-1] , Y.shape[-1])
time_len = X.shape[1] 

def get_train_sample1():
    n = np.random.randint(0,obs_max)+1
    d = np.random.randint(0, d_N)
    
    observations = np.zeros((n,d_x+d_y)) 
    target_X = np.zeros((1,d_x))
    target_Y = np.zeros((1,d_y))
    
    perm = np.random.permutation(time_len)
    observations[:n,:d_x] = X[d,perm[:n]]
    observations[:n,d_x:d_x+d_y] = Y[d,perm[:n]]
    target_X[0] = X[d,perm[n]]
    target_Y[0] = Y[d,perm[n]]
    return torch.from_numpy(observations), torch.from_numpy(target_X), torch.from_numpy(target_Y)

def get_train_sample2(batch_size=1):
    observations = np.zeros((batch_size, obs_max, d_x+d_y))
    observation_mask = np.zeros((batch_size, obs_max))
    target_X = np.zeros((batch_size, 1, d_x))
    target_Y = np.zeros((batch_size, 1, d_y))
    for i in range(batch_size):
        n = np.random.randint(0,obs_max)+1
        d = np.random.randint(0, d_N)
        perm = np.random.permutation(time_len)
        observations[i,:n,:d_x] = X[d,perm[:n]]
        observations[i,:n,d_x:d_x+d_y] = Y[d,perm[:n]]
        observation_mask[i,:n] = 1
        target_X[i,0] = X[d,perm[n]]
        target_Y[i,0] = Y[d,perm[n]]
        
    return  torch.from_numpy(observations), \
            torch.from_numpy(target_X), \
            torch.from_numpy(target_Y), \
            torch.from_numpy(observation_mask)
        

In [4]:
# 1 - ALPER's Implementation WITHOUT batch

cnp1 = models.CNP(
    in_shape=(d_x, d_y),
    hidden_size=32,
    num_hidden_layers=2
).double()
optimizer1 = torch.optim.Adam(cnp1.parameters(), lr=1e-4)
n_train_steps = 10000
t0 = time.time()
for _ in range(n_train_steps):
    obs, target_x, target_y = get_train_sample1()
    optimizer1.zero_grad()
    loss = cnp1.nll_loss(
        observation = obs.unsqueeze(0),
        target = target_x.unsqueeze(0),
        target_truth = target_y.unsqueeze(0)
    )
    loss.backward()
    optimizer1.step()
print("Time taken: ", time.time()-t0)

Time taken:  14.623771667480469


In [3]:
# 2 - ALPER's Implementation WITH batch stuff
# prepare the observation_mask:

cnp2 = models.CNP(
    in_shape=(d_x, d_y),
    hidden_size=32,
    num_hidden_layers=2
).double()
optimizer2 = torch.optim.Adam(cnp2.parameters(), lr=1e-4)
n_train_steps = 10000
t0 = time.time()
for _ in range(n_train_steps):
    obs, target_x, target_y, obs_mask= get_train_sample2()
    optimizer2.zero_grad()
    loss = cnp2.nll_loss(
        observation = obs,
        target = target_x,
        target_truth = target_y,
        observation_mask = obs_mask
    )
    loss.backward()
    optimizer2.step()
print("Time taken: ", time.time()-t0)

Time taken:  13.431654214859009


In [2]:
# 3 - With torch compile
cnp3 = models.CNP(
    in_shape=(d_x, d_y),
    hidden_size=32,
    num_hidden_layers=2
).double()
cnp3 = torch.compile(cnp3)
optimizer3 = torch.optim.Adam(cnp3.parameters(), lr=1e-4)
t0 = time.time()
for _ in range(10000):
    obs, target_x, target_y, obs_mask= get_train_sample2()
    optimizer3.zero_grad()
    loss = cnp3.nll_loss(
        observation = obs,
        target = target_x,
        target_truth = target_y,
        observation_mask = obs_mask
    )
    loss.backward()
    optimizer3.step()
print("Time taken: ", time.time()-t0)

Time taken:  14.726606845855713
