In [1]:
%load_ext autoreload
%reload_ext autoreload
%autoreload 2

In [2]:
from gym_swimmer import SwimmerEnv
import torch
import numpy as np
from torch import nn
import math
from models import *
from core import generate_default_model_name
# Alias the environment class so later cells can read class-level attributes
# (Env.state_dim, Env.action_dim, ...) without naming the concrete class.
Env = SwimmerEnv
# Default environment instance; later cells rebind `env` to fresh instances.
env = Env()

In [3]:
# --- Hyper-parameters -------------------------------------------------------
BATCH = 64            # mini-batch size
N_EPOCH = 12000       # number of outer training epochs (length of the tqdm loop)
n_candidates = 100    # random candidate actions drawn per step during data collection
bthreshold=1e-2       # barrier threshold handed to iter_action

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Barrier network: conditioned on the state only.
# NOTE(review): DMLP is not defined in this notebook; presumably it comes from
# `from models import *` -- confirm.
bnn = DMLP(state_dim=Env.state_dim, action_dim=Env.action_dim, mode='straight')
bnn.to(device)
bnn.train()

# Lyapunov network: conditioned on the state concatenated with the goal.
lnn = DMLP(state_dim=Env.state_dim+Env.goal_dim, action_dim=Env.action_dim, mode='sum')
lnn.to(device)
lnn.train()

# One Adam optimizer and one exponential LR schedule per network.
boptimizer = torch.optim.Adam(bnn.parameters(), lr=1e-4, weight_decay=1e-8)
bscheduler = torch.optim.lr_scheduler.ExponentialLR(boptimizer, gamma=0.996)

loptimizer = torch.optim.Adam(lnn.parameters(), lr=1e-4, weight_decay=1e-8)
lscheduler = torch.optim.lr_scheduler.ExponentialLR(loptimizer, gamma=0.996)

In [9]:
def sample_action(nn, o, tensor_a, max_iter=30, mode='max', threshold=-1e-2):
    '''
    Refine candidate actions against a learned certificate and pick one
    action per agent.

        Lyapunov: mode='min' -> a candidate is feasible when value <= threshold
        Barrier:  mode='max' -> a candidate is feasible when value >= threshold

    Args:
        nn: model exposing ``get_vec(o)`` and ``get_field(vec, a)`` (note that
            this parameter shadows any module-level ``nn`` inside the function).
        o: observations; a 2-D input gets a candidate axis inserted at dim 1.
        tensor_a: candidate actions, (num_agents, n_candidates, action_dim).
            It is optimised IN PLACE and has ``requires_grad`` switched on.
        max_iter: maximum number of gradient steps applied to the candidates.
        mode: 'max' or anything else (treated as 'min'), see above.
        threshold: scalar, or tensor broadcastable against the field values.

    Returns:
        (tensor_a, value, finalv, finala): the refined candidates, their field
        values, and per agent the selected value / action. For agents with at
        least one feasible candidate the selection is the feasible candidate
        closest to the threshold side (smallest value for 'max', largest for
        'min'); otherwise it is the least-violating candidate (largest value
        for 'max', smallest for 'min').
    '''
    # size of a: (num_agents, n_candidates, action_dim)

    def violation(field_value):
        # Amount by which each candidate misses the threshold; 0 means feasible.
        if mode == 'max':
            return (-field_value + threshold).relu()
        return (field_value - threshold).relu()

    if len(o.shape) == 2:
        o = o.unsqueeze(1)
    assert len(tensor_a.shape) == 3
    n_candidate = tensor_a.shape[1]

    # Remember the caller's mode so that it can be restored afterwards instead
    # of unconditionally forcing the model into training mode.
    was_training = getattr(nn, 'training', True)
    nn.eval()
    try:
        vec = nn.get_vec(o).detach()
        vec = vec.repeat((1, n_candidate, 1))

        tensor_a.requires_grad = True
        aoptimizer = torch.optim.Adam([tensor_a], lr=1)

        iter_ = 0
        while iter_ < max_iter:
            cvalue = violation(nn.get_field(vec, tensor_a))
            # Stop as soon as every agent owns at least one feasible candidate.
            if torch.min(cvalue, dim=-1)[0].sum() == 0:
                break
            aoptimizer.zero_grad()
            cvalue.sum().backward()
            torch.nn.utils.clip_grad_value_([tensor_a], 1e-2)
            aoptimizer.step()
            with torch.no_grad():
                # Project the candidates back into the action box.
                tensor_a[:] = tensor_a.clamp(-1, 1)
            iter_ += 1

        value = nn.get_field(vec, tensor_a)
        cvalue = violation(value)

        finalv = torch.zeros_like(value[:, 0])
        finala = torch.zeros_like(tensor_a[:, 0, :])
        valid = torch.min(cvalue, dim=-1)[0] == 0

        if mode == 'max':
            fallback, preferred, masked = torch.max, torch.min, float('inf')
        else:
            fallback, preferred, masked = torch.min, torch.max, float('-inf')

        # The guards matter: reducing an empty selection raises in torch.
        if (~valid).any():
            # No feasible candidate: take the least-violating one.
            best_v, best_i = fallback(value[~valid], dim=-1)
            finalv[~valid] = best_v
            finala[~valid] = tensor_a[~valid, best_i]
        if valid.any():
            # Mask infeasible candidates so they can never be selected.
            tvalue = value.clone()
            tvalue[cvalue != 0] = masked
            best_v, best_i = preferred(tvalue[valid], dim=-1)
            finalv[valid] = best_v
            finala[valid] = tensor_a[valid, best_i]
    finally:
        if was_training:
            nn.train()

    return tensor_a, value, finalv, finala

In [10]:
def train_barrier(bnn, optimizer, buf, pbar, lamda=0.1, n_iter=10):
    """
    Fit the barrier network on transitions drawn from ``buf``.

    Args:
        bnn: barrier model, called as ``bnn(**batch)``.
        optimizer: optimizer over ``bnn``'s parameters.
        buf: buffer whose ``get()`` returns ``(loader, next_loader)`` yielding
            aligned batches for step t and step t+1. Its ``concat_goal`` flag
            is switched off because the barrier ignores the goal.
        pbar: progress bar; only ``set_description`` is used.
        lamda: decay rate of the barrier condition
            ``B(x') - B(x) + lamda * B(x) >= 1e-2``.
        n_iter: number of passes over the buffer.

    Returns:
        The last progress description, or '' when the buffer yielded no batch.
    """
    bnn.train()
    buf.concat_goal = False

    def compute_loss(bnn, data, next_data):
        """Return (classification loss, decrease-condition loss) for one batch."""
        value = bnn(**data)
        next_value = bnn(**next_data)

        free_mask = data['prev_free'] * data['next_free']
        danger_mask = data['prev_danger'] + data['next_danger']

        # Free states should score >= 1e-2, dangerous ones <= -1e-2.
        # NOTE(review): the free term is normalised by next_free.sum() rather
        # than by its own mask, unlike the two other terms -- confirm intent.
        bloss1 = (1e-2 - value).relu() * free_mask / (1e-9 + data['next_free'].sum())
        bloss2 = (1e-2 + value).relu() * danger_mask / (1e-9 + danger_mask.sum())
        bloss = bloss1.sum() + bloss2.sum()

        # Decrease condition, only enforced on fully free triples.
        deriv = next_value - value + lamda * value
        safe_mask = free_mask * next_data['next_free']
        dloss = ((-deriv + 1e-2).relu() * safe_mask).sum() / (1e-9 + safe_mask.sum())

        return bloss, dloss

    # Defined up front so that an empty buffer returns '' instead of raising.
    desc = ''
    for _ in range(n_iter):
        loader, next_loader = buf.get()
        for data, next_data in zip(loader, next_loader):
            optimizer.zero_grad()
            bloss, dloss = compute_loss(bnn, data, next_data)
            loss = bloss + dloss
            loss.backward()
            optimizer.step()
            with torch.no_grad():
                b_mean = bnn(**data).mean()
            desc = "bloss %.6f, dloss %.6f, bmean %.6f" % (bloss, dloss, b_mean)
            pbar.set_description(desc)
            optimizer.zero_grad()

    return desc

In [11]:
def train_lyapunov(lnn, optimizer, buf, pbar, lamda=0.1, n_iter=10, n_candidates=100):
    """
    Fit the Lyapunov network on transitions drawn from ``buf``.

    Args:
        lnn: Lyapunov model, called as ``lnn(**batch)`` and
            ``lnn(x=..., action=...)``.
        optimizer: optimizer over ``lnn``'s parameters.
        buf: buffer whose ``get()`` returns ``(loader, next_loader)`` yielding
            aligned batches for step t and step t+1. Its ``concat_goal`` flag
            is switched on so that states are goal-conditioned.
        pbar: progress bar; only ``set_description`` is used.
        lamda: currently unused; kept for signature compatibility.
        n_iter: number of passes over the buffer.
        n_candidates: random candidate actions per sample used to mine the
            contrastive action.

    Returns:
        The last progress description, or '' when the buffer yielded no batch.
    """
    lnn.train()
    buf.concat_goal = True

    def compute_loss(lnn, data, next_data):
        """Return (goal loss, decrease loss, contrastive loss) for one batch."""
        next_o = data['next_x']

        # The detached current value only serves as the per-sample threshold
        # for mining a contrastive action at the next state.
        threshold = (lnn(**data).detach() - 1e-2).unsqueeze(1)
        # Draw the candidates on the same device as the batch.
        a = torch.empty(
            len(next_o), n_candidates, data['action'].shape[-1],
            device=next_o.device,
        ).uniform_(-1, 1)
        _, _, _, finala = sample_action(lnn, next_o, a, max_iter=0, mode='min', threshold=threshold)
        # The candidates are not trained; keep them out of the graph.
        finala = finala.detach()

        value = lnn(**data)
        next_value = lnn(**next_data)
        next_value_neg = lnn(x=next_o, action=finala)

        # The value should vanish on transitions that reach the goal.
        goal_loss = ((value**2)*data['next_goal']).sum() / (1e-9 + data['next_goal'].sum()) + \
                    ((next_value**2)*next_data['next_goal']).sum() / (1e-9 + next_data['next_goal'].sum())

        # The value should drop by at least 1e-2 along observed transitions.
        dloss = (next_value - value + 1e-2).relu().mean()

        # The mined action's value is pushed to stay >= value - 1e-2.
        contrastloss = (-(next_value_neg - value) - 1e-2).relu().mean()

        return goal_loss, dloss, contrastloss

    # Defined up front so that an empty buffer returns '' instead of raising.
    desc = ''
    for _ in range(n_iter):
        loader, next_loader = buf.get()
        for data, next_data in zip(loader, next_loader):
            optimizer.zero_grad()
            goal_loss, dloss, contrastloss = compute_loss(lnn, data, next_data)
            loss = goal_loss + dloss + contrastloss
            loss.backward()
            optimizer.step()
            desc = "goal_loss %.6f, dloss %.6f, closs %.6f" % (goal_loss, dloss, contrastloss)
            pbar.set_description(desc)
            optimizer.zero_grad()

    return desc

In [12]:
# create replay buffer
import scipy
from random import shuffle
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate
from collections import defaultdict


class DotDict(dict):
    """
    a dictionary that supports dot notation
    as well as dictionary access notation
    usage: d = DotDict() or d = DotDict({'val1':'first'})
    set attributes: d.val2 = 'second' or d['val2'] = 'second'
    get attributes: d.val2 or d['val2']

    Nested mappings passed to the constructor are converted recursively.
    Missing attributes raise AttributeError (not KeyError) so that
    ``hasattr``, ``getattr(d, name, default)`` and ``copy.deepcopy`` work.
    """

    def __init__(self, dct=None):
        super().__init__()
        if dct is None:
            return
        for key, value in dct.items():
            if hasattr(value, 'keys'):
                value = DotDict(value)
            self[key] = value

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None

    __setattr__ = dict.__setitem__

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise AttributeError(name) from None

    def to(self, device):
        """Move every value to ``device`` in place; returns ``self`` for chaining."""
        for key, value in self.items():
            # Re-assigning existing keys does not change the dict's size, so
            # this is safe while iterating.
            self[key] = value.to(device)
        return self


class GlobalReplayBuffer:
    """
    Ordered list of per-timestep observation dicts.

    ``store`` appends one step, ``get`` exposes the steps through a shuffled
    DataLoader, and ``relabel_l`` rewrites the goal of the stored steps to a
    state that was actually visited.
    """

    def __init__(self, size, batch=64, concat_goal=False):
        self.obs_buf = []
        self.batch = batch
        self.ptr = 0
        # Recorded only; the capacity is not enforced by store().
        self.max_size = size
        # Read by get(); initialised here so that get() also works when no
        # caller has set the flag from outside.
        self.concat_goal = concat_goal

    def store(self, **kwargs):
        """
        Append one timestep of agent-environment interaction to the buffer.
        Every keyword value is converted to a float tensor.
        """
        obs = DotDict({})
        for key, value in kwargs.items():
            obs[key] = torch.as_tensor(value, dtype=torch.float)
        self.obs_buf.append(obs)
        self.ptr += 1

    def get(self):
        """
        Return a shuffled DataLoader over the stored steps. Batches are moved
        to the module-level ``device``; when ``concat_goal`` is set the goal is
        appended to both 'x' and 'next_x'.
        """

        def collate_fn(data):
            """Collate a list of observation dicts into one batched dict."""
            data = default_collate(data)
            for k, v in data.items():
                data[k] = v.to(device)
            if self.concat_goal:
                data['x'] = torch.cat((data['x'], data['goal']), dim=-1)
                data['next_x'] = torch.cat((data['next_x'], data['goal']), dim=-1)
            return data

        loader = DataLoader(self.obs_buf, shuffle=True, batch_size=self.batch, collate_fn=collate_fn)

        return loader

    def relabel_l(self, goal_tol=0.1):
        """
        Relabel the stored steps with a goal that was actually reached.

        Nothing happens when fewer than two steps are stored or when the last
        step already reached its goal. Otherwise a random stored state (never
        the first) becomes the new goal, 'goal' / 'next_goal' are rewritten,
        and the buffer is cut after the last transition landing within
        ``goal_tol`` of the new goal.
        """
        # Need at least one (obs, next_obs) pair; this also guards the
        # randint(1, n) call, which rejects n <= 1.
        if len(self.obs_buf) < 2:
            return
        if self.obs_buf[-1]['next_goal'] == 1:
            return

        # choose a future state
        chosen_idx = np.random.randint(1, len(self.obs_buf))
        chosen = self.obs_buf[chosen_idx]
        # Copy so the goals do not alias the chosen observation's storage.
        new_goal = chosen['x'][:len(chosen['goal'])].detach().cpu().clone().float()

        for idx, (obs, next_obs) in enumerate(zip(self.obs_buf[:-1], self.obs_buf[1:])):
            obs['goal'] = new_goal.clone()
            reached = (next_obs['x'][:len(new_goal)] - obs['goal']).norm() < goal_tol
            # Float, to match the dtype produced by store().
            obs['next_goal'] = reached.float()
            if reached:
                chosen_idx = idx + 1

        self.obs_buf = self.obs_buf[:chosen_idx]
        self.ptr = len(self.obs_buf)

    def relabel_b(self):
        # TODO
        pass
    
    
class GatherReplayBuffer:
    """
    Collection of per-episode buffers that serves aligned (step, next step)
    batches drawn from all of them.
    """

    def __init__(self, batch=64, concat_goal=False):
        self.buffers = []
        self.batch = batch
        self.concat_goal = concat_goal

    def append(self, buffer):
        """Register one more episode buffer (anything exposing ``obs_buf``)."""
        self.buffers.append(buffer)

    def get(self):
        """
        Return ``(loader, next_loader)``: two DataLoaders iterating in the
        same shuffled order, so that batch i of the first holds the steps that
        immediately precede the steps in batch i of the second.
        """

        def collate_fn(samples):
            """Collate observation dicts, move them to ``device``, optionally append the goal."""
            batch = default_collate(samples)
            for key in batch:
                batch[key] = batch[key].to(device)
            if self.concat_goal:
                for field in ('x', 'next_x'):
                    batch[field] = torch.cat((batch[field], batch['goal']), dim=-1)
            return batch

        # Consecutive pairs never straddle two episodes.
        transitions = [
            pair
            for episode in self.buffers
            for pair in zip(episode.obs_buf[:-1], episode.obs_buf[1:])
        ]
        # Shuffle the pairs once so both loaders can stay sequential and aligned.
        shuffle(transitions)

        current_steps = [cur for cur, _ in transitions]
        following_steps = [nxt for _, nxt in transitions]

        loader = DataLoader(current_steps, shuffle=False, batch_size=self.batch, collate_fn=collate_fn)
        next_loader = DataLoader(following_steps, shuffle=False, batch_size=self.batch, collate_fn=collate_fn)
        return loader, next_loader

In [98]:
%debug

> [0;32m<ipython-input-86-b8530f2dbbda>[0m(44)[0;36msample_action[0;34m()[0m
[0;32m     42 [0;31m        [0mcvalue[0m [0;34m=[0m [0;34m([0m[0mvalue[0m[0;34m-[0m[0mthreshold[0m[0;34m)[0m[0;34m.[0m[0mrelu[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     43 [0;31m[0;34m[0m[0m
[0m[0;32m---> 44 [0;31m    [0mfinalv[0m [0;34m=[0m [0mtorch[0m[0;34m.[0m[0mzeros_like[0m[0;34m([0m[0mvalue[0m[0;34m[[0m[0;34m:[0m[0;34m,[0m [0;36m0[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     45 [0;31m    [0mfinala[0m [0;34m=[0m [0mtorch[0m[0;34m.[0m[0mzeros_like[0m[0;34m([0m[0mtensor_a[0m[0;34m[[0m[0;34m:[0m[0;34m,[0m [0;36m0[0m[0;34m,[0m [0;34m:[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     46 [0;31m    [0mvalid[0m [0;34m=[0m [0mtorch[0m[0;34m.[0m[0mmin[0m[0;34m([0m[0mcvalue[0m[0;34m,[0m [0mdim[0m[0;34m=[0m[0;34m-[0m[0;36m1[0m[0;34m)[0m[0;34

ipdb>  exit9)


*** SyntaxError: unmatched ')'


ipdb>  exit()


# Train Function

In [13]:
def iter_action(bnn, lnn, o_b, o_l, a, bthreshold=-1e-2, lthreshold=-1e-2, max_iter=30):
    """
    Jointly refine candidate actions against the barrier and the Lyapunov
    network for a single observation.

    A candidate's cost is
    ``relu(bthreshold - barrier) + relu(lyapunov - lthreshold)``; gradient
    steps on the candidates run until one of them has zero cost or
    ``max_iter`` steps were taken. Both networks are left in eval mode.

    Returns numpy arrays ``(actions, bvalue, lvalue, cvalue)``.
    """
    # size of a: (num_agents, n_candidates, action_dim), flattened to rows
    flat_a = a.reshape((-1, a.shape[-1]))
    n_rows = flat_a.shape[0]

    bnn.eval()
    lnn.eval()

    def tiled(vec):
        # One copy of the observation embedding per candidate row.
        return vec.reshape(1, -1).repeat((n_rows, 1))

    obs_b = {key: val.to(device) for key, val in o_b.items()}
    vecb = tiled(bnn.get_vec(**obs_b).detach())

    obs_l = {key: val.to(device) for key, val in o_l.items()}
    goal_state = torch.cat((obs_l['x'], obs_l['goal']), dim=-1)
    vecl = tiled(lnn.get_vec(x=goal_state).detach())

    tensor_a = torch.FloatTensor(flat_a).to(device)
    tensor_a.requires_grad = True
    aoptimizer = torch.optim.Adam([tensor_a], lr=1)

    def evaluate():
        # Barrier value, Lyapunov value and combined violation per candidate.
        b = bnn.get_field(vecb, tensor_a)
        l = lnn.get_field(vecl, tensor_a)
        return b, l, (-b + bthreshold).relu() + (l - lthreshold).relu()

    steps_left = max_iter
    while steps_left > 0:
        _, _, cvalue = evaluate()
        # A single violation-free candidate is enough to stop.
        if torch.min(cvalue) == 0:
            break
        aoptimizer.zero_grad()
        cvalue.sum().backward()
        torch.nn.utils.clip_grad_value_([tensor_a], 1e-2)
        aoptimizer.step()
        with torch.no_grad():
            # Keep the candidates inside the action box.
            tensor_a[:] = tensor_a.clamp(-1, 1)
        steps_left -= 1

    bvalue, lvalue, cvalue = evaluate()
    return tuple(t.data.cpu().numpy() for t in (tensor_a, bvalue, lvalue, cvalue))

def choose_action(cvalue):
    """
    Pick a candidate index from per-candidate violations.

    A uniformly random zero-violation candidate is returned when one exists;
    otherwise the candidate with the smallest violation.
    """
    feasible = cvalue == 0
    if not np.any(feasible):
        return np.argmin(cvalue)
    feasible_idx = np.arange(len(cvalue))[feasible]
    return np.random.choice(feasible_idx, 1)[0]

In [161]:
from tqdm import tqdm
import gc
from copy import deepcopy

# def is_counter_d(o, next_o, free, next_free, barrier, v_cur, v_next):
#     counter_mse = np.abs(v_next - v_cur - barrier) > 1e-2
#     return counter_mse

# def is_counter_b(o, next_o, free, danger, barrier, v_cur, v_next):
#     not_free = next_free.astype(float)<free.astype(float)
#     counter_free = np.logical_and(free, v_cur > -1e-1)
#     counter_obs = np.logical_and(danger, v_cur < 1e-1)
#     counter_barrier = np.logical_and(v_next - v_cur > -0.1 * v_cur, free)
#     return np.logical_or(np.logical_or(counter_free, counter_obs), counter_barrier)
    

max_episode_length     = Env.max_episode_steps
EXPERIENCE_BUFFER_SIZE = Env.max_episode_steps

name_dict = generate_default_model_name(Env)
BMODEL_PATH = name_dict['db'].replace('dbgnn', 'dbnn')
LMODEL_PATH = name_dict['dl'].replace('dlgnn', 'dlnn')
LOG_FILE_L = 'cam_'+Env.__name__+'_l.txt'
LOG_FILE_B = 'cam_'+Env.__name__+'_b.txt'
# Truncate both log files at the start of a run.
open(LOG_FILE_L, 'w+').close()
open(LOG_FILE_B, 'w+').close()
# Not used by the loop below; kept because other cells may still refer to them.
bbuf = GlobalReplayBuffer(EXPERIENCE_BUFFER_SIZE)
lbuf = GlobalReplayBuffer(EXPERIENCE_BUFFER_SIZE)
# `allbuf` gathers finished episodes and serves the (step, next step) loader
# pairs that train_barrier / train_lyapunov unpack; `nowbuf` holds the episode
# currently being collected. Both must exist before the loop on a fresh kernel.
allbuf = GatherReplayBuffer(batch=BATCH)
nowbuf = GlobalReplayBuffer(EXPERIENCE_BUFFER_SIZE)
env = Env()
env.reset()
lthreshold = -1e3
o = env._get_obs()

pbar = tqdm(range(N_EPOCH))
for epoch_i in pbar:

    total_trans = 0
    # TODO(review): nothing accumulates unsafe steps yet, so the logged rate is
    # always 0 -- decide which field of `info` marks an unsafe step.
    unsafe_rate = 0
    # Main loop: collect experience in env and update/log each epoch
    while total_trans < EXPERIENCE_BUFFER_SIZE:

        o = env._get_obs()

        a_all = np.random.uniform(-1., 1., size=(n_candidates, env.action_dim))
        o_l = o_b = {'x': torch.FloatTensor(o), 'goal': torch.FloatTensor(env.goal)}
        # The refinement budget grows with training: 0 steps at first, 30 at most.
        a_refine, bvalue, lvalue, cvalue = iter_action(bnn, lnn, o_b, o_l, a_all, max_iter=min(epoch_i//100, 30), lthreshold=lthreshold, bthreshold=bthreshold)
        idx = choose_action(cvalue)
        a, bvalue, lvalue, cvalue = a_refine[idx, :], bvalue[idx], lvalue[idx], cvalue[idx]
        # The next step has to improve on the Lyapunov value just achieved.
        lthreshold = lvalue - 1e-2
        # a = np.random.uniform(-1, 1, size=(Env.action_dim,))

        next_o, r, d, info = env.step(a)
        nowbuf.store(**info)
        # Without this increment the collection loop never terminates.
        total_trans += 1

        if d:
            nowbuf.relabel_l()
            allbuf.append(nowbuf)
            env.reset()
            lthreshold = -1e3
            nowbuf = GlobalReplayBuffer(1024)

    unsafe_rate = unsafe_rate / total_trans

    # Train only once at least one (step, next step) pair has been gathered.
    if any(len(b.obs_buf) > 1 for b in allbuf.buffers):
        descb = train_barrier(bnn, boptimizer, allbuf, pbar=pbar, n_iter=10)
        descl = train_lyapunov(lnn, loptimizer, allbuf, pbar=pbar, n_iter=10)
    else:
        descb = descl = 'no training data yet'

#     if (epoch_i % 10 == 0) and (epoch_i != 0) and (epoch_i < 6000):
#         bscheduler.step()
#         lscheduler.step()

    with open(LOG_FILE_L, 'a+') as f:
        f.write(descl+'\t'+str(pbar.last_print_n)+'\n')
    with open(LOG_FILE_B, 'a+') as f:
        f.write(descb+'\t'+str(pbar.last_print_n)+'\t'+'unsafe rate: '+str(unsafe_rate)+'\n')

    torch.save(bnn.state_dict(), BMODEL_PATH)
    torch.save(lnn.state_dict(), LMODEL_PATH)

goal_loss 0.000000, dloss 0.000001:   0%|          | 13/12000 [00:17<4:28:06,  1.34s/it]            


KeyboardInterrupt: 

In [20]:
device

device(type='cuda', index=0)

In [38]:
%debug

> [0;32m/home/rainorangelemon/anaconda3/envs/gnn/lib/python3.8/site-packages/torch/nn/functional.py[0m(1753)[0;36mlinear[0;34m()[0m
[0;32m   1751 [0;31m    [0;32mif[0m [0mhas_torch_function_variadic[0m[0;34m([0m[0minput[0m[0;34m,[0m [0mweight[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   1752 [0;31m        [0;32mreturn[0m [0mhandle_torch_function[0m[0;34m([0m[0mlinear[0m[0;34m,[0m [0;34m([0m[0minput[0m[0;34m,[0m [0mweight[0m[0;34m)[0m[0;34m,[0m [0minput[0m[0;34m,[0m [0mweight[0m[0;34m,[0m [0mbias[0m[0;34m=[0m[0mbias[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 1753 [0;31m    [0;32mreturn[0m [0mtorch[0m[0;34m.[0m[0m_C[0m[0;34m.[0m[0m_nn[0m[0;34m.[0m[0mlinear[0m[0;34m([0m[0minput[0m[0;34m,[0m [0mweight[0m[0;34m,[0m [0mbias[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   1754 [0;31m[0;34m[0m[0m
[0m[0;32m   1755 [0;31m[0;34m[0m[0m
[0m


ipdb>  exit()


# Inference

In [164]:
from gym_swimmer import SwimmerEnv
from stable_baselines3 import PPO
from tqdm import tqdm 

# Roll out one episode with the pre-trained PPO policy while evaluating the
# learned barrier / Lyapunov values of the executed action, recording a frame
# per step.
# NOTE(review): this `env` is rebound two statements below, so this instance
# is never used.
env = SwimmerEnv()
model = PPO.load("swimmer/best_model.zip")

env = Env()
# First frame is rendered before env.reset() is called.
imgs = [env.sim.render(600, 300)]

# NOTE(review): the three counters below are never updated in this cell.
num_tot = 0
num_goaled = 0 
num_collision = 0

# -1e3 makes the Lyapunov threshold very low for the first step.
obs = env.reset(); lthreshold=-1e3
ts = 0
while True:
    ts += 1 
    
    o = env._get_obs()
    a_oracle = model.predict(o)[0]
    # 1000 random candidates; row 0 is overwritten with the PPO action.
    a_all = np.random.uniform(-1., 1., size=(1000, env.action_dim))
    a_all[0,:] = a_oracle
    
    o_l = o_b = {'x': torch.FloatTensor(o), 'goal': torch.FloatTensor(env.goal)}
    # max_iter=0: the candidates are only scored, not refined.
    a_refine, bvalue, lvalue, cvalue = iter_action(bnn, lnn, o_b, o_l, a_all, max_iter=0, lthreshold=lthreshold, bthreshold=bthreshold)
    idx = choose_action(cvalue)
    
    # NOTE(review): this discards the index chosen above and always executes
    # candidate 0 (the PPO action) -- looks like a debugging override; confirm.
    idx = 0
    
    ac, bvalue, lvalue, cvalue = a_refine[idx, :], bvalue[idx], lvalue[idx], cvalue[idx]
    # The next step is scored against the Lyapunov value just achieved.
    lthreshold = lvalue - 1e-2
    
    print(ac, lvalue, bvalue, cvalue)
    obs, rw, done, _ = env.step(ac)
    # A non-zero contact count in the simulator is reported as a collision.
    if env.sim.data.ncon!=0:
        print('collision')
    imgs.append(env.sim.render(600, 300))
    if done:
        break

[-1.          0.18508455] 0.06435737 0.07643822 1000.06433
[-1. -1.] 0.08304545 0.09719533 0.028688084
[1. 1.] 0.022714553 0.10902397 0.0
[1. 1.] 0.021035004 0.043871645 0.008320451
[-1. -1.] 0.07122941 0.06251663 0.06019441
[-0.5870377 -1.       ] 0.081835255 0.023417275 0.020605844
[1. 1.] 0.0066363253 0.13620774 0.0
[0.2722642 1.       ] 0.011613596 0.0020998418 0.022877429
[-1. -1.] 0.070142314 0.025615757 0.06852872
[ 0.28634393 -0.27431247] 0.038593262 -0.021201644 0.031201644
[0.51681185 1.        ] 0.0052824663 0.010600628 0.0
[-1. -1.] 0.051720716 0.036437448 0.05643825
[ 0.32304662 -0.7526628 ] 0.03467583 -0.010475455 0.020475455
[0.64764667 1.        ] 0.004692968 -0.014261765 0.024261765
[-1. -1.] 0.046387747 0.04511833 0.05169478
[ 0.5528521 -0.4121422] 0.020557651 0.008372053 0.0016279472
[-0.06452352  1.        ] 0.0117154475 0.061053164 0.001157796
[-0.3520448 -1.       ] 0.02656737 0.07142775 0.024851922
[1.         0.25921556] 0.010344019 0.05065412 0.0
[-0.84635264  

In [165]:
from PIL import Image
# Convert every rendered frame to a PIL image, flipped along axis 0
# (presumably the renderer returns frames upside down -- confirm).
ims = [Image.fromarray(np.flip(a_frame, axis=0)) for a_frame in imgs]
# Write all frames into one animated GIF; `duration` is the per-frame display time.
ims[0].save("cam.gif", save_all=True, append_images=ims[1:], duration=20)

In [23]:
%debug

> [0;32m<ipython-input-20-b0ab67fbcaba>[0m(49)[0;36msample_action[0;34m()[0m
[0;32m     47 [0;31m    [0mvalid[0m [0;34m=[0m [0mtorch[0m[0;34m.[0m[0mmin[0m[0;34m([0m[0mcvalue[0m[0;34m,[0m [0mdim[0m[0;34m=[0m[0;34m-[0m[0;36m1[0m[0;34m)[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m==[0m[0;36m0[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     48 [0;31m    [0;32mif[0m [0mmode[0m[0;34m==[0m[0;34m'max'[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 49 [0;31m        [0mfinalv[0m[0;34m[[0m[0;34m~[0m[0mvalid[0m[0;34m][0m [0;34m=[0m [0mtorch[0m[0;34m.[0m[0mmax[0m[0;34m([0m[0mvalue[0m[0;34m[[0m[0;34m~[0m[0mvalid[0m[0;34m][0m[0;34m,[0m [0mdim[0m[0;34m=[0m[0;34m-[0m[0;36m1[0m[0;34m)[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     50 [0;31m        [0mfinala[0m[0;34m[[0m[0;34m~[0m[0mvalid[0m[0;34m][0m [0;34m=[0m [0mtensor_a[0m[0;34m[[0m[0;34m~[0m[0mvalid[0

ipdb>  valid


tensor([True], device='cuda:0')


ipdb>  cvalue


tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

ipdb>  cvalue.shape


torch.Size([1, 1000])


ipdb>  torch.min(cvalue, dim=-1)[0]==0


tensor([True], device='cuda:0')


ipdb>  value


tensor([[ 9.7262e-03,  5.0118e-03, -8.4718e-03,  9.9360e-03,  1.3209e-02,
          8.2699e-03,  4.2516e-03,  1.6563e-02,  7.1831e-03,  1.3738e-03,
         -5.8962e-03, -6.5514e-03,  1.4476e-02, -2.3273e-03, -1.6408e-03,
          1.1450e-03,  1.0558e-03,  1.3862e-02, -5.8483e-03, -4.7138e-03,
         -7.7967e-03, -6.1636e-04,  9.5766e-03,  5.8739e-03, -8.6961e-03,
          2.2245e-03, -7.9326e-03, -4.1428e-03,  1.1588e-02,  6.9545e-03,
         -4.8973e-03, -2.9604e-03,  7.7139e-03,  3.7160e-03,  8.0969e-03,
          1.1821e-02, -7.1511e-03, -9.0692e-04, -6.1387e-03,  5.6423e-03,
          5.0686e-03,  1.0692e-02,  9.8625e-03, -8.3699e-03,  7.5419e-03,
         -5.5405e-03,  7.9922e-03,  5.4960e-03, -7.7494e-03,  6.0038e-03,
          1.6131e-02,  2.2360e-03, -1.8093e-04,  4.2529e-03,  6.9596e-03,
          8.6023e-03,  8.7183e-04,  1.3906e-02,  8.2790e-03,  1.5417e-02,
          1.6796e-03,  1.3456e-02,  2.9710e-03, -2.0279e-03,  7.8851e-03,
          1.2089e-02,  1.3287e-02, -3.

ipdb>  value[~valid]


tensor([], device='cuda:0', size=(0, 1000), grad_fn=<IndexBackward>)


ipdb>  torch.max(value[~valid], dim=-1)[0]


*** RuntimeError: cannot perform reduction function max on tensor with no elements because the operation does not have an identity


ipdb>  value[~valid]


tensor([], device='cuda:0', size=(0, 1000), grad_fn=<IndexBackward>)


ipdb>  value[~valid, :]


tensor([], device='cuda:0', size=(0, 1000), grad_fn=<IndexBackward>)


ipdb>  exit()
