In [3]:
import torch

In [1]:
def create_mask(seq_lengths, max_len):
    """Build a float padding mask from per-sequence lengths.

    Args:
        seq_lengths: 1-D integer tensor of shape (batch,) with the number of
            valid steps of every sequence.
        max_len: Width of the mask, i.e. the padded sequence length.

    Returns:
        Float tensor of shape (batch, max_len) holding 1.0 at positions
        ``p < seq_lengths[i]`` and 0.0 everywhere else.
    """
    # Create the position index on the same device as the lengths; a CPU
    # arange compared against CUDA lengths raises a device-mismatch error.
    positions = torch.arange(max_len, device=seq_lengths.device)
    # (1, max_len) < (batch, 1) broadcasts to (batch, max_len), so the
    # position row does not have to be materialised once per batch element.
    mask = positions.unsqueeze(0) < seq_lengths.unsqueeze(1)
    return mask.float()

In [5]:
mask = create_mask( torch.LongTensor([3, 5, 2]), 6)

In [13]:
mask.shape # (B, L)

torch.Size([3, 6])

In [8]:
mask.unsqueeze(0).shape

torch.Size([1, 3, 6])

In [17]:
mask.unsqueeze(-1) * mask.unsqueeze(-2)

tensor([[[1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.]],

        [[1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 0.],
         [0., 0., 0., 0., 0., 0.]],

        [[1., 1., 0., 0., 0., 0.],
         [1., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.]]])

In [5]:
def graph_sparsification(S, relative_threshold=0.5):
    """Keep the highest-scoring fraction of edges of every graph in a batch.

    The non-zero entries of each ``S[b]`` are counted as its edges. With
    ``k = ceil(num_edges * relative_threshold)``, the (k+1)-th largest score
    is used as a strict threshold, so at most ``k`` entries survive (fewer
    when scores tie with the threshold).

    Args:
        S: Score tensor whose first dimension is the batch,
            e.g. (bs, max_step, max_step).
        relative_threshold: Fraction of the non-zero entries to keep.

    Returns:
        float32 tensor with the shape of ``S``: 1.0 for kept entries, else 0.0.
    """
    if S.numel() == 0:
        # Nothing to rank; reshape(bs, -1) would be ambiguous here.
        return torch.zeros(S.shape, dtype=torch.float32, device=S.device)

    batch_size = S.shape[0]
    # reshape (not view) so non-contiguous inputs, e.g. transposes, work too.
    S_flatten = S.reshape(batch_size, -1)
    num_entries = S_flatten.shape[1]
    sorted_S_flatten, _ = S_flatten.sort(dim=-1, descending=True)
    num_edges = sorted_S_flatten.count_nonzero(-1)  # (bs,)
    to_keep_edge = torch.ceil(num_edges * relative_threshold).to(torch.long)

    # When k reaches the number of entries there is no (k+1)-th score to use
    # as a threshold (this used to raise IndexError). Clamp the lookup and
    # keep every non-zero entry of those rows instead.
    overflow = to_keep_edge >= num_entries
    safe_index = to_keep_edge.clamp(max=num_entries - 1)
    rows = torch.arange(batch_size, device=S.device)
    threshold_score = sorted_S_flatten[rows, safe_index]  # (bs,)

    keep = S_flatten > threshold_score.reshape(-1, 1)
    keep = keep | (overflow.reshape(-1, 1) & (S_flatten != 0))
    return keep.to(torch.float32).reshape(S.shape)

In [6]:
adj = torch.tensor([[[1, 2, 4, 0],
                    [0, 7, 9, 3],
                    [2, 3, 0, 2],
                    [2, 3, 0, 2]],
                    
                    [[0, 0, 0, 0],
                    [4, 2, 3, 4],
                    [2, 3, 8, 2],
                    [2, 3, 8, 2]]])

In [7]:
A = graph_sparsification(adj)


In [8]:
A

tensor([[[0., 0., 1., 0.],
         [0., 1., 1., 1.],
         [0., 1., 0., 0.],
         [0., 1., 0., 0.]],

        [[0., 0., 0., 0.],
         [1., 0., 0., 1.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.]]])

In [212]:
# Symmetric normalisation of the sparsified adjacency:
#   L_norm = D^{-1/2} (A_bool with unit self-loops) D^{-1/2}
# The result is stored under its own name so that the input `A` is not
# overwritten and the cell can be re-run safely.
B, _, L = A.shape  # (batch, nodes, nodes); L is the size of the last axis
A_bool = (A > 0).float()
# Clear whatever sits on the diagonal, then add exactly one self-loop per
# node; this also guarantees every degree is >= 1, so pow(-0.5) is finite.
identity = torch.eye(L, dtype=A_bool.dtype, device=A_bool.device)
A_bool = A_bool * (1.0 - identity) + identity
degrees = torch.sum(A_bool, dim=-1)  # (B, L) row sums
D = torch.diag_embed(degrees)  # degree matrix, kept for inspection only
D_inv_sqrt = torch.diag_embed(torch.pow(degrees, -0.5))
L_norm = torch.matmul(torch.matmul(D_inv_sqrt, A_bool), D_inv_sqrt)

In [208]:
L_norm

tensor([[[0.5000, 0.0000, 0.5000, 0.0000],
         [0.0000, 0.3333, 0.4082, 0.4082],
         [0.0000, 0.4082, 0.5000, 0.0000],
         [0.0000, 0.4082, 0.0000, 0.5000]],

        [[1.0000, 0.0000, 0.0000, 0.0000],
         [0.5774, 0.3333, 0.0000, 0.4082],
         [0.0000, 0.0000, 1.0000, 0.0000],
         [0.0000, 0.0000, 0.7071, 0.5000]]])

In [211]:
degrees

tensor([[2., 3., 2., 2.],
        [1., 3., 1., 2.]])

In [2]:
import torch
mask = torch.randn((3, 4))

In [58]:
mask = torch.tensor([[0, 1, 0, 1], 
                     [1, 0, 0, 1], 
                     [1, 1, 0, 0]])

In [91]:
X = torch.randn((3, 4, 2))

In [92]:
X

tensor([[[-1.5856,  0.8895],
         [ 0.5397,  0.1495],
         [-0.1620, -1.3551],
         [ 1.6799,  0.2898]],

        [[ 0.7726,  0.5483],
         [ 0.1684,  1.7168],
         [ 1.4387, -1.1326],
         [-0.2431,  1.0632]],

        [[-0.1673, -0.7475],
         [-1.3425, -0.8553],
         [-1.7839,  1.1274],
         [ 0.3538,  0.3980]]])

In [93]:
# Move the kept positions (mask == 1) to the front of every sequence.
# stable=True guarantees that entries with equal mask values keep their
# original relative order, so the kept steps stay in temporal order.
sorted_mask, sorted_mask_index = torch.sort(mask, dim=-1, descending=True, stable=True)  # (B, L) -> (B, L)
# Apply the same permutation to X along the sequence axis; the index is
# expanded over the feature axis because gather needs one index per element.
# NOTE: X is overwritten, so re-running this cell permutes it again.
X = torch.gather(X, dim=1, index=sorted_mask_index.unsqueeze(-1).expand(-1, -1, X.size(-1)))

In [95]:
X

tensor([[[-1.5856,  0.8895],
         [ 0.5397,  0.1495],
         [-0.1620, -1.3551],
         [ 1.6799,  0.2898]],

        [[ 0.7726,  0.5483],
         [ 0.1684,  1.7168],
         [ 1.4387, -1.1326],
         [-0.2431,  1.0632]],

        [[-0.1673, -0.7475],
         [-1.3425, -0.8553],
         [-1.7839,  1.1274],
         [ 0.3538,  0.3980]]])

In [85]:
mask = sorted_mask
reduced_sequence_length = torch.sum(mask, 1) #(bs, )

In [86]:
reduced_sequence_length

tensor([2, 2, 2])

In [96]:
to_max_length = torch.arange(torch.max(reduced_sequence_length))

In [97]:
to_max_length

tensor([0, 1])

In [98]:
# Keep only the first max(reduced_sequence_length) positions along the
# sequence axis of both the features and the mask (to_max_length is
# arange(max length), so this is a prefix selection).
X = X[:, to_max_length, :]
mask = mask[:, to_max_length]

In [99]:
X

tensor([[[-1.5856,  0.8895],
         [ 0.5397,  0.1495]],

        [[ 0.7726,  0.5483],
         [ 0.1684,  1.7168]],

        [[-0.1673, -0.7475],
         [-1.3425, -0.8553]]])

In [112]:
max_step = 5
seq_len = torch.tensor([3, 4, 2])

In [113]:
mask = torch.arange(max_step)[None, :] < seq_len[:, None]

In [120]:
mask

tensor([[ True,  True,  True, False, False],
        [ True,  True,  True,  True, False],
        [ True,  True, False, False, False]])

In [121]:
x = torch.randn((3, 5, 2))
x

tensor([[[ 0.7198,  0.5123],
         [-0.4916, -1.3961],
         [ 1.0301,  0.0440],
         [ 0.1436, -0.7570],
         [-0.0789, -0.2011]],

        [[-0.9736, -1.5427],
         [ 0.2477, -1.1202],
         [-1.4221, -0.3273],
         [-2.5559,  1.3811],
         [ 0.6052, -0.5862]],

        [[ 2.3897, -0.4373],
         [-0.3200,  2.3656],
         [ 0.9912,  0.0768],
         [-0.0712, -0.6074],
         [ 0.0264, -0.4160]]])

In [135]:
# Pick, for every batch element i, the feature vector at step seq_len[i] - 1,
# i.e. the last valid (unpadded) step of each sequence -> (batch, features).
x[range(x.shape[0]), seq_len-1, :]

tensor([[ 1.0301,  0.0440],
        [-2.5559,  1.3811],
        [-0.3200,  2.3656]])

In [136]:
x

tensor([[[ 0.7198,  0.5123],
         [-0.4916, -1.3961],
         [ 1.0301,  0.0440],
         [ 0.1436, -0.7570],
         [-0.0789, -0.2011]],

        [[-0.9736, -1.5427],
         [ 0.2477, -1.1202],
         [-1.4221, -0.3273],
         [-2.5559,  1.3811],
         [ 0.6052, -0.5862]],

        [[ 2.3897, -0.4373],
         [-0.3200,  2.3656],
         [ 0.9912,  0.0768],
         [-0.0712, -0.6074],
         [ 0.0264, -0.4160]]])

In [1]:
import torch

In [17]:
a = torch.tensor(3.0, requires_grad=True)
b = torch.tensor(1.0, requires_grad=True)

In [18]:
c = torch.where(a > b, 1, 0)

In [3]:
import torch

In [1]:
def top_k_graph_dense(node_embeddings, k, device=None):
    """Build a dense top-k graph from inner-product similarities.

    Args:
        node_embeddings: 2-D tensor of shape (num_nodes, dim).
        k: Number of highest-similarity neighbours kept per row.
        device: Unused; accepted only for backward compatibility. The result
            lives on the device of ``node_embeddings``.

    Returns:
        Tensor of shape (num_nodes, num_nodes) with the dtype of the input,
        containing the similarity score at each row's top-k positions and
        exactly 0 elsewhere.
    """
    raw_graph = torch.mm(node_embeddings, node_embeddings.t())
    _, indices = raw_graph.topk(k=k, dim=-1)

    # Boolean keep-mask built directly on the right device.
    keep = torch.zeros_like(raw_graph, dtype=torch.bool)
    rows = torch.arange(raw_graph.shape[0], device=raw_graph.device).view(-1, 1)
    keep[rows, indices] = True

    # masked_fill (rather than multiplying by a float32 mask) preserves the
    # input dtype and writes a true 0 for dropped entries: score * 0 would
    # give -0.0 for negative scores and NaN for infinite ones. Gradients
    # still flow through the kept entries.
    sparse_graph = raw_graph.masked_fill(~keep, 0)
    return sparse_graph

In [4]:
node_embedding = torch.randn((4, 2))

In [6]:
top_k_graph_dense(node_embedding, 2)

tensor([[ 0.1291, -0.0000,  0.1428,  0.0000],
        [-0.0000,  0.1976, -0.1022, -0.0000],
        [ 0.1428, -0.0000,  0.1762,  0.0000],
        [ 0.0871, -0.0000,  0.0844,  0.0000]])

In [None]:
def sampling(att_log_logit, training, temp=1):
    """Turn attention logits into (relaxed) Bernoulli probabilities.

    Args:
        att_log_logit: Tensor of logits.
        training: When true, logistic noise is added to the logits and the
            sum is divided by ``temp`` before the sigmoid; when false the
            plain sigmoid of the logits is returned.
        temp: Temperature applied in training mode only.

    Returns:
        Tensor with the shape of ``att_log_logit`` and values in (0, 1).
    """
    if not training:
        # Deterministic path: no noise, no temperature.
        return att_log_logit.sigmoid()

    # u ~ U(1e-10, 1 - 1e-10); log(u) - log(1 - u) is logistic noise.
    uniform = torch.empty_like(att_log_logit).uniform_(1e-10, 1 - 1e-10)
    logistic_noise = torch.log(uniform) - torch.log(1.0 - uniform)
    perturbed = att_log_logit + logistic_noise
    return (perturbed / temp).sigmoid()

In [14]:
import torch
A = torch.randn((4, 4)).abs()
A

tensor([[1.3420, 0.2144, 1.2561, 0.8420],
        [2.2793, 0.1255, 0.4976, 0.1684],
        [1.3094, 0.0990, 0.6438, 0.2882],
        [0.4883, 0.0616, 1.0262, 0.3365]])

In [15]:
# Symmetric normalisation via explicit diagonal matrices:
#   A1 = D^{-1/2} A D^{-1/2}, with D = diag(row sums of A).
degrees = torch.sum(A, dim=-1)  # row sums
D_inv_sqrt = torch.diag_embed(torch.pow(degrees, -0.5))  # diag(degrees ** -0.5)
A1 = torch.matmul(torch.matmul(D_inv_sqrt, A), D_inv_sqrt)
A1

tensor([[0.3672, 0.0640, 0.4295, 0.3185],
        [0.6804, 0.0409, 0.1856, 0.0695],
        [0.4477, 0.0369, 0.2751, 0.1362],
        [0.1847, 0.0254, 0.4850, 0.1759]])

In [19]:
degrees

tensor([3.6545, 3.0708, 2.3405, 1.9126])

In [18]:
D_inv_sqrt

tensor([[0.5231, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.5707, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.6537, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.7231]])

In [16]:
def normalize(adj, mode="sym"):
    """Degree-normalise a dense 2-D adjacency matrix.

    Args:
        adj: Tensor of shape (n, n). Degrees are the row sums ``adj.sum(dim=1)``.
        mode: ``"sym"`` returns ``D^{-1/2} adj D^{-1/2}``;
            ``"row"`` returns ``D^{-1} adj``.

    Returns:
        Tensor with the shape of ``adj``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values (the
            function previously fell through and returned ``None``).
    """
    if mode not in ("sym", "row"):
        raise ValueError(
            "unsupported normalization mode %r (expected 'sym' or 'row')" % (mode,))

    eps = 1e-10  # keeps the division finite for isolated (zero-degree) nodes
    degree = adj.sum(dim=1, keepdim=False)
    if mode == "sym":
        inv_sqrt_degree = 1. / (torch.sqrt(degree) + eps)
        # Broadcasting scales row i and column j by d_i^{-1/2} and d_j^{-1/2}
        # without building the diagonal matrices.
        return inv_sqrt_degree[:, None] * adj * inv_sqrt_degree[None, :]

    inv_degree = 1. / (degree + eps)
    return inv_degree[:, None] * adj

In [17]:
normalize(A)

tensor([[0.3672, 0.0640, 0.4295, 0.3185],
        [0.6804, 0.0409, 0.1856, 0.0695],
        [0.4477, 0.0369, 0.2751, 0.1362],
        [0.1847, 0.0254, 0.4850, 0.1759]])

In [None]:
E = torch.tensor([0.0003, 0.0002, 0.0002, 0.0002, 0.0000, 0.0000, 0.0002, 0.0000, 0.0002,
        0.0002, 0.0000, 0.0001, 0.0002, 0.0000, 0.0002, 0.0002, 0.0002, 0.0002,
        0.0000, 0.0000, 0.0002, 0.0003, 0.0000, 0.0002, 0.0000, 0.0002, 0.0002,
        0.0000, 0.0002, 0.0002, 0.0002, 0.0000, 0.0000, 0.0002, 0.0000, 0.0002,
        0.0000, 0.0002, 0.0002, 0.0000, 0.0002, 0.0002, 0.0000, 0.0000, 0.0002,
        0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0000, 0.0002, 0.0001,
        0.0002, 0.0000, 0.0002, 0.0002, 0.0000, 0.0002, 0.0000, 0.0002, 0.0000,
        0.0002, 0.0000, 0.0003, 0.0002, 0.0000, 0.0002, 0.0000, 0.0002, 0.0002,
        0.0003, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003, 0.0000, 0.0002, 0.0002,
        0.0003, 0.0000, 0.0003, 0.0002, 0.0000, 0.0000, 0.0002, 0.0002, 0.0003,
        0.0000, 0.0000, 0.0002, 0.0000, 0.0000, 0.0002, 0.0002, 0.0002, 0.0002,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0002, 0.0002, 0.0002, 0.0000,
        0.0003, 0.0000, 0.0002, 0.0000, 0.0000, 0.0000, 0.0002, 0.0000, 0.0000,
        0.0002, 0.0002, 0.0002, 0.0000, 0.0002, 0.0001, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0002, 0.0000, 0.0002, 0.0000, 0.0002, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000])

In [9]:
import torch
import torch.nn as nn

In [2]:
c = torch.randn(2, 3)

In [10]:
X = torch.randn(3, 2)
X

tensor([[-0.9783,  0.1693],
        [ 1.1213, -0.0492],
        [-1.2739,  0.2494]])

In [17]:
lin  = nn.Linear(1, X.shape[-1], bias=False)
weighted_tensor = lin(torch.ones(1, 1))

In [18]:
X_fts = X * weighted_tensor.unsqueeze(0)

In [19]:
X_fts

tensor([[[ 0.5817,  0.0075],
         [-0.6667, -0.0022],
         [ 0.7575,  0.0111]]], grad_fn=<MulBackward0>)

In [20]:
weighted_tensor

tensor([[-0.5946,  0.0444]], grad_fn=<MmBackward0>)

In [25]:
lin.weight.reshape(1, -1) * X

tensor([[ 0.5817,  0.0075],
        [-0.6667, -0.0022],
        [ 0.7575,  0.0111]], grad_fn=<MulBackward0>)

In [None]:
def graph_sparsification(S, relative_threshold=None, to_keep_edge=None):
    """Keep only the highest-scoring edges of every graph in a batch.

    The non-zero entries of each ``S[b]`` are counted as its edges. The score
    at rank ``k`` (0-based, descending) is used as a strict threshold, so at
    most ``k`` entries survive (fewer when scores tie with the threshold).

    Args:
        S: Score tensor of shape (bs, max_step, max_step).
        relative_threshold: Fraction of the non-zero entries to keep;
            ``k = ceil(num_edges * relative_threshold)`` per batch element.
            Takes precedence over ``to_keep_edge`` when it is truthy.
        to_keep_edge: Explicit ``k``: an int or a tensor of shape (bs,).

    Returns:
        float32 tensor with the shape of ``S``: 1.0 for kept entries, else 0.0.

    Raises:
        ValueError: If neither ``relative_threshold`` nor ``to_keep_edge`` is
            given.
    """
    if relative_threshold is None and to_keep_edge is None:
        raise ValueError(
            "either relative_threshold or to_keep_edge must be provided")
    if S.numel() == 0:
        # Nothing to rank; reshape(bs, -1) would be ambiguous here.
        return torch.zeros(S.shape, dtype=torch.float32, device=S.device)

    batch_size = S.shape[0]
    # reshape (not view) so non-contiguous inputs work too.
    S_flatten = S.reshape(batch_size, -1)  # (bs, max_step * max_step)
    num_entries = S_flatten.shape[1]
    sorted_S_flatten, _ = S_flatten.sort(dim=-1, descending=True)
    num_edges = sorted_S_flatten.count_nonzero(-1)  # (bs,)

    if relative_threshold or to_keep_edge is None:
        to_keep_edge = torch.ceil(
            num_edges * relative_threshold).to(torch.long)
    # Accept ints as well as tensors and bring them to one index per row.
    to_keep_edge = torch.as_tensor(
        to_keep_edge, dtype=torch.long, device=S.device
    ).reshape(-1).expand(batch_size)

    # When k reaches the number of entries there is no score at that rank
    # (this used to raise IndexError). Clamp the lookup and keep every
    # non-zero entry of those rows instead.
    overflow = to_keep_edge >= num_entries
    safe_index = to_keep_edge.clamp(max=num_entries - 1)
    rows = torch.arange(batch_size, device=S.device)
    threshold_score = sorted_S_flatten[rows, safe_index]  # (bs,)

    keep = S_flatten > threshold_score.reshape(-1, 1)
    keep = keep | (overflow.reshape(-1, 1) & (S_flatten != 0))
    return keep.float().reshape(S.shape)  # (bs, max_step, max_step)

In [26]:
adj = torch.tensor([[[1, 2.0, 3.0, 0],
                    [0, 4.0, 8, 6],
                    [2, 7, 0, 5],
                    [2, 9, 0, 10]],
                    
                    [[0, 0, 0, 0],
                    [5, 4, 2, 6],
                    [9, 1, 3, 7],
                    [17, 3, 8, 23]]])

In [27]:
graph_sparsification(adj, relative_threshold=0.5)

tensor([[[0., 0., 0., 0.],
         [0., 0., 1., 1.],
         [0., 1., 0., 1.],
         [0., 1., 0., 1.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 1.],
         [1., 0., 0., 1.],
         [1., 0., 1., 1.]]])

In [48]:
def _fcn_net(layer_sizes=[80, 40]):
    fc_layers = nn.Sequential()
    layer_sizes = [160] + layer_sizes
    for i in range(len(layer_sizes) - 1):
        fc_layers.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
        # nn.BatchNorm1d(layer_sizes[i + 1])
        fc_layers.append(nn.BatchNorm1d(num_features=layer_sizes[i+1],
                  
                              momentum=0.95,
                                        eps=0.0001))
        fc_layers.append(nn.ReLU())
    fc_layers.append(nn.Linear(layer_sizes[-1], 1))
    return fc_layers

In [75]:
class Fcn_net(nn.Module):
    """Fully connected scoring head applied to every step of a sequence.

    Each hidden layer is ``Linear -> BatchNorm1d -> ReLU``; a final ``Linear``
    maps the last hidden width to one output per step.

    Args:
        layer_sizes: Hidden layer widths, any sequence of ints.
        input_size: Width of the input features (was hard-coded to 160).
    """

    def __init__(self, layer_sizes=(80, 40), input_size=160):
        super().__init__()
        # ModuleList (not a plain list) registers the sub-layers, so they show
        # up in parameters()/state_dict() and follow .train()/.eval()/.to().
        self.lin = nn.ModuleList()
        self.bn = nn.ModuleList()
        self.act = nn.ReLU()
        pre_size = input_size
        for cur_size in layer_sizes:
            self.lin.append(nn.Linear(pre_size, cur_size))
            # NOTE(review): in PyTorch `momentum` is the weight of the *new*
            # batch statistic; if 0.95 was ported from a TF-style decay, the
            # PyTorch equivalent would be 0.05 -- confirm before changing.
            self.bn.append(nn.BatchNorm1d(num_features=cur_size,
                                          momentum=0.95,
                                          eps=0.0001))
            pre_size = cur_size
        self.out = nn.Linear(pre_size, 1)

    def forward(self, x):
        """Score every step of ``x``.

        Args:
            x: Tensor of shape (batch, steps, input_size).

        Returns:
            Tensor of shape (batch, steps, 1).
        """
        for lin, bn in zip(self.lin, self.bn):
            x = lin(x)
            # BatchNorm1d expects channels on dim 1: (B, L, C) -> (B, C, L)
            # for the normalisation, then back to (B, L, C).
            x = bn(x.transpose(1, 2)).transpose(1, 2)
            x = self.act(x)
        return self.out(x)
    

In [76]:
layers = Fcn_net()


In [78]:
X = torch.randn((2, 300, 160)) * 3.0 + 0.02

In [79]:
t = layers(X)

In [80]:
t.sum().backward()

In [82]:
layers.lin[0].weight.grad

tensor([[-0.5245,  0.1437,  0.5624,  ..., -2.5712,  0.8055,  1.0378],
        [ 0.0907, -2.0665,  1.0194,  ..., -2.4032,  0.4346,  0.0520],
        [ 0.4680, -2.1957, -0.7792,  ..., -2.7512, -1.2171, -0.7632],
        ...,
        [ 1.3315, -1.2946,  2.3550,  ...,  0.4583,  0.6593, -0.3422],
        [-0.2917, -2.1434,  1.1829,  ...,  1.2107,  0.5214, -0.2825],
        [-0.5851,  1.0321, -1.4969,  ..., -0.6987, -0.5602,  0.2895]])

In [85]:
with torch.no_grad():
    layers.eval()
    t = layers(X)

In [88]:
layers.lin[0].weight.grad

tensor([[-0.5245,  0.1437,  0.5624,  ..., -2.5712,  0.8055,  1.0378],
        [ 0.0907, -2.0665,  1.0194,  ..., -2.4032,  0.4346,  0.0520],
        [ 0.4680, -2.1957, -0.7792,  ..., -2.7512, -1.2171, -0.7632],
        ...,
        [ 1.3315, -1.2946,  2.3550,  ...,  0.4583,  0.6593, -0.3422],
        [-0.2917, -2.1434,  1.1829,  ...,  1.2107,  0.5214, -0.2825],
        [-0.5851,  1.0321, -1.4969,  ..., -0.6987, -0.5602,  0.2895]])

### Debug

In [90]:
def create_mask(seq_lengths, max_len):
    """Return a float mask marking the valid (unpadded) steps of each sequence.

    Args:
        seq_lengths: Integer tensor of shape (batch,) with per-sequence lengths.
        max_len: Number of columns of the mask.

    Returns:
        Float tensor of shape (batch, max_len): 1.0 where the column index is
        smaller than the sequence length, 0.0 otherwise.
    """
    n_rows = seq_lengths.size(0)
    # Column indices 0..max_len-1, moved to the device of the lengths.
    columns = torch.arange(max_len).to(seq_lengths.device)
    # One row of column indices per sequence (a view; no copy is needed).
    column_grid = columns.unsqueeze(0).expand(n_rows, -1)
    is_valid = column_grid < seq_lengths.unsqueeze(1)
    return is_valid.float()

In [92]:
seq_length = torch.tensor([2, 3, 4, 5, 7, 0, 8])
max_len = 10
create_mask(seq_length, max_len)

tensor([[1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1., 0., 0.]])

In [93]:
def graph_sparsification(S, relative_threshold=None, to_keep_edge=None):
    """Return a 0/1 mask selecting the largest scores of every graph.

    For each batch element the non-zero entries of ``S[b]`` count as edges.
    The score at rank ``k`` (0-based, descending) is a strict threshold, so at
    most ``k`` entries are kept (fewer when scores tie with the threshold).

    Args:
        S: Score tensor of shape (bs, max_step, max_step).
        relative_threshold: Ratio of the non-zero entries to keep;
            ``k = ceil(num_edges * relative_threshold)`` per batch element.
            Takes precedence over ``to_keep_edge`` when it is truthy.
        to_keep_edge: Explicit ``k``: an int or a tensor of shape (bs,).

    Returns:
        float32 mask with the shape of ``S`` (1.0 = keep, 0.0 = drop).

    Raises:
        ValueError: If neither ``relative_threshold`` nor ``to_keep_edge`` is
            given.
    """
    if relative_threshold is None and to_keep_edge is None:
        raise ValueError(
            "either relative_threshold or to_keep_edge must be provided")
    if S.numel() == 0:
        # Nothing to rank; reshape(bs, -1) would be ambiguous here.
        return torch.zeros(S.shape, dtype=torch.float32, device=S.device)

    batch_size = S.shape[0]
    # reshape (not view) so non-contiguous inputs work too.
    S_flatten = S.reshape(batch_size, -1)  # (bs, max_step * max_step)
    num_entries = S_flatten.shape[1]
    sorted_S_flatten, _ = S_flatten.sort(dim=-1, descending=True)
    num_edges = sorted_S_flatten.count_nonzero(-1)  # (bs,)

    if relative_threshold or to_keep_edge is None:
        # Keep the largest (ratio * num_edges) scores of each batch element.
        to_keep_edge = torch.ceil(
            num_edges * relative_threshold).to(torch.long)
    # Accept ints as well as tensors and bring them to one index per row.
    to_keep_edge = torch.as_tensor(
        to_keep_edge, dtype=torch.long, device=S.device
    ).reshape(-1).expand(batch_size)

    # A rank equal to the number of entries has no score to look up (this
    # used to raise IndexError): clamp the lookup and keep every non-zero
    # entry of those rows instead.
    overflow = to_keep_edge >= num_entries
    safe_index = to_keep_edge.clamp(max=num_entries - 1)
    rows = torch.arange(batch_size, device=S.device)
    threshold_score = sorted_S_flatten[rows, safe_index]  # (bs,)

    keep = S_flatten > threshold_score.reshape(-1, 1)
    keep = keep | (overflow.reshape(-1, 1) & (S_flatten != 0))
    return keep.float().reshape(S.shape)  # (bs, max_step, max_step)

In [96]:
adj = torch.tensor([[[1, 2, 4, 0],
                    [0, 7, 9, 3],
                    [2, 3, 0, 2],
                    [2, 3, 0, 2]],
                    
                    [[0, 0, 0, 0],
                    [4, 2, 3, 4],
                    [2, 3, 8, 2],
                    [2, 3, 8, 2]]])
mask = graph_sparsification(adj, relative_threshold=0.5)
adj * mask

tensor([[[0., 0., 4., 0.],
         [0., 7., 9., 3.],
         [0., 3., 0., 0.],
         [0., 3., 0., 0.]],

        [[0., 0., 0., 0.],
         [4., 0., 0., 4.],
         [0., 0., 8., 0.],
         [0., 0., 8., 0.]]])

### Differentiable PyTorch equivalent of `tf.greater`

In [99]:
a = torch.tensor([3., 2.], requires_grad=True)
b = torch.tensor([0., 1.], requires_grad=True)

In [102]:
t  = (a > b)

In [112]:
a, b

(tensor([3., 2.], requires_grad=True), tensor([0., 1.], requires_grad=True))

In [107]:
t = torch.gt(a , b)

tensor([True, True])

In [113]:
t.requires_grad

False

In [115]:
L = 3
(torch.ones(L, L, dtype=torch.float32) - torch.eye(L, dtype=torch.float32)) + torch.eye(L, dtype=torch.float32)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [1]:
import torch


In [2]:
seq_len = torch.tensor([2, 3, 4, 5])

In [4]:
idx = (seq_len-1).view(-1, 1).unsqueeze(-1).expand(len(seq_len), 1, 3)

In [5]:
idx.shape # (bs, 1, h)

torch.Size([4, 1, 3])

In [6]:
a = torch.tensor([0, 1, 0])

In [9]:
 torch.arange(3).unsqueeze(-1).repeat(1, 4)

tensor([[0, 0, 0, 0],
        [1, 1, 1, 1],
        [2, 2, 2, 2]])