In [1]:
import torch
import numpy as np
import pandas as pd 

In [15]:
def zero_filtering(x: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
    """
    Return a copy of ``x`` in which every exact zero is replaced by ``eps``.

    Motivation: the competition metric is cosine similarity, which can come
    out as NaN for zero-valued embeddings; nudging zeros to a tiny non-zero
    value avoids that.

    The input tensor is NOT modified. The result is built out-of-place, so the
    caller's data (and any autograd graph attached to it) stays intact.

    Args:
        x: Input tensor. Intended for floating-point data.
           NOTE(review): for integer dtypes ``eps`` would presumably be
           truncated to 0 -- confirm before using on integer tensors.
        eps: Replacement value written wherever ``x == 0``. Defaults to 1e-8.

    Returns:
        A new tensor with the same shape and dtype as ``x``.
    """
    # torch.where picks eps where the mask is True and the original value
    # elsewhere, without touching x itself.
    return torch.where(x == 0, torch.full_like(x, eps), x)

In [3]:
# Two identical rows; the trailing -1 entries are the values the next cell filters out.
row = [1, 2, 4, 5, 6, 3, 5, 4, -1, -1, -1]
test = torch.tensor([row, row])
test

tensor([[ 1,  2,  4,  5,  6,  3,  5,  4, -1, -1, -1],
        [ 1,  2,  4,  5,  6,  3,  5,  4, -1, -1, -1]])

In [6]:
# Keep only the entries of the first row that are not -1 (boolean-mask indexing).
first_row = test[0]
first_row[first_row != -1]

tensor([1, 2, 4, 5, 6, 3, 5, 4])

In [4]:
# Same sequence as a single 1-D tensor: eight values followed by three -1 entries.
values = [1, 2, 4, 5, 6, 3, 5, 4]
test = torch.tensor(values + [-1] * 3)
test

tensor([ 1,  2,  4,  5,  6,  3,  5,  4, -1, -1, -1])

In [14]:
# Drop every -1 entry; the result is a new 1-D tensor of the remaining values.
torch.masked_select(test, test != -1)

tensor([1, 2, 4, 5, 6, 3, 5, 4])

In [4]:
# Dummy (40, 1024) tensor of standard-normal values.
# A dedicated seeded generator makes this cell idempotent: re-running it (or
# Restart & Run All) always yields the same tensor, without touching the
# global RNG state.
rng = torch.Generator().manual_seed(42)
test = torch.randn(40, 1024, generator=rng)
test, test.shape

(tensor([[-1.3444,  0.5577, -0.7954,  ..., -0.2724,  0.3714,  0.4175],
         [-0.8089, -0.3178,  1.5001,  ..., -0.8454,  0.0325,  0.9044],
         [-0.6807,  0.5505, -0.4885,  ...,  0.5007,  0.1895,  0.2763],
         ...,
         [-1.2769, -0.3433, -0.9795,  ...,  0.5901,  0.6422, -0.0337],
         [ 2.2834, -0.0694, -1.0468,  ...,  0.3699, -0.9568, -0.8544],
         [ 0.6269, -0.4410, -0.0246,  ...,  0.4569,  0.0257, -1.7585]]),
 torch.Size([40, 1024]))

In [19]:
# Overwrite the first row with zeros (in place) to simulate a zero embedding.
test[0].zero_()
test

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.8089, -0.3178,  1.5001,  ..., -0.8454,  0.0325,  0.9044],
        [-0.6807,  0.5505, -0.4885,  ...,  0.5007,  0.1895,  0.2763],
        ...,
        [-1.2769, -0.3433, -0.9795,  ...,  0.5901,  0.6422, -0.0337],
        [ 2.2834, -0.0694, -1.0468,  ...,  0.3699, -0.9568, -0.8544],
        [ 0.6269, -0.4410, -0.0246,  ...,  0.4569,  0.0257, -1.7585]])

In [23]:
# Element-wise 4th power (torch.pow returns a fresh tensor), then replace the
# resulting exact zeros with a small epsilon.
powered = test.pow(4)
embedding = zero_filtering(powered)
embedding, embedding.shape

(tensor([[1.0000e-08, 1.0000e-08, 1.0000e-08,  ..., 1.0000e-08, 1.0000e-08,
          1.0000e-08],
         [4.2806e-01, 1.0195e-02, 5.0638e+00,  ..., 5.1090e-01, 1.1206e-06,
          6.6914e-01],
         [2.1464e-01, 9.1817e-02, 5.6960e-02,  ..., 6.2874e-02, 1.2907e-03,
          5.8280e-03],
         ...,
         [2.6588e+00, 1.3890e-02, 9.2048e-01,  ..., 1.2129e-01, 1.7013e-01,
          1.2829e-06],
         [2.7183e+01, 2.3170e-05, 1.2006e+00,  ..., 1.8714e-02, 8.3804e-01,
          5.3299e-01],
         [1.5445e-01, 3.7813e-02, 3.6707e-07,  ..., 4.3568e-02, 4.3293e-07,
          9.5616e+00]]),
 torch.Size([40, 1024]))

In [24]:
# Average across dim 1, collapsing (40, 1024) down to one value per row: (40,).
pool_embedding = embedding.mean(dim=1)
pool_embedding, pool_embedding.shape

(tensor([1.0000e-08, 2.6001e+00, 3.6870e+00, 2.9245e+00, 2.7390e+00, 3.3937e+00,
         2.8184e+00, 2.6545e+00, 2.7991e+00, 2.7647e+00, 3.3300e+00, 2.4390e+00,
         2.6048e+00, 2.5761e+00, 3.0490e+00, 2.8663e+00, 3.1534e+00, 2.9484e+00,
         3.1056e+00, 3.3230e+00, 2.9196e+00, 2.9802e+00, 2.8987e+00, 2.9183e+00,
         4.0912e+00, 3.4832e+00, 2.9202e+00, 3.2325e+00, 2.8156e+00, 2.9903e+00,
         2.6084e+00, 2.7154e+00, 2.4076e+00, 3.3756e+00, 3.0120e+00, 3.0570e+00,
         3.5320e+00, 3.2649e+00, 3.2761e+00, 3.4769e+00]),
 torch.Size([40]))

In [25]:
# Fourth root of the pooled values, undoing the earlier 4th power
# (presumably GeM / generalized-mean pooling with p = 4 -- confirm).
# Note: (1e-8) ** 0.25 == 0.01, which is why the zeroed row shows up as 0.0100.
gem_embedding = pool_embedding.pow(0.25)
gem_embedding

tensor([0.0100, 1.2698, 1.3857, 1.3077, 1.2865, 1.3573, 1.2957, 1.2764, 1.2935,
        1.2895, 1.3509, 1.2497, 1.2704, 1.2669, 1.3214, 1.3012, 1.3326, 1.3104,
        1.3275, 1.3501, 1.3072, 1.3139, 1.3048, 1.3070, 1.4222, 1.3661, 1.3072,
        1.3409, 1.2954, 1.3150, 1.2708, 1.2837, 1.2456, 1.3555, 1.3174, 1.3223,
        1.3709, 1.3442, 1.3454, 1.3655])