In [1]:
import torch
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

In [2]:
def OPW_w(x: torch.Tensor, y: torch.Tensor, a: torch.Tensor = None, b: torch.Tensor = None, std=1, verbose=0, lambda1=0.1, lambda2=10,
          tol=.5e-2, maxIter=20, p_norm='inf', metric='sqreuclidean'):
    """Compute a regularised transport distance between two sequences.

    NOTE(review): the structure matches the Order-Preserving Wasserstein (OPW)
    formulation with Sinkhorn-style scaling iterations -- confirm against the
    reference implementation this was ported from.

    Args:
        x: Tensor of shape [N, d], the first sequence (one row per instance).
        y: Tensor of shape [M, d], the second sequence.
        a: Optional weights for the N rows of ``x``; any tensor with N
            elements (reshaped to [N, 1]). Defaults to uniform ``1/N``.
        b: Optional weights for the M rows of ``y``; any tensor with M
            elements (reshaped to [M, 1]). Defaults to uniform ``1/M``.
        std: Standard deviation of the Gaussian prior ``P`` over the
            normalised distance of each (i, j) pair from the diagonal.
        verbose: When > 0, print the criterion each time it is evaluated.
        lambda1: Weight of the ``S = lambda1 / (relative_pos**2 + 1)`` term.
        lambda2: Divisor inside the exponential of the kernel ``K``.
        tol: The loop stops once the criterion falls below this value.
        maxIter: Upper bound on the iteration counter.
        p_norm: Norm order passed through ``float()`` to ``Tensor.norm``.
        metric: Ground metric name forwarded to ``pdist2``.

    Returns:
        ``(dist, T)`` where ``dist`` is a tensor with one element (the sum of
        ``T * D`` over all pairs) and ``T`` is the [N, M] transport plan.

    Raises:
        AssertionError: If ``x`` and ``y`` have different feature dimensions.
    """
    assert y.size(1) == x.size(1), "The dimensions of instances in the input sequences must be the same!"
    N = x.size(0)
    M = y.size(0)

    D = pdist2(x, y, metric=metric)

    # Build every auxiliary tensor with the dtype/device of the cost matrix:
    # torch.matmul does not promote dtypes, so mixing default-dtype helpers
    # with e.g. float64 or GPU inputs would raise.
    dtype = D.dtype if D.is_floating_point() else torch.get_default_dtype()
    device = D.device

    col_x = (torch.arange(1, N + 1, dtype=dtype, device=device) / N).view(N, 1)
    col_y = torch.arange(1, M + 1, dtype=dtype, device=device) / M
    relative_pos = col_x - col_y    # [N, M] via broadcasting

    # Gaussian prior on the normalised distance of (i, j) from the diagonal.
    l = torch.abs(relative_pos) / ((1/N**2 + 1/M**2)**0.5)
    P = torch.exp(-l**2/(2*std**2)) / (std*(2*np.pi)**0.5)

    S = lambda1 / (relative_pos**2 + 1)

    K = P * torch.exp((S - D) / lambda2)

    if a is None:
        a = torch.ones(N, 1, dtype=dtype, device=device) / N
    else:
        a = a.to(device=device, dtype=dtype).reshape(N, 1)

    if b is None:
        b = torch.ones(M, 1, dtype=dtype, device=device) / M
    else:
        b = b.to(device=device, dtype=dtype).reshape(M, 1)

    ainvK = K / a   # [N, M]

    u = torch.ones(N, 1, dtype=dtype, device=device) / N
    # Initialise v so the function still returns a plan when maxIter < 1.
    v = b / torch.matmul(K.T, u)    # [M, 1]

    n_iter = 0
    while n_iter < maxIter:
        u = 1. / torch.matmul(ainvK, (b / (torch.matmul(K.T, u))))

        n_iter += 1
        # The criterion is only evaluated when the counter is 1 modulo 20 or
        # has reached maxIter.
        if n_iter % 20 == 1 or n_iter == maxIter:
            v = b / torch.matmul(K.T, u)    # [M, 1]
            u = 1 / torch.matmul(ainvK, v)  # [N, 1]

            criterion = torch.sum(torch.abs(v * torch.matmul(K.T, u) - b), dim=0)
            criterion = criterion.norm(p=float(p_norm))
            # A NaN criterion can never satisfy `< tol`; stop instead of
            # burning the remaining iterations on NaNs.
            if criterion < tol or torch.isnan(criterion):
                break

            # u was updated a second time above, so count it.
            n_iter += 1
            if verbose > 0:
                print(f"Iteration : {n_iter}, Criterion: {criterion}")

    U = K * D   # [N, M]

    dist = torch.sum(u * torch.matmul(U, v), dim=0)
    T = v.T * (u * K)

    return dist, T

In [3]:
def pdist2(X, Y, metric='sqreuclidean'):
    """Dispatch to the pairwise-distance helper selected by ``metric``.

    The metric name is matched case-insensitively. Supported names are
    'sqreuclidean', 'euclidean', 'l1', 'cosine', 'emd' and 'chisqr'.

    Args:
        X: First set of row vectors.
        Y: Second set of row vectors.
        metric: Name of the distance to compute.

    Returns:
        Whatever the selected helper returns for ``(X, Y)``; for 'euclidean'
        the element-wise square root of the squared Euclidean distances.

    Raises:
        NotImplementedError: If ``metric`` is not one of the supported names.
    """
    name = metric.lower()
    if name == 'sqreuclidean':
        return distEucSqr(X, Y)
    if name == 'euclidean':
        return torch.sqrt(distEucSqr(X, Y))
    # Compare against the lower-cased spelling: the previous 'L1' literal
    # could never equal a lower-cased string, so this branch was unreachable.
    if name == 'l1':
        return distL1(X, Y)
    if name == 'cosine':
        return distCosine(X, Y)
    if name == 'emd':
        return distEmd(X, Y)
    if name == 'chisqr':
        return distChiSqr(X, Y)
    raise NotImplementedError(f'pdist - unknown metric: {metric}')


def distL1(x: torch.Tensor, y: torch.Tensor):
    """Pairwise L1 distance: sum of absolute differences over the last axis.

    ``x`` gets a new axis at position 1 so that it broadcasts against ``y``;
    for 2-D inputs [N, d] and [M, d] the result has shape [N, M].
    """
    pairwise_diff = x[:, None] - y
    return pairwise_diff.abs().sum(dim=-1)


def distCosine(x: torch.Tensor, y: torch.Tensor, eps=1e-8):
    """Pairwise cosine distance ``1 - cosine_similarity`` over the last axis.

    ``x`` gets a new axis at position 1 so that it broadcasts against ``y``;
    for 2-D inputs [N, d] and [M, d] the result has shape [N, M].

    Args:
        x: First set of row vectors.
        y: Second set of row vectors.
        eps: Forwarded to ``torch.nn.CosineSimilarity``.

    Raises:
        AssertionError: If neither input has a floating-point dtype.
    """
    # Accept any floating-point dtype: the former `== torch.float` check
    # rejected float64 inputs. As before, one floating-point operand suffices.
    assert x.is_floating_point() or y.is_floating_point(), "Inputs must be of type float"
    cos = torch.nn.CosineSimilarity(dim=-1, eps=eps)
    return 1 - cos(x.unsqueeze(1), y)


def distEmd(x: torch.Tensor, y: torch.Tensor):
    """Pairwise L1 distance between cumulative sums taken along the last axis.

    Each row of ``x`` and ``y`` is replaced by its running sum; the result is
    the sum of absolute differences between every pair of running sums. For
    2-D inputs [N, d] and [M, d] the result has shape [N, M].
    """
    cumulative_x = x.cumsum(dim=-1)
    cumulative_y = y.cumsum(dim=-1)
    gap = cumulative_x[:, None] - cumulative_y
    return gap.abs().sum(dim=-1)


def distEucSqr(x: torch.Tensor, y: torch.Tensor):
    """Pairwise squared Euclidean distance.

    Obtained by squaring the p=2 output of ``torch.cdist``; for 2-D inputs
    [N, d] and [M, d] the result has shape [N, M].
    """
    euclidean = torch.cdist(x, y, p=2)
    return euclidean ** 2


def distChiSqr(x: torch.Tensor, y: torch.Tensor, eps=1e-10):
    """Pairwise chi-square style distance over the last axis.

    For every pair of rows this is ``sum((x - y)**2 / (x + y + eps)) / 2``;
    ``eps`` is added to the denominator before dividing. For 2-D inputs
    [N, d] and [M, d] the result has shape [N, M].
    """
    expanded_x = x.unsqueeze(1)
    pair_sum = expanded_x + y
    pair_diff = expanded_x - y
    ratio = pair_diff ** 2 / (pair_sum + eps)
    return ratio.sum(dim=-1) / 2

In [4]:
def get_utterances(path):
    """Parse a whitespace-separated numeric text file into utterances.

    Every line that contains at least one token is one frame (a list of
    floats). Consecutive frames form an utterance; a line without tokens
    (empty or whitespace-only) ends the current utterance. Runs of such
    separator lines, or separators before the first frame, produce no empty
    utterances.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of utterances, each a list of frames, each a list of floats.

    Raises:
        ValueError: If a token cannot be converted with ``float``.
    """
    # Read the file once; the trailing utterance is flushed after the loop
    # rather than by counting lines in a separate pass over the file.
    with open(path, 'r') as f:
        lines = f.read().splitlines()

    X = []
    utterances = []
    for line in lines:
        # float() already honours a leading '-', no manual sign handling needed.
        frame = [float(token) for token in line.split()]
        if frame:
            utterances.append(frame)
        elif utterances:
            X.append(utterances)
            utterances = []

    # The file may end without a separator line after the last utterance.
    if utterances:
        X.append(utterances)

    return X

In [5]:
# Parse the training split first, then the test split, and report how many
# utterances each one contains.
X_train, X_test = (
    get_utterances(path=split_path)
    for split_path in (
        "/content/Train_Arabic_Digit.txt",
        "/content/Test_Arabic_Digit.txt",
    )
)
print(f"Number of training sample: {len(X_train)}")
print(f"Number of testing sample: {len(X_test)}")

Number of training sample: 6600
Number of testing sample: 2200


In [6]:
import torch
import numpy as np
import json 

# Labels 0..9 as a [10, 1] column; each label is then repeated along the row
# and flattened, giving 660 consecutive copies per class for training and 220
# for testing.
# NOTE(review): assumes the data files store utterances grouped by digit in
# ascending order with exactly 660 / 220 per digit -- confirm against the data.
n_class = torch.Tensor(torch.arange(0, 10)).view(10, 1)
y_train = n_class.expand(10, 660).reshape(6600)
y_test = n_class.expand(10, 220).reshape(2200)

In [7]:
# Store the JSON value 3 in fe.json in the current working directory.
# NOTE(review): looks like a leftover scratch cell -- confirm whether it is still needed.
with open('fe.json', 'w') as f:
    f.write(json.dumps(3))

In [8]:
# Distance from each test utterance with index 1200..1699 to every training
# utterance; one row of W_dist per test utterance, one column per training
# utterance.

# Convert the training utterances to tensors once instead of once per
# (test, train) pair.
train_tensors = [torch.Tensor(Y) for Y in X_train]

W_dist = []
# Slice the wanted index range directly; `start=1200` keeps idx_X equal to the
# position in X_test so the progress message is unchanged.
for idx_X, X in enumerate(X_test[1200:1700], start=1200):
    print(f'Calculating all distances of sample {idx_X+1}')
    query = torch.Tensor(X)
    # Only the distance is kept; the transport plan returned alongside it is not used.
    W_X = [
        OPW_w(query, Y, verbose=0, lambda1=0.1, lambda2=10)[0].item()
        for Y in train_tensors
    ]
    W_dist.append(W_X)

# Persist the distance rows so they can be reloaded without recomputation.
with open('/content/distances1200-1700.json', 'w') as f:
    json.dump(W_dist, f)

Calculating all distances of sample 1201
Calculating all distances of sample 1202
Calculating all distances of sample 1203
Calculating all distances of sample 1204
Calculating all distances of sample 1205
Calculating all distances of sample 1206
Calculating all distances of sample 1207
Calculating all distances of sample 1208
Calculating all distances of sample 1209
Calculating all distances of sample 1210
Calculating all distances of sample 1211
Calculating all distances of sample 1212
Calculating all distances of sample 1213
Calculating all distances of sample 1214
Calculating all distances of sample 1215
Calculating all distances of sample 1216
Calculating all distances of sample 1217
Calculating all distances of sample 1218
Calculating all distances of sample 1219
Calculating all distances of sample 1220
Calculating all distances of sample 1221
Calculating all distances of sample 1222
Calculating all distances of sample 1223
Calculating all distances of sample 1224
Calculating all 

In [9]:
# Display the number of rows currently held in W_dist.
len(W_dist)

500

In [10]:
# Reload the saved distance rows; the context manager closes the file handle,
# which the bare `open(...)` call left open.
with open('/content/distances1200-1700.json') as f:
    W_dist = json.load(f)

In [11]:
# Disabled experiment, kept for reference (overwrote one entry per row with a
# large value):
# for i in range(len(W_dist)):
#   W_dist[i][i+1000] = 1e9

# Turn the nested list of distances into a tensor of the default float dtype.
w_dist = torch.tensor(W_dist, dtype=torch.get_default_dtype())

In [12]:
# For every row of w_dist, the column index holding the smallest distance.
pred = torch.argmin(w_dist, dim=1)

In [13]:
# Display the per-row argmin indices stored in pred.
pred

tensor([3410, 3547, 3412, 3418, 3527, 3897, 3418, 3517, 3412, 3418, 3785, 3785,
        3917, 3785, 3697, 3697, 3690, 3916, 3908, 3696, 3793, 3949, 3946, 3791,
        3791, 3791, 3792, 3791, 3945, 3943, 3913, 3777, 3913, 3908, 3873, 3823,
        3686, 3727, 3913, 3911, 3883, 3635, 3938, 3671, 3677, 3677, 3671, 3645,
        3679, 3677, 3795, 3935, 3689, 3811, 3688, 3935, 3812, 3684, 3688, 3681,
        3933, 3640, 3640, 3640, 3640, 3641, 3632, 3932, 3640, 3632, 3401, 3942,
        3799, 3675, 3797, 3799, 3867, 3798, 3675, 3867, 3360, 3400, 3793, 3400,
        3809, 3308, 3786, 3360, 3786, 3368, 3797, 3798, 3862, 3894, 3896, 3650,
        3931, 3894, 3896, 3894, 3896, 3896, 3896, 3896, 3894, 3894, 3896, 3446,
        3894, 3896, 3821, 3820, 3820, 3820, 3401, 3820, 3746, 3401, 3671, 3674,
        3987, 4272, 4144, 4090, 4149, 4149, 4149, 4144, 4106, 4144, 4057,  120,
        3962, 3962, 3962, 4250, 4250, 3962, 4256, 4253, 3979, 3979,  118,   34,
         218, 3979, 3975, 4451, 3979, 39

In [14]:
# Count how many test utterances 1200..1699 received the label of their
# nearest training utterance. The tensor reduction replaces the builtin sum(),
# which iterated over the elements one by one in Python.
(y_train[pred] == y_test[1200:1700]).sum()

tensor(466)