In [None]:
import torch
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

In [None]:
def OPW_w(x: torch.Tensor, y: torch.Tensor, a: torch.Tensor = None, b: torch.Tensor = None, std=1, verbose=0, lambda1=0.1, lambda2=10,
          tol=.5e-2, maxIter=20, p_norm='inf', metric='sqreuclidean'):
    """Compute a position-regularised transport distance between two sequences.

    NOTE(review): the name and the structure of the kernel suggest this follows the
    Order-Preserving Wasserstein (OPW) formulation — confirm against the reference.

    The kernel is ``K = P * exp((S - D) / lambda2)`` where ``D`` is the pairwise
    ground distance from ``pdist2``, ``P`` is a Gaussian of the scaled gap between
    the relative positions ``i/N`` and ``j/M``, and ``S = lambda1 / (gap**2 + 1)``.
    The scaling vectors ``u`` and ``v`` are then refined by alternating updates.

    Args:
        x: tensor of shape (N, d), one row per element of the first sequence.
        y: tensor of shape (M, d), one row per element of the second sequence.
        a: optional weights for the N rows (N or N x 1); uniform ``1/N`` if None.
        b: optional weights for the M rows (M or M x 1); uniform ``1/M`` if None.
        std: standard deviation of the Gaussian position prior ``P``.
        verbose: if > 0, print the criterion after each convergence check.
        lambda1: weight of the ``S`` term.
        lambda2: temperature dividing ``S - D`` inside the exponential.
        tol: the loop stops once the criterion falls below this value.
        maxIter: upper bound on the iteration counter.
        p_norm: order passed to ``Tensor.norm`` for the criterion (via ``float``).
        metric: ground-distance name forwarded to ``pdist2``.

    Returns:
        ``(dist, T)`` where ``dist`` has shape (1,) and ``T`` has shape (N, M).

    Raises:
        AssertionError: if ``x`` and ``y`` have different feature dimensions.
    """
    assert y.size(1) == x.size(1), "The dimensions of instances in the input sequences must be the same!"
    N = x.size(0)
    M = y.size(0)

    # Build every helper tensor with the inputs' dtype/device: torch.matmul does not
    # promote dtypes, so float32 helpers would fail against a float64 kernel.
    dtype = x.dtype if x.is_floating_point() else torch.get_default_dtype()
    device = x.device

    # Relative positions i/N (as a column) and j/M (as a row) broadcast to (N, M).
    row_pos = (torch.arange(1, N + 1, dtype=dtype, device=device) / N).view(N, 1)
    col_pos = torch.arange(1, M + 1, dtype=dtype, device=device) / M
    relative_pos = row_pos - col_pos

    scaled_gap = torch.abs(relative_pos) / ((1/N**2 + 1/M**2)**0.5)
    P = torch.exp(-scaled_gap**2/(2*std**2)) / (std*(2*np.pi)**0.5)

    S = lambda1 / (relative_pos**2 + 1)

    D = pdist2(x, y, metric=metric)

    K = P * torch.exp((S - D) / lambda2)

    # Marginals are used as column vectors; reshape so 1-D weights do not
    # broadcast against the wrong axis of the (N, M) kernel.
    if a is None:
        a = torch.ones(N, 1, dtype=dtype, device=device) / N
    else:
        a = a.reshape(-1, 1).to(dtype=dtype, device=device)

    if b is None:
        b = torch.ones(M, 1, dtype=dtype, device=device) / M
    else:
        b = b.reshape(-1, 1).to(dtype=dtype, device=device)

    ainvK = K / a   # [N, M]

    n_iter = 0
    u = torch.ones(N, 1, dtype=dtype, device=device) / N
    v = None
    while n_iter < maxIter:
        u = 1. / torch.matmul(ainvK, (b / (torch.matmul(K.T, u))))

        n_iter += 1
        # Convergence is only checked on iterations 1, 21, 41, ... and on the last one.
        if n_iter % 20 == 1 or n_iter == maxIter:
            v = b / torch.matmul(K.T, u)    # [M, 1]
            u = 1 / torch.matmul(ainvK, v)  # [N, 1]

            criterion = torch.sum(torch.abs(v * torch.matmul(K.T, u) - b), dim=0)
            criterion = criterion.norm(p=float(p_norm))
            if abs(criterion) < tol:
                break

            # The counter is advanced a second time after a check; kept as-is so the
            # iteration schedule (and therefore existing results) is unchanged.
            n_iter += 1
            if verbose > 0:
                print(f"Iteration : {n_iter}, Criterion: {criterion}")

    # With maxIter <= 0 the loop never runs and v would be unbound; derive it
    # from the initial u so the function still returns a result.
    if v is None:
        v = b / torch.matmul(K.T, u)

    U = K * D   # [N, M]

    dist = torch.sum(u * torch.matmul(U, v), dim=0)
    T = v.T * (u * K)

    return dist, T

In [None]:
def pdist2(X, Y, metric='sqreuclidean'):
    """Return the pairwise distance matrix between the rows of X and the rows of Y.

    Args:
        X: first set of row vectors.
        Y: second set of row vectors.
        metric: case-insensitive name of the distance; one of 'sqreuclidean',
            'euclidean', 'l1', 'cosine', 'emd', 'chisqr'.

    Returns:
        Whatever the selected distance helper returns for (X, Y).

    Raises:
        NotImplementedError: if ``metric`` is not one of the names above.
    """
    # Normalise once; every comparison below must therefore be lowercase.
    key = metric.lower()
    if key == 'sqreuclidean':
        return distEucSqr(X, Y)
    elif key == 'euclidean':
        return torch.sqrt(distEucSqr(X, Y))
    elif key == 'l1':
        # Was compared against 'L1', which a lowercased string can never equal.
        return distL1(X, Y)
    elif key == 'cosine':
        return distCosine(X, Y)
    elif key == 'emd':
        return distEmd(X, Y)
    elif key == 'chisqr':
        return distChiSqr(X, Y)
    else:
        raise NotImplementedError(f'pdist - unknown metric: {metric}')


def distL1(x: torch.Tensor, y: torch.Tensor):
    """Pairwise L1 distance: entry [i, j] is the sum of |x[i] - y[j]| over the last axis."""
    # Insert an axis after the first so x broadcasts against every row of y.
    diff = x.unsqueeze(1) - y
    return diff.abs().sum(dim=-1)


def distCosine(x: torch.Tensor, y: torch.Tensor, eps=1e-8):
    """Pairwise cosine distance: entry [i, j] is 1 - cosine_similarity(x[i], y[j]).

    Args:
        x: tensor whose rows are compared against the rows of ``y``.
        y: tensor broadcast against ``x`` after an axis is inserted at position 1.
        eps: small value forwarded to ``torch.nn.CosineSimilarity``.

    Raises:
        AssertionError: if neither input is a floating-point tensor.
    """
    # Accept any floating dtype (float64, half, ...), not only torch.float32;
    # as before, at least one floating-point operand is required.
    assert x.is_floating_point() or y.is_floating_point(), "Inputs must be of type float"
    cos = torch.nn.CosineSimilarity(dim=-1, eps=eps)
    return 1 - cos(x.unsqueeze(1), y)


def distEmd(x: torch.Tensor, y: torch.Tensor):
    """Pairwise L1 distance between the cumulative sums (along the last axis) of x and y."""
    cdf_x = x.cumsum(dim=-1).unsqueeze(1)
    cdf_y = y.cumsum(dim=-1)
    gap = torch.abs(cdf_x - cdf_y)
    return gap.sum(dim=-1)


def distEucSqr(x: torch.Tensor, y: torch.Tensor):
    """Pairwise squared Euclidean distance, computed by squaring ``torch.cdist`` with p=2."""
    euclid = torch.cdist(x, y, p=2)
    return euclid.pow(2)


def distChiSqr(x: torch.Tensor, y: torch.Tensor, eps=1e-10):
    """Pairwise chi-square distance: half the sum over the last axis of (x - y)^2 / (x + y + eps)."""
    x_rows = x.unsqueeze(1)  # broadcast each row of x against all rows of y
    total = x_rows + y
    delta = x_rows - y
    ratio = delta**2 / (total + eps)  # eps keeps the denominator away from zero
    return ratio.sum(dim=-1) / 2

In [None]:
def get_utterances(path):
    """Parse a whitespace-separated text file into a list of utterances.

    Each non-blank line is one frame (a list of floats). Consecutive frames form
    one utterance; a blank line ends the current utterance. An utterance that is
    still open at end of file is kept as well.

    Args:
        path: path of the text file to read.

    Returns:
        A list of utterances, each a list of frames, each a list of floats.

    Raises:
        ValueError: if a token cannot be parsed by ``float``.
    """
    X = []
    utterance = []
    with open(path, 'r') as f:
        for line in f.read().splitlines():
            # float() already handles a leading sign, so no special-casing is needed.
            frame = [float(token) for token in line.split()]
            if frame:
                utterance.append(frame)
            elif utterance:
                # A blank line closes the utterance; repeated blank lines are ignored.
                X.append(utterance)
                utterance = []

    # Flush the last utterance when the file does not end with a blank line,
    # instead of pre-reading the whole file just to find the last line.
    if utterance:
        X.append(utterance)

    return X

In [None]:
# Parse the train and test files into lists of utterances (each a list of frames).
# NOTE(review): the '/content/...' paths look like a Google Colab working directory —
# adjust them when running elsewhere.
X_train = get_utterances(path="/content/Train_Arabic_Digit.txt")
X_test = get_utterances(path="/content/Test_Arabic_Digit.txt")
# Report how many utterances were parsed from each file.
print(f"Number of training sample: {len(X_train)}")
print(f"Number of testing sample: {len(X_test)}")

Number of training sample: 6600
Number of testing sample: 2200


In [None]:
import torch
import numpy as np
import json 

# Build class labels 0..9 as integer tensors: every label is repeated 660 times for
# the training set and 220 times for the test set, in ascending class order.
# NOTE(review): assumes the data files store utterances grouped by digit in that
# order and with exactly those counts — confirm against the dataset description.
# torch.arange is used directly (the legacy torch.Tensor(tensor) constructor has
# version-dependent dtype handling) and expand() takes the target shape itself, so
# no throwaway torch.empty tensors are allocated.
n_class = torch.arange(0, 10).view(10, 1)
y_train = n_class.expand(10, 660).contiguous().view(6600)
y_test = n_class.expand(10, 220).contiguous().view(2200)

In [None]:
# Writes the literal 3 to 'fe.json' in the current working directory.
# NOTE(review): nothing in the visible notebook reads this file back; it looks like
# a scratch check that JSON writing works — confirm whether the cell is still needed.
with open('fe.json', 'w') as f:
    json.dump(3, f)

In [None]:
# Distances from test utterances with 0-based index in [1700, 2200) to every
# training utterance. W_dist[r][c] is the distance between the r-th processed test
# utterance and training utterance c.
TEST_START, TEST_STOP = 1700, 2200  # keep in sync with the output file name below

# Convert every training utterance to a tensor once, not once per test sample.
train_tensors = [torch.Tensor(Y) for Y in X_train]

W_dist = []
for idx_X, X in enumerate(X_test[TEST_START:TEST_STOP], start=TEST_START):
    print(f'Calculating all distances of sample {idx_X+1}')
    X_tensor = torch.Tensor(X)
    W_X = []
    for Y_tensor in train_tensors:
        # Only the scalar distance is kept; the transport plan is discarded.
        dist, _ = OPW_w(X_tensor, Y_tensor, verbose=0, lambda1=0.1, lambda2=10)
        W_X.append(dist.item())
    W_dist.append(W_X)

# Persist the result so the expensive loop does not have to be repeated.
with open('/content/distances1700-2200.json', 'w') as f:
    json.dump(W_dist, f)

Calculating all distances of sample 1701
Calculating all distances of sample 1702
Calculating all distances of sample 1703
Calculating all distances of sample 1704
Calculating all distances of sample 1705
Calculating all distances of sample 1706
Calculating all distances of sample 1707
Calculating all distances of sample 1708
Calculating all distances of sample 1709
Calculating all distances of sample 1710
Calculating all distances of sample 1711
Calculating all distances of sample 1712
Calculating all distances of sample 1713
Calculating all distances of sample 1714
Calculating all distances of sample 1715
Calculating all distances of sample 1716
Calculating all distances of sample 1717
Calculating all distances of sample 1718
Calculating all distances of sample 1719
Calculating all distances of sample 1720
Calculating all distances of sample 1721
Calculating all distances of sample 1722
Calculating all distances of sample 1723
Calculating all distances of sample 1724
Calculating all 

In [None]:
# Sanity check: W_dist holds one row of distances per processed test utterance.
len(W_dist)

500

In [None]:
# Reload the saved distance matrix; the context manager closes the file handle,
# which a bare open() call passed to json.load would leave open.
with open('/content/distances1700-2200.json') as f:
    W_dist = json.load(f)

In [None]:
# NOTE(review): the disabled loop below would overwrite one entry per row
# (column i + 1000) with a very large value; presumably left over from an experiment
# that excluded a known match from the argmin — confirm, or remove it.
# for i in range(len(W_dist)):
#   W_dist[i][i+1000] = 1e9
# Convert the nested list of distances to a tensor (rows: test utterances,
# columns: training utterances).
w_dist = torch.Tensor(W_dist)

In [None]:
# For every row (test utterance), take the column index of the smallest distance,
# i.e. the index of its nearest training utterance.
pred = w_dist.argmin(dim=1)

In [None]:
# Display the nearest-neighbour indices; these index into X_train / y_train.
pred

tensor([5266, 5266, 3265, 5259, 5100, 5256, 5113, 5245, 5246, 5266, 5257, 4967,
        4967, 4954, 5269, 5269, 4995, 3227, 5269, 5269, 5228, 5154, 5220, 5154,
        4998, 4998, 5161, 5154, 4796, 4791, 5176, 5213, 4957, 5206, 5213, 5241,
        5278, 4959, 5215, 5213, 5131, 4963, 5170, 5213, 5213, 5206, 5170, 5213,
        4957, 5170, 5067, 4961, 4963, 4961, 4961, 5127, 4963, 4961, 4998, 4961,
        5561, 5468, 5561, 5560, 5586, 5468, 5468, 5770, 5770, 5770, 5734, 5575,
        5771, 5575, 5737, 5574, 5738, 5388, 5774, 5388, 5296, 5296, 5296, 5296,
        5374, 5296, 5374, 5296, 5373, 5296, 5341, 5347, 5341, 5346, 5343, 5347,
        5343, 5346, 5341, 5343, 5586, 5586, 5560, 5586, 5586, 5589, 5586, 5586,
        5586, 5586, 5370, 5454, 5454, 5450, 5454, 5454, 5406, 5406, 5344, 5454,
        5579, 5579, 5568, 5568, 5579, 5579, 5568, 5568, 5576, 5578, 5580, 5582,
        5584, 5582, 5584, 5584, 5583, 5584, 5584, 5584, 5560, 5770, 5574, 5560,
        5560, 5560, 5560, 5560, 5560, 53

In [None]:
# Count test utterances in [1700, 2200) whose nearest training utterance carries the
# same label. Tensor.sum() reduces in one vectorised call, whereas the builtin sum()
# iterates over the comparison result element by element.
(y_train[pred] == y_test[1700:2200]).sum()

tensor(496)