In [16]:
import torch
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

In [17]:
def OPW_w(x: torch.Tensor, y: torch.Tensor, a: torch.Tensor = None, b: torch.Tensor = None, std=1, verbose=0, lambda1=0.1, lambda2=10,
          tol=.5e-2, maxIter=20, p_norm='inf', metric='sqreuclidean'):
    """Distance and transport plan between two sequences via kernel-scaling iterations.

    A kernel ``K = P * exp((S - D) / lambda2)`` is built from three [N, M] terms:

    * ``D`` - pairwise ground distances between rows of ``x`` and ``y`` (``pdist2``);
    * ``P`` - a Gaussian of width ``std`` applied to the normalised gap between
      the relative positions ``i/N`` and ``j/M``;
    * ``S`` - ``lambda1 / ((i/N - j/M)**2 + 1)``, largest where the relative
      positions agree.

    Two scaling vectors ``u`` [N, 1] and ``v`` [M, 1] are then refined with
    alternating fixed-point updates until the marginal criterion drops below
    ``tol`` or the iteration counter reaches ``maxIter``.

    Args:
        x: Tensor of shape [N, d].
        y: Tensor of shape [M, d]; ``d`` must match ``x``.
        a: Optional weights for the N rows (any shape with N elements).
            Defaults to uniform ``1/N``.
        b: Optional weights for the M columns (any shape with M elements).
            Defaults to uniform ``1/M``.
        std: Width of the Gaussian position prior ``P``.
        verbose: If > 0, print the criterion after each non-converged check.
        lambda1: Weight of the ``S`` term.
        lambda2: Divisor applied to ``S - D`` inside the exponential.
        tol: Convergence threshold on the criterion.
        maxIter: Upper bound for the iteration counter. The counter is bumped
            twice on iterations where a convergence check runs.
        p_norm: Norm order, converted with ``float()`` and passed to ``Tensor.norm``.
        metric: Metric name forwarded to ``pdist2``.

    Returns:
        Tuple ``(dist, T)``: ``T`` is the [N, M] matrix ``u_i * K_ij * v_j`` and
        ``dist`` is a shape-(1,) tensor equal to ``sum_ij T_ij * D_ij``.

    Raises:
        AssertionError: If ``x`` and ``y`` have different feature dimensions.
    """
    assert y.size(1) == x.size(1), "The dimensions of instances in the input sequences must be the same!"
    N = x.size(0)
    M = y.size(0)

    # Compute the ground cost first: every auxiliary tensor below adopts its
    # dtype/device, so float64 or non-CPU inputs do not trip a dtype/device
    # mismatch inside matmul.
    D = pdist2(x, y, metric=metric)
    dtype = D.dtype if D.is_floating_point() else torch.get_default_dtype()
    device = D.device

    col_x = (torch.arange(1, N + 1, dtype=dtype, device=device) / N).view(N, 1)
    col_y = torch.arange(1, M + 1, dtype=dtype, device=device) / M
    relative_pos = col_x - col_y    # [N, M], entry (i, j) = i/N - j/M

    gap = torch.abs(relative_pos) / ((1/N**2 + 1/M**2)**0.5)
    P = torch.exp(-gap**2/(2*std**2)) / (std*(2*np.pi)**0.5)

    S = lambda1 / (relative_pos**2 + 1)

    K = P * torch.exp((S - D) / lambda2)

    # Marginals are forced into column form so that 1-D weights broadcast
    # per-row / per-column instead of along the wrong axis.
    if a is None:
        a = torch.ones(N, 1, dtype=dtype, device=device) / N
    else:
        a = torch.as_tensor(a, dtype=dtype, device=device).reshape(N, 1)

    if b is None:
        b = torch.ones(M, 1, dtype=dtype, device=device) / M
    else:
        b = torch.as_tensor(b, dtype=dtype, device=device).reshape(M, 1)

    ainvK = K / a   # [N, M]

    step = 0
    u = torch.ones(N, 1, dtype=dtype, device=device) / N
    v = None
    while step < maxIter:
        u = 1. / torch.matmul(ainvK, (b / (torch.matmul(K.T, u))))

        step += 1
        # Convergence is only checked when the counter is 1 (mod 20) or hits maxIter.
        if step % 20 == 1 or step == maxIter:
            v = b / torch.matmul(K.T, u)    # [M, 1]
            u = 1 / torch.matmul(ainvK, v)  # [N, 1]

            criterion = torch.sum(torch.abs(v * torch.matmul(K.T, u) - b), dim=0)
            criterion = criterion.norm(p=float(p_norm))
            if abs(criterion) < tol:
                break

            # The counter is advanced a second time after a non-converged check;
            # kept as-is so the iteration schedule (and results) stay unchanged.
            step += 1
            if verbose > 0:
                print(f"Iteration : {step}, Criterion: {criterion}")

    if v is None:
        # No check ran (maxIter <= 0): derive v from the initial u so the
        # result is defined instead of failing on an unbound variable.
        v = b / torch.matmul(K.T, u)

    U = K * D   # [N, M]

    dist = torch.sum(u * torch.matmul(U, v), dim=0)
    T = v.T * (u * K)

    return dist, T

In [18]:
def pdist2(X, Y, metric='sqreuclidean'):
    """Dispatch to the pairwise-distance routine selected by ``metric``.

    The metric name is matched case-insensitively. Supported names:
    ``'sqreuclidean'``, ``'euclidean'``, ``'l1'``, ``'cosine'``, ``'emd'``
    and ``'chisqr'``.

    Args:
        X: First batch of row vectors, forwarded to the distance routine.
        Y: Second batch of row vectors, forwarded to the distance routine.
        metric: Name of the distance to compute.

    Returns:
        Whatever the selected distance routine returns for ``(X, Y)``.

    Raises:
        NotImplementedError: If ``metric`` is not one of the supported names.
    """
    # Lower-case once; every comparison below must therefore be lower-case too
    # (comparing against 'L1' could never match).
    key = metric.lower()
    if key == 'sqreuclidean':
        return distEucSqr(X, Y)
    if key == 'euclidean':
        return torch.sqrt(distEucSqr(X, Y))
    if key == 'l1':
        return distL1(X, Y)
    if key == 'cosine':
        return distCosine(X, Y)
    if key == 'emd':
        return distEmd(X, Y)
    if key == 'chisqr':
        return distChiSqr(X, Y)
    raise NotImplementedError(f'pdist - unknown metric: {metric}')


def distL1(x: torch.Tensor, y: torch.Tensor):
    """Pairwise L1 (sum of absolute differences) distance.

    ``x`` gets an extra axis at position 1 so that it broadcasts against
    ``y``; for 2-D inputs of shape [N, d] and [M, d] the result is [N, M].
    """
    pairwise_diff = x.unsqueeze(1) - y
    return pairwise_diff.abs().sum(dim=-1)


def distCosine(x: torch.Tensor, y: torch.Tensor, eps=1e-8):
    """Pairwise cosine distance, i.e. ``1 - cosine_similarity``.

    ``x`` gets an extra axis at position 1 so that it broadcasts against
    ``y``; for 2-D inputs of shape [N, d] and [M, d] the result is [N, M].

    Args:
        x: ``torch.float`` tensor of row vectors.
        y: ``torch.float`` tensor of row vectors.
        eps: Small value passed to ``torch.nn.CosineSimilarity`` to avoid
            division by zero.

    Raises:
        AssertionError: If either input is not of dtype ``torch.float``.
    """
    # Both operands must be float: with `or`, a single float operand was enough
    # to pass the check, contradicting the error message.
    assert x.dtype == torch.float and y.dtype == torch.float, "Inputs must be of type float"
    cos = torch.nn.CosineSimilarity(dim=-1, eps=eps)
    return 1 - cos(x.unsqueeze(1), y)


def distEmd(x: torch.Tensor, y: torch.Tensor):
    """Pairwise L1 distance between cumulative sums taken along the last axis.

    Each input is replaced by its running sum over the last dimension; the
    running sums of ``x`` are then broadcast against those of ``y`` (extra
    axis at position 1) and absolute differences are summed.
    """
    cdf_x, cdf_y = (torch.cumsum(t, dim=-1) for t in (x, y))
    gap = cdf_x.unsqueeze(1) - cdf_y
    return gap.abs().sum(dim=-1)


def distEucSqr(x: torch.Tensor, y: torch.Tensor):
    """Pairwise squared Euclidean distance, obtained by squaring ``torch.cdist``."""
    euclidean = torch.cdist(x, y, p=2)
    return euclidean ** 2


def distChiSqr(x: torch.Tensor, y: torch.Tensor, eps=1e-10):
    """Pairwise chi-square style distance.

    For every pair of rows this evaluates
    ``sum((x - y)**2 / (x + y + eps)) / 2`` over the last axis; ``eps``
    keeps the denominator away from zero.
    """
    x_expanded = x.unsqueeze(1)
    pair_sum = x_expanded + y
    pair_diff = x_expanded - y
    ratio = pair_diff ** 2 / (pair_sum + eps)
    return ratio.sum(dim=-1) / 2

In [19]:
def get_utterances(path):
    """Parse a whitespace-separated numeric text file into utterances.

    Every non-blank line is one frame (a list of floats). Consecutive frames
    form one utterance; one or more blank lines end the current utterance.
    A final utterance that is not followed by a blank line is kept as well.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of utterances, each a list of frames, each a list of floats.

    Raises:
        ValueError: If a token cannot be converted with ``float()``.
    """
    X = []
    current = []
    # Read once; the trailing utterance is flushed after the loop, so there is
    # no need for a separate pass just to find out which line is the last one.
    with open(path, 'r') as f:
        lines = f.read().splitlines()

    for line in lines:
        # float() already handles a leading '-' sign.
        frame = [float(token) for token in line.split()]
        if frame:
            current.append(frame)
        elif current:
            X.append(current)
            current = []

    if current:
        X.append(current)

    return X

In [20]:
# Parse the training and test files into lists of utterances (see
# get_utterances) and report how many utterances each split contains.
# NOTE(review): paths are hard-coded under /content (presumably a Colab
# runtime) — confirm before running elsewhere.
X_train = get_utterances(path="/content/Train_Arabic_Digit.txt")
X_test = get_utterances(path="/content/Test_Arabic_Digit.txt")
print(f"Number of training sample: {len(X_train)}")
print(f"Number of testing sample: {len(X_test)}")

Number of training sample: 6600
Number of testing sample: 2200


In [21]:
import torch
import numpy as np
import json 

# Class ids 0..9 as an integer column vector, built directly with arange
# instead of routing an integer tensor through the legacy torch.Tensor(...)
# constructor.
n_class = torch.arange(0, 10).view(10, 1)
# Labels are laid out in consecutive per-class runs: 660 per class for
# training, 220 per class for testing. expand() repeats each id along its row
# without allocating a throwaway tensor just to describe the target shape.
# NOTE(review): assumes the data files store utterances grouped by class in
# ascending order — confirm against the dataset description.
y_train = n_class.expand(10, 660).reshape(6600)
y_test = n_class.expand(10, 220).reshape(2200)

In [22]:
# Write the JSON encoding of the integer 3 to fe.json in the working directory.
# NOTE(review): looks like a scratch write test — confirm whether it is still needed.
with open('fe.json', 'w') as f:
    f.write(json.dumps(3))

In [23]:
# Convert every training utterance to a tensor once, instead of once per
# (test, train) pair inside the nested loop.
train_tensors = [torch.Tensor(Y) for Y in X_train]

# W_dist[i][j] = OPW_w distance between test utterance i and training utterance j.
W_dist = []
# Only the first 200 test utterances are scored; slicing avoids walking the
# remaining ones just to skip them.
for idx_X, X in enumerate(X_test[:200]):
    print(f'Calculating all distances of sample {idx_X+1}')
    X_tensor = torch.Tensor(X)
    W_X = []
    for Y_tensor in train_tensors:
        dist, minT = OPW_w(X_tensor, Y_tensor, verbose=0, lambda1=0.1, lambda2=10)
        W_X.append(dist.item())
    W_dist.append(W_X)

# Persist the distance matrix so it can be reloaded without recomputation.
with open('/content/distances.json', 'w') as f:
    json.dump(W_dist, f)

Calculating all distances of sample 1
Calculating all distances of sample 2
Calculating all distances of sample 3
Calculating all distances of sample 4
Calculating all distances of sample 5
Calculating all distances of sample 6
Calculating all distances of sample 7
Calculating all distances of sample 8
Calculating all distances of sample 9
Calculating all distances of sample 10
Calculating all distances of sample 11
Calculating all distances of sample 12
Calculating all distances of sample 13
Calculating all distances of sample 14
Calculating all distances of sample 15
Calculating all distances of sample 16
Calculating all distances of sample 17
Calculating all distances of sample 18
Calculating all distances of sample 19
Calculating all distances of sample 20
Calculating all distances of sample 21
Calculating all distances of sample 22
Calculating all distances of sample 23
Calculating all distances of sample 24
Calculating all distances of sample 25
Calculating all distances of sampl

In [24]:
# Sanity check: number of distance rows, i.e. test utterances scored so far.
len(W_dist)

200

In [25]:
# Reload the saved distance matrix; the context manager closes the file
# handle, which a bare open(...) passed to json.load never did.
with open('/content/distances.json') as f:
    W_dist = json.load(f)

In [26]:
# Disabled experiment: would overwrite entry i+1000 of row i with a very large
# value (1e9), presumably to keep that column from being selected as nearest.
# NOTE(review): purpose is not evident from this notebook — confirm or delete.
# for i in range(len(W_dist)):
#   W_dist[i][i+1000] = 1e9
# Turn the nested distance lists into a 2-D tensor (one row per entry of W_dist).
w_dist = torch.Tensor(W_dist)

In [27]:
# For every row of the distance matrix, pick the column with the smallest distance.
pred = torch.argmin(w_dist, dim=1)

In [28]:
# Display the selected column index (argmin over dim 1 of w_dist) for each row.
pred

tensor([  23,   23,  280,  280,  136, 4906,   24,  136, 4901, 4906,   90,   90,
          90,  602,  296,   90,  561,   90,   90,   25,   30,   32,   17,   39,
         153, 2130,  174,   35,   17,   34,  200,   11,  219,  126,   10,  236,
          34,   32,  219,  219, 4714, 4783, 4714, 4714, 4620, 4714, 4714, 4783,
        4716, 4660,   95,   88,  157,   88,   85,   85,   80,   95,   80,   88,
         188,  234,  309,  220,  169,    7,  290,    7,    7,  295,   21,   12,
         118,   11,  132,  307,  307,  307,  307,  106,    0,   14,   95,   22,
          95,  223,  290,    7,   80,  290,   30,   30,  231,   30,   30,  270,
         270, 4699, 4699,  270,  104,  297,  297,  186,  186,    7,    7,    9,
        4896,   78,  616,  464,  619,  377,  547,  369,  524,  377,  360, 4578,
         565,  604,  551,  606,  550,  499,  606,  606,  606,  504,  488,  451,
         539,  520,  515,  451,  549,  533,  521,  519,  538,  549,  513,  464,
         464,   65,  195,  444,  488,  4

In [29]:
# Count how many of the first 200 test labels equal the label of the selected
# training sample (a tensor reduction instead of Python's element-wise sum).
(y_train[pred] == y_test[0:200]).sum()

tensor(182)