In [9]:
import torch
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

In [10]:
def OPW_w(x: torch.Tensor, y: torch.Tensor, a: torch.Tensor = None, b: torch.Tensor = None, std=1, verbose=0, lambda1=0.1, lambda2=10,
          tol=.5e-2, maxIter=20, p_norm='inf', metric='sqreuclidean'):
    """Compute an order-aware transport distance between two sequences.

    The frame-to-frame cost ``D`` comes from ``pdist2``.  It is combined with
    two terms that depend only on the relative temporal position of the
    frames (a Gaussian prior ``P`` and an inverse-difference term ``S``) into
    a kernel ``K = P * exp((S - D) / lambda2)``.  ``K`` is then rescaled with
    alternating row/column scaling updates (Sinkhorn-style fixed-point
    iteration) so that the resulting plan matches the marginals ``a``/``b``.

    Args:
        x: ``[N, d]`` tensor, one row per frame of the first sequence.
        y: ``[M, d]`` tensor, one row per frame of the second sequence.
        a: Row marginal; defaults to uniform ``[N, 1]``.  A user-supplied
            value is expected to have the same shape, dtype and device.
        b: Column marginal; defaults to uniform ``[M, 1]``.  Same expectation.
        std: Standard deviation of the Gaussian position prior.
        verbose: When > 0, print the stopping criterion at every check.
        lambda1: Weight of the inverse-difference term ``S``.
        lambda2: Temperature dividing ``S - D`` inside the exponential.
        tol: Threshold on the marginal violation used to stop early.
        maxIter: Iteration budget; must be positive.
        p_norm: Norm order passed to ``float()`` for the stopping criterion.
        metric: Metric name forwarded to ``pdist2``.

    Returns:
        ``(dist, T)`` where ``dist`` is a one-element tensor holding
        ``sum(u * ((K * D) @ v))`` and ``T`` is the ``[N, M]`` plan
        ``diag(u) K diag(v)``.

    Raises:
        AssertionError: if ``x`` and ``y`` have different feature sizes.
        ValueError: if ``maxIter`` is not positive.
    """
    assert y.size(1) == x.size(1), "The dimensions of instances in the input sequences must be the same!"
    if maxIter <= 0:
        # With no iteration the column scaling `v` would never be computed.
        raise ValueError(f"maxIter must be positive, got {maxIter}")

    N = x.size(0)
    M = y.size(0)

    # Create every helper tensor with x's dtype/device so float64 or GPU
    # inputs do not collide with default float32 CPU tensors in matmul.
    work_dtype = x.dtype if x.is_floating_point() else torch.get_default_dtype()
    tensor_opts = {'dtype': work_dtype, 'device': x.device}

    # Normalised temporal positions i/N and j/M, and their pairwise gap.
    col_x = (torch.arange(1, N + 1, **tensor_opts) / N).view(N, 1)
    col_y = torch.arange(1, M + 1, **tensor_opts) / M
    relative_pos = col_x - col_y    # [N, M]

    # Gaussian prior on the scaled position gap.
    scaled_gap = torch.abs(relative_pos) / ((1/N**2 + 1/M**2)**0.5)
    P = torch.exp(-scaled_gap**2/(2*std**2)) / (std*(2*np.pi)**0.5)

    # Bonus that is largest where the relative positions coincide.
    S = lambda1 / (relative_pos**2 + 1)

    D = pdist2(x, y, metric=metric)

    K = P * torch.exp((S - D) / lambda2)

    if a is None:
        a = torch.ones(N, 1, **tensor_opts) / N

    if b is None:
        b = torch.ones(M, 1, **tensor_opts) / M

    ainvK = K / a   # [N, M]

    n_iter = 0
    u = torch.ones(N, 1, **tensor_opts) / N
    while n_iter < maxIter:
        u = 1. / torch.matmul(ainvK, (b / (torch.matmul(K.T, u))))

        n_iter += 1
        # Evaluate the stopping criterion on the first pass, every 20th
        # count after it, and on the last allowed iteration.
        if n_iter % 20 == 1 or n_iter == maxIter:
            v = b / torch.matmul(K.T, u)    # [M, 1]
            u = 1 / torch.matmul(ainvK, v)  # [N, 1]

            criterion = torch.sum(torch.abs(v * torch.matmul(K.T, u) - b), dim=0)
            criterion = criterion.norm(p=float(p_norm))
            # A NaN criterion can never fall below tol, so stop right away.
            if abs(criterion) < tol or torch.isnan(criterion):
                break

            # The check itself counts as one more iteration.
            n_iter += 1
            if verbose > 0:
                print(f"Iteration : {n_iter}, Criterion: {criterion}")

    U = K * D   # [N, M]

    dist = torch.sum(u * torch.matmul(U, v), dim=0)
    T = v.T * (u * K)

    return dist, T

In [11]:
def pdist2(X, Y, metric='sqreuclidean'):
    """Return the pairwise distance between the rows of ``X`` and ``Y``.

    Args:
        X: First input, forwarded unchanged to the selected helper.
        Y: Second input, forwarded unchanged to the selected helper.
        metric: Case-insensitive metric name: ``'sqreuclidean'``,
            ``'euclidean'``, ``'L1'``, ``'cosine'``, ``'emd'`` or ``'chisqr'``.

    Returns:
        Whatever the selected ``dist*`` helper returns.

    Raises:
        NotImplementedError: if ``metric`` is not one of the names above.
    """
    # Normalise once; every comparison below must therefore be lower-case.
    # (The original compared the lowered name with 'L1', which never matched.)
    name = metric.lower()
    if name == 'sqreuclidean':
        return distEucSqr(X, Y)
    if name == 'euclidean':
        return torch.sqrt(distEucSqr(X, Y))
    if name == 'l1':
        return distL1(X, Y)
    if name == 'cosine':
        return distCosine(X, Y)
    if name == 'emd':
        return distEmd(X, Y)
    if name == 'chisqr':
        return distChiSqr(X, Y)
    raise NotImplementedError(f'pdist - unknown metric: {metric}')


def distL1(x: torch.Tensor, y: torch.Tensor):
    """Pairwise L1 distance: entry ``[i, j]`` is ``sum(|x[i] - y[j]|)`` over the last axis."""
    # Insert an axis after the first one so every row of x meets every row of y.
    pair_diff = x.unsqueeze(dim=1) - y
    return pair_diff.abs().sum(dim=-1)


def distCosine(x: torch.Tensor, y: torch.Tensor, eps=1e-8):
    """Pairwise cosine distance ``1 - cos(x[i], y[j])`` over the last axis.

    Args:
        x: Tensor whose rows are compared against the rows of ``y``.
        y: Tensor with the same size on the last axis as ``x``.
        eps: Small value forwarded to the cosine similarity to avoid a
            division by zero.

    Returns:
        Tensor with one entry per (row of ``x``, row of ``y``) pair.

    Raises:
        AssertionError: if neither input is a floating-point tensor.
    """
    # Accept any floating-point dtype; the original compared against
    # torch.float (float32) only and therefore rejected float64 inputs.
    assert x.is_floating_point() or y.is_floating_point(), "Inputs must be of type float"
    similarity = torch.nn.functional.cosine_similarity(x.unsqueeze(1), y, dim=-1, eps=eps)
    return 1 - similarity


def distEmd(x: torch.Tensor, y: torch.Tensor):
    """Pairwise L1 distance between the cumulative sums (along the last axis) of the rows of ``x`` and ``y``."""
    # Running totals of each row, then every x row against every y row.
    cdf_gap = x.cumsum(dim=-1).unsqueeze(1) - y.cumsum(dim=-1)
    return cdf_gap.abs().sum(dim=-1)


def distEucSqr(x: torch.Tensor, y: torch.Tensor):
    """Pairwise squared Euclidean distance, obtained by squaring ``torch.cdist`` with ``p=2``."""
    euclidean = torch.cdist(x, y, p=2)
    return euclidean.pow(2)


def distChiSqr(x: torch.Tensor, y: torch.Tensor, eps=1e-10):
    """Pairwise chi-square style distance: ``sum((x - y)**2 / (x + y + eps)) / 2`` over the last axis."""
    x_rows = x.unsqueeze(1)     # extra axis so each x row pairs with each y row
    pair_sum = x_rows + y
    pair_diff = x_rows - y
    # eps keeps the ratio finite where both entries are zero.
    ratio = pair_diff.pow(2) / (pair_sum + eps)
    return ratio.sum(dim=-1) / 2

In [12]:
def get_utterances(path):
    """Parse a text file of whitespace-separated numbers into utterances.

    Every non-blank line becomes one frame (a list of floats).  Consecutive
    frames are grouped into one utterance; a blank (or whitespace-only) line
    closes the current utterance.  Runs of blank lines and leading blank
    lines do not produce empty utterances.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of utterances, each a list of frames, each a list of floats.

    Raises:
        ValueError: if a token cannot be parsed by ``float``.
    """
    X = []
    utterance = []
    # Read the file once; the original read it twice just to detect the last
    # line, which the final flush below makes unnecessary.
    with open(path, 'r') as f:
        lines = f.read().splitlines()

    for line in lines:
        frame = [float(token) for token in line.split()]
        if frame:
            utterance.append(frame)
        elif utterance:
            # Blank line: close the utterance collected so far.
            X.append(utterance)
            utterance = []

    # The file may end without a trailing blank line.
    if utterance:
        X.append(utterance)

    return X

In [13]:
# Load both splits; each entry of X_train / X_test is one utterance (a list of frames).
# NOTE(review): the absolute /content/... paths presumably target a Colab runtime —
# adjust them (or move them to a config constant) when running elsewhere.
X_train = get_utterances(path="/content/Train_Arabic_Digit.txt")
X_test = get_utterances(path="/content/Test_Arabic_Digit.txt")
# Report how many utterances were parsed from each file.
print(f"Number of training sample: {len(X_train)}")
print(f"Number of testing sample: {len(X_test)}")

Number of training sample: 6600
Number of testing sample: 2200


In [14]:
import torch
import numpy as np
import json 

# Class ids 0..9 as a column, then each id repeated in one contiguous run:
# 660 times for the training labels and 220 times for the test labels.
# NOTE(review): assumes the utterances in both files are stored grouped by
# class in ascending order — confirm against the dataset description.
n_class = torch.Tensor(torch.arange(0, 10)).view(10, 1)
y_train = n_class.expand(10, 660).reshape(6600)
y_test = n_class.expand(10, 220).reshape(2200)

In [15]:
# Writes the JSON literal 3 to fe.json in the current working directory.
# NOTE(review): no visible cell reads fe.json back — this looks like a scratch
# write test; confirm and remove if it is not needed.
with open('fe.json', 'w') as f:
    json.dump(3, f)

In [16]:
# Distance from test utterances 200..699 to every training utterance.
# W_dist[r][c] is the distance between test sample 200 + r and training sample c.
W_dist = []
# Build the training tensors once instead of once per (test, train) pair.
train_tensors = [torch.Tensor(Y) for Y in X_train]
for idx_X, X in enumerate(X_test[200:700], start=200):
    print(f'Calculating all distances of sample {idx_X+1}')
    x_tensor = torch.Tensor(X)  # loop-invariant for the inner loop
    W_X = []
    for y_tensor in train_tensors:
        # Only the distance is kept; the transport plan is not needed here.
        dist = OPW_w(x_tensor, y_tensor, verbose=0, lambda1=0.1, lambda2=10)[0]
        W_X.append(dist.item())
    W_dist.append(W_X)

# Cache the result; the file name encodes the processed test range.
with open('/content/distances200-700.json', 'w') as f:
    json.dump(W_dist, f)

Calculating all distances of sample 201
Calculating all distances of sample 202
Calculating all distances of sample 203
Calculating all distances of sample 204
Calculating all distances of sample 205
Calculating all distances of sample 206
Calculating all distances of sample 207
Calculating all distances of sample 208
Calculating all distances of sample 209
Calculating all distances of sample 210
Calculating all distances of sample 211
Calculating all distances of sample 212
Calculating all distances of sample 213
Calculating all distances of sample 214
Calculating all distances of sample 215
Calculating all distances of sample 216
Calculating all distances of sample 217
Calculating all distances of sample 218
Calculating all distances of sample 219
Calculating all distances of sample 220
Calculating all distances of sample 221
Calculating all distances of sample 222
Calculating all distances of sample 223
Calculating all distances of sample 224
Calculating all distances of sample 225


In [17]:
# Sanity check: number of test samples for which a row of distances was collected.
len(W_dist)

500

In [18]:
# Reload the cached distances; the context manager closes the file handle,
# which the bare open() call used before left to the garbage collector.
with open('/content/distances200-700.json') as f:
    W_dist = json.load(f)

In [23]:
# Disabled experiment, kept for reference: it would overwrite entry i + 1000 of
# row i with a huge value so that argmin can never pick that column.
# NOTE(review): the purpose is not evident from the visible cells — confirm or delete.
# for i in range(len(W_dist)):
#   W_dist[i][i+1000] = 1e9
# Nested list -> tensor with one row per test sample and one column per training sample.
w_dist = torch.Tensor(W_dist)

In [24]:
# For every row of the distance matrix, the column index of the smallest distance.
pred = torch.argmin(w_dist, dim=1)

In [25]:
# Display the argmin results: these are column indices of w_dist, not class labels.
pred

tensor([ 530,  470,   80,  479,  539,  550,  445,  488,  532,  444,  530,  533,
         481,  481,  379,  484,  441,  484,  484,  531,  927,  845,  841,  841,
         841,  841,  849, 1661,  684,  845,  780,  780,  780,  780,  780,  780,
         889,  780,  780,  780,  671,  671,  695,  810,  695,  815,  671,  676,
         695,  695,  665,  698,  665,  698,  788,  671,  722,  665,  665,  698,
         969,  803,  803,  966,  803,  660,  796,  660,  660,  803,  823,  905,
         748,  827,  722,  827,  827,  722,  827,  827,  885,  765,  899,  667,
         883,  778,  669,  946,  946,  669,  750,  751,  751,  802,  759,  778,
         800,  943,  759,  759,  969,  915,  770,  719,  852,  852,  969,  850,
         850,  850,  711,  920,  920,  896,  889,  885,  889,  849,  849,  889,
         780,  660,  780,  780,  716,  716,  780,  780,  716,  780, 1036, 1058,
        1134, 1265, 1038, 1260, 1028, 1052, 1188, 1021, 1211, 1210, 1302, 1215,
        1217, 1215, 1215, 1316, 1215, 12

In [26]:
# Number of test samples 200..699 whose nearest training sample carries the
# same label. Tensor .sum() reduces in one call, whereas the builtin sum()
# iterates the tensor element by element.
(y_train[pred] == y_test[200:700]).sum()

tensor(490)