In [1]:
import torch
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

In [2]:
def OPW_w(x: torch.Tensor, y: torch.Tensor, a: torch.Tensor = None, b: torch.Tensor = None, std=1, verbose=0, lambda1=0.1, lambda2=10,
          tol=.5e-2, maxIter=20, p_norm='inf', metric='sqreuclidean'):
    """Compute an order-regularised transport distance between two sequences.

    The kernel combines a Gaussian prior on relative positions (``P``), a
    bonus that decays with the squared position difference (``S``) and the
    pairwise ground distance (``D``); row/column scalings are then found with
    a Sinkhorn-style fixed-point iteration.
    NOTE(review): this looks like a port of the Order-Preserving Wasserstein
    (OPW) distance - confirm against the reference implementation.

    Args:
        x: ``[N, d]`` tensor, one row per element of the first sequence.
        y: ``[M, d]`` tensor, one row per element of the second sequence.
        a: optional ``[N, 1]`` weights for ``x``; uniform ``1/N`` when None.
        b: optional ``[M, 1]`` weights for ``y``; uniform ``1/M`` when None.
        std: standard deviation of the Gaussian position prior.
        verbose: when > 0, print the criterion after each convergence check.
        lambda1: weight of the position bonus ``S``.
        lambda2: divisor applied to ``S - D`` inside the exponential.
        tol: stop once the column-marginal violation is below this value.
        maxIter: upper bound for the iteration counter.
        p_norm: norm order applied to the criterion (converted with ``float``).
        metric: ground-metric name forwarded to ``pdist2``.

    Returns:
        ``(dist, T)``: ``dist`` is a 1-element tensor, ``T`` the ``[N, M]``
        transport plan ``diag(u) @ K @ diag(v)``.

    Caller-supplied ``a``/``b`` are used as given; they are assumed to share
    the dtype and device of ``x``.
    """
    assert y.size(1) == x.size(1), "The dimensions of instances in the input sequences must be the same!"
    N = x.size(0)
    M = y.size(0)

    # Create every helper tensor with x's dtype/device: torch.matmul does not
    # type-promote, so default float32/CPU helpers break float64 or GPU inputs.
    like_x = dict(dtype=x.dtype, device=x.device)

    # Normalised positions i/N (column) and j/M (row) -> [N, M] differences.
    col_x = (torch.arange(1, N + 1, **like_x) / N).view(N, 1)
    col_y = torch.arange(1, M + 1, **like_x) / M
    relative_pos = col_x - col_y

    # Gaussian prior on the scaled position difference.
    l = torch.abs(relative_pos) / ((1/N**2 + 1/M**2)**0.5)
    P = torch.exp(-l**2/(2*std**2)) / (std*(2*np.pi)**0.5)

    # Bonus that is largest where the relative positions coincide.
    S = lambda1 / (relative_pos**2 + 1)

    D = pdist2(x, y, metric=metric)

    # NOTE(review): for large D / small lambda2 entries of K can underflow to
    # zero, which would produce inf/nan in the scalings below.
    K = P * torch.exp((S - D) / lambda2)

    if a is None:
        a = torch.ones(N, 1, **like_x) / N

    if b is None:
        b = torch.ones(M, 1, **like_x) / M

    ainvK = K / a   # [N, M]

    n_iter = 0
    u = torch.ones(N, 1, **like_x) / N
    v = None
    while n_iter < maxIter:
        u = 1. / torch.matmul(ainvK, (b / (torch.matmul(K.T, u))))

        n_iter += 1
        # Convergence is checked at counter values 1, 21, 41, ... and at maxIter.
        if n_iter % 20 == 1 or n_iter == maxIter:
            # Split update so that both scalings are available.
            v = b / torch.matmul(K.T, u)    # [M, 1]
            u = 1 / torch.matmul(ainvK, v)  # [N, 1]

            criterion = torch.sum(torch.abs(v * torch.matmul(K.T, u) - b), dim=0)
            criterion = criterion.norm(p=float(p_norm))
            if abs(criterion) < tol:
                break

            # Deliberately kept: the split update above counts as one more
            # iteration, so removing this would change the number of updates.
            n_iter += 1
            if verbose > 0:
                print(f"Iteration : {n_iter}, Criterion: {criterion}")

    if v is None:
        # Only reachable when maxIter <= 0 (the loop body never ran).
        v = b / torch.matmul(K.T, u)

    U = K * D   # [N, M]

    dist = torch.sum(u * torch.matmul(U, v), dim=0)
    T = v.T * (u * K)

    return dist, T

In [3]:
def pdist2(X, Y, metric='sqreuclidean'):
    """Dispatch to the pairwise-distance routine selected by ``metric``.

    Args:
        X: first set of row vectors.
        Y: second set of row vectors.
        metric: case-insensitive name; one of ``'sqreuclidean'``,
            ``'euclidean'``, ``'L1'``, ``'cosine'``, ``'emd'``, ``'chisqr'``.

    Returns:
        Whatever the selected ``dist*`` helper returns for ``(X, Y)``.

    Raises:
        NotImplementedError: if ``metric`` is not one of the names above.
    """
    # Compare against lower-case keys only: the name is lower-cased first, so
    # a key containing an upper-case letter (the old 'L1') could never match.
    key = metric.lower()
    if key == 'sqreuclidean':
        return distEucSqr(X, Y)
    elif key == 'euclidean':
        return torch.sqrt(distEucSqr(X, Y))
    elif key == 'l1':
        return distL1(X, Y)
    elif key == 'cosine':
        return distCosine(X, Y)
    elif key == 'emd':
        return distEmd(X, Y)
    elif key == 'chisqr':
        return distChiSqr(X, Y)
    else:
        raise NotImplementedError(f'pdist - unknown metric: {metric}')


def distL1(x: torch.Tensor, y: torch.Tensor):
    """Pairwise L1 distances between the rows of ``x`` and the rows of ``y``.

    ``x`` gets an extra axis at position 1 so it broadcasts against ``y``;
    the absolute differences are then summed over the last axis.
    """
    pairwise_diff = torch.unsqueeze(x, 1) - y
    return pairwise_diff.abs().sum(dim=-1)


def distCosine(x: torch.Tensor, y: torch.Tensor, eps=1e-8):
    """Pairwise cosine distance ``1 - cos(x_i, y_j)`` along the last axis.

    Args:
        x: tensor whose rows are compared against the rows of ``y``; an axis
            is inserted at position 1 so the two broadcast against each other.
        y: tensor of row vectors.
        eps: small value forwarded to ``torch.nn.CosineSimilarity`` to avoid
            division by zero.

    Returns:
        Tensor of cosine distances with the last axis reduced.

    Raises:
        AssertionError: if neither input has a floating-point dtype.
    """
    # Accept any floating dtype (float16/32/64), not only float32; as before,
    # one floating-point operand is enough to pass the guard.
    assert x.is_floating_point() or y.is_floating_point(), "Inputs must be of type float"
    cos = torch.nn.CosineSimilarity(dim=-1, eps=eps)
    return 1 - cos(x.unsqueeze(1), y)


def distEmd(x: torch.Tensor, y: torch.Tensor):
    """Pairwise L1 distance between cumulative sums of ``x`` rows and ``y`` rows.

    Both inputs are cumulatively summed along the last axis; the result for
    each (x row, y row) pair is the summed absolute difference of those
    cumulative sums.
    """
    cdf_x, cdf_y = (torch.cumsum(t, dim=-1) for t in (x, y))
    gaps = cdf_x.unsqueeze(1) - cdf_y
    return gaps.abs().sum(dim=-1)


def distEucSqr(x: torch.Tensor, y: torch.Tensor):
    """Pairwise squared Euclidean distances, obtained by squaring ``torch.cdist``."""
    euclidean = torch.cdist(x, y, p=2)
    return euclidean ** 2


def distChiSqr(x: torch.Tensor, y: torch.Tensor, eps=1e-10):
    """Pairwise chi-square style distance between rows of ``x`` and ``y``.

    For every pair of rows this sums ``(x - y)**2 / (x + y + eps)`` over the
    last axis and halves the result; ``eps`` guards against a zero denominator.
    """
    x_col = x.unsqueeze(1)
    total = x_col + y
    delta = x_col - y
    terms = delta ** 2 / (total + eps)
    return terms.sum(dim=-1) / 2

In [4]:
def get_utterances(path):
    """Parse a text file of blank-line-separated blocks of numeric rows.

    Every non-blank line is split on whitespace and converted to a list of
    floats (a "frame"). Consecutive frames are grouped into one utterance;
    one or more blank lines end the current utterance.

    Args:
        path: path of the text file to read.

    Returns:
        A list of utterances, each a list of frames, each a list of floats.
        Leading, trailing and repeated blank lines produce no empty utterances.

    Raises:
        ValueError: if a token on a non-blank line cannot be parsed as a float.
    """
    X = []
    utterances = []
    # Single pass: the file no longer has to be read twice just to recognise
    # the last line; the pending utterance is flushed after the loop instead.
    with open(path, 'r') as f:
        for line in f.read().splitlines():
            # float() already honours a leading '-' sign.
            frame = [float(token) for token in line.split()]
            if frame:
                utterances.append(frame)
            elif utterances:
                X.append(utterances)
                utterances = []

    # File ended without a trailing blank line: keep the last utterance.
    if utterances:
        X.append(utterances)

    return X

In [5]:
# Parse the train/test files into lists of utterances (see get_utterances).
# NOTE(review): absolute "/content/..." paths - presumably a Colab runtime;
# adjust them when running elsewhere.
X_train = get_utterances(path="/content/Train_Arabic_Digit.txt")
X_test = get_utterances(path="/content/Test_Arabic_Digit.txt")
# Report how many utterances were parsed from each file.
print(f"Number of training sample: {len(X_train)}")
print(f"Number of testing sample: {len(X_test)}")

Number of training sample: 6600
Number of testing sample: 2200


In [6]:
import torch
import numpy as np
import json 

# Class ids 0..9 as a [10, 1] column.
n_class = torch.Tensor(torch.arange(0, 10)).view(10, 1)
# Repeat each class id block-wise: 660 consecutive training labels and 220
# consecutive test labels per class.
# NOTE(review): assumes the data files list samples grouped by digit in
# ascending order - confirm against the dataset description.
y_train = n_class.expand(10, 660).contiguous().view(6600)
y_test = n_class.expand(10, 220).contiguous().view(2200)

In [7]:
# Writes the literal 3 to fe.json in the current working directory.
# NOTE(review): no other visible cell reads fe.json - this looks like a
# leftover write test; confirm before keeping it.
with open('fe.json', 'w') as f:
    json.dump(3, f)

In [8]:
# Distances from test samples 700..1199 (0-based) to every training sample.
# W_dist[r][c] is the OPW_w distance between test sample 700 + r and training
# sample c. The result is cached as JSON because this cell performs
# 500 * len(X_train) OPW_w calls.

# Convert each training utterance once instead of once per test sample.
train_tensors = [torch.Tensor(Y) for Y in X_train]

W_dist = []
for idx_X, X in enumerate(X_test[700:1200], start=700):
    print(f'Calculating all distances of sample {idx_X+1}')
    x_tensor = torch.Tensor(X)  # loop-invariant for the inner loop
    W_X = []
    for y_tensor in train_tensors:
        # The transport plan is not needed here, only the scalar distance.
        dist, _plan = OPW_w(x_tensor, y_tensor, verbose=0, lambda1=0.1, lambda2=10)
        W_X.append(dist.item())
    W_dist.append(W_X)

with open('/content/distances700-1200.json', 'w') as f:
    json.dump(W_dist, f)

Calculating all distances of sample 701
Calculating all distances of sample 702
Calculating all distances of sample 703
Calculating all distances of sample 704
Calculating all distances of sample 705
Calculating all distances of sample 706
Calculating all distances of sample 707
Calculating all distances of sample 708
Calculating all distances of sample 709
Calculating all distances of sample 710
Calculating all distances of sample 711
Calculating all distances of sample 712
Calculating all distances of sample 713
Calculating all distances of sample 714
Calculating all distances of sample 715
Calculating all distances of sample 716
Calculating all distances of sample 717
Calculating all distances of sample 718
Calculating all distances of sample 719
Calculating all distances of sample 720
Calculating all distances of sample 721
Calculating all distances of sample 722
Calculating all distances of sample 723
Calculating all distances of sample 724
Calculating all distances of sample 725


In [9]:
# Sanity check: number of test samples for which a distance row was computed.
len(W_dist)

500

In [10]:
# Reload the cached distance rows; the context manager closes the file
# handle, which a bare open() passed to json.load would leave open.
with open('/content/distances700-1200.json') as f:
    W_dist = json.load(f)

In [11]:
# NOTE(review): the disabled lines below would overwrite entry i+1000 of row i
# with a huge distance, presumably to exclude one specific training sample per
# test sample - confirm the intent or delete them.
# for i in range(len(W_dist)):
#   W_dist[i][i+1000] = 1e9
# Convert the nested list (one row per test sample) into a 2-D tensor.
w_dist = torch.Tensor(W_dist)

In [12]:
# 1-nearest-neighbour: for each row, the column index with the smallest distance.
pred = w_dist.argmin(dim=1)

In [13]:
# Display the argmin column indices (these are indices, not class labels).
pred

tensor([2288, 2288, 2120, 2120, 2120, 2120, 2120, 2120, 2112, 2120, 2078, 2136,
        2136, 2075, 2075, 2067, 2067, 2042, 2075, 2016, 2203, 2074, 2074, 2095,
        2104, 2016, 1988, 1988, 1988, 1988, 2073, 2084, 2266, 2073, 1999, 1999,
        2001, 1999, 1999, 1999, 2288, 2243, 2289, 2288, 2289, 2289, 2288, 2289,
        2003, 2162, 1993, 4624, 2100, 2100, 2244, 2101, 2533, 2470, 2470, 2088,
        2088, 2088, 2288, 2479, 2249,  882, 2470, 2208, 2479, 2288, 2506, 2590,
        2507, 2507, 2592, 2345, 2507, 2507, 2507, 2507, 2400, 2533, 2535, 2535,
        2535, 2535, 2600, 2535, 2609, 2609, 2550, 2349, 2592, 2592, 2550, 2592,
        2321, 2550, 2592, 2592, 2600, 2559, 2600, 2563, 2608, 2601, 2473, 2567,
        2627, 2320, 2439, 2436, 2583, 2585, 2435, 2523, 2522, 2435, 2585, 2435,
        2365, 2603, 2579, 2601, 2470, 2470, 2559, 2579, 2470, 2466, 2562, 2521,
        2364, 2459, 2562, 2562, 2562, 2562, 2561, 2561, 2466, 2480, 2533, 2533,
        2458, 2435, 2435, 2586, 2586, 25

In [14]:
# Number of test samples 700..1199 whose nearest training sample carries the
# same label. Tensor.sum() reduces in one vectorized call instead of iterating
# the comparison result element by element with the builtin sum().
(y_train[pred] == y_test[700:1200]).sum()

tensor(485)