In [1]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
def preprocessing(X, num_components = 50):
    num_features = X.shape[1]
    
    X = X - np.mean(X, axis=0) # centering
    X = X / X.std(axis=0) # standardization

    # PCA
    cov = np.cov(X.T)
    eig_vals, eig_vecs = np.linalg.eig(cov)

    for ev in eig_vecs:
        np.testing.assert_array_almost_equal(1.0, np.linalg.norm(ev))

    # Make a list of (eigenvalue, eigenvector) tuples
    eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:,i]) for i in range(len(eig_vals))]

    # Sort the (eigenvalue, eigenvector) tuples from high to low
    eig_pairs.sort()
    eig_pairs.reverse()

    pca_matrix = eig_pairs[0][1].reshape(num_features, 1)

    for i in range(1, num_components):
        pca_matrix = np.hstack((pca_matrix,
                                eig_pairs[i][1].reshape(num_features, 1)))

    X = X.dot(pca_matrix)

    # Whitening
    X = X / np.sqrt(eig_vals[:num_components])
    
    return X

In [31]:
def calc(x, y):
    return np.sqrt(np.sum(np.power(x - (y / 2), 2)))

***Inception v4***

In [4]:
X_1 = np.load("./embeddings_inception_v4.npy")
X_1 = preprocessing(X_1)

In [43]:
item = X_1[12000, :]
count = 0

for i in X_1:
    if calc(item, i) > (np.linalg.norm(i) / 2):
        count += 1

print(count)

13232


***Inception ResNet v1***

In [40]:
X_2 = np.load("./embeddings.npy")
X_2 = preprocessing(X_2)

In [None]:
loops = 10
means = np.zeros(loops)

count = 0
cnt = 0
lng = len(X_2)
points = np.zeros(lng)

for j in range(loops):
    print("Loop: ", j)
    
    for i in X_2:
        indexes = np.random.choice(lng, 1000)

        for k in X_2[indexes]:
            if calc(i, k) <= (np.linalg.norm(k) / 2):
                count += 1

        points[cnt] = count
        count = 0

        if cnt % 1000 == 0: 
            print(cnt)
        cnt += 1
        
    cnt = 0
    means[j] = points.mean()
    print(means)
    points = np.zeros(lng)

Loop:  0
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
[ 0.06634928  0.          0.          0.          0.          0.          0.
  0.          0.          0.        ]
Loop:  1
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
[ 0.06634928  0.06944759  0.          0.          0.          0.          0.
  0.          0.          0.        ]
Loop:  2
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
[ 0.06634928  0.06944759  0.06793622  0.          0.          0.          0.
  0.          0.          0.        ]
Loop:  3
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
[ 0.06634928  0.06944759  0.06793622  0.07126124  0.          0.          0.
  0.          0.          0.        ]
Loop:  4
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
[ 0.06634928  0.06944759  0.06793622  0.07126124  0.06854077  0.          0.
  0.          0.          0.        ]
Loop:  5
0
1000
2000
3000

In [75]:
points.mean()

0.070883397566689335