In [1]:
import os
import random

import numpy as np
import pandas as pd

In [2]:
# Number of label values; class-index lists are built for labels 0..NUM_CLASSES-1.
NUM_CLASSES = 10

# Root folder for every dataset path used in this notebook.
DATASET_ROOT_DIR = '../dataset/'

# NOTE: despite the *_DIR suffix these two are CSV file paths, not directories.
TRAIN_DATA_DIR = DATASET_ROOT_DIR + 'mnist-in-csv/mnist_train.csv'
TEST_DATA_DIR = DATASET_ROOT_DIR + 'mnist-in-csv/mnist_test.csv'

In [3]:
def create_triplets(x, digit_indices, num_classes=None):
    '''
    Build [anchor, positive, negative] triplets from class-grouped samples.

    For every class ``d`` and every position ``i`` one triplet is emitted:
    the anchor is ``x[digit_indices[d][i]]``, the positive is the next sample
    of the same class (``x[digit_indices[d][i + 1]]``) and the negative is
    ``x[digit_indices[dn][i]]`` where ``dn`` is a different class chosen with
    ``random.randrange``. Call ``random.seed(...)`` beforehand if the
    triplets must be reproducible.

    Parameters
    ----------
    x : indexable
        Samples; ``x[k]`` must return the sample stored at position ``k``.
    digit_indices : sequence of sequences of int
        ``digit_indices[d]`` lists the positions in ``x`` that belong to
        class ``d``.
    num_classes : int, optional
        How many leading entries of ``digit_indices`` to use. Defaults to
        ``len(digit_indices)``.

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the collected triplets, ``num_classes * n`` of them,
        where ``n`` is the size of the smallest class minus one. Each triplet
        is stored as ``[anchor, positive, negative]`` along the second axis.

    Raises
    ------
    ValueError
        If fewer than two classes are requested, since no negative class
        can be drawn in that case.
    '''
    if num_classes is None:
        num_classes = len(digit_indices)
    if num_classes < 2:
        raise ValueError(
            'create_triplets needs at least 2 classes to pick a negative, '
            'got %d' % num_classes
        )

    # The positive reads index i + 1, so stop one short of the smallest class.
    pairs_per_class = min(len(digit_indices[d]) for d in range(num_classes)) - 1

    triplets = []
    for d in range(num_classes):
        for i in range(pairs_per_class):
            # A non-zero offset modulo num_classes always lands on another class.
            offset = random.randrange(1, num_classes)
            dn = (d + offset) % num_classes

            anchor = x[digit_indices[d][i]]
            positive = x[digit_indices[d][i + 1]]
            negative = x[digit_indices[dn][i]]
            triplets.append([anchor, positive, negative])

    return np.array(triplets)

In [4]:
# Read the train and test CSV files (train first, then test) into DataFrames.
train_df, test_df = map(pd.read_csv, (TRAIN_DATA_DIR, TEST_DATA_DIR))

In [5]:
# Number of rows (samples) in each split.
m_train = len(train_df)
m_test = len(test_df)

In [6]:
# Features: every column except 'label', reshaped to (samples, 28, 28, 1)
# and copied into a float64 array so it can be rescaled in place later.
X_train = (
    train_df.loc[:, train_df.columns != 'label']
    .values
    .reshape(m_train, 28, 28, 1)
    .astype(np.float64)
)
X_test = (
    test_df.loc[:, test_df.columns != 'label']
    .values
    .reshape(m_test, 28, 28, 1)
    .astype(np.float64)
)

# Targets: the 'label' column as a plain array.
y_train = train_df['label'].values
y_test = test_df['label'].values

In [7]:
# Divide both image arrays by 255 in place.
# NOTE: this mutates X_train / X_test, so re-running the cell without
# re-running the previous one divides the data by 255 a second time.
np.divide(X_train, 255, out=X_train)
np.divide(X_test, 255, out=X_test)

# Shape of a single sample (everything after the leading sample axis).
input_shape = X_train.shape[1:]

In [8]:
# Training split: positions of every label value, then the triplets.
digit_indices = [np.flatnonzero(y_train == label) for label in range(NUM_CLASSES)]
tr_triplets = create_triplets(X_train, digit_indices)

# Test split: same procedure. digit_indices is deliberately rebound here,
# so after this cell it refers to the test-set positions.
digit_indices = [np.flatnonzero(y_test == label) for label in range(NUM_CLASSES)]
ts_triplets = create_triplets(X_test, digit_indices)

In [9]:
# Report the shape of both triplet arrays.
# NOTE(review): create_triplets stacks each triplet as
# [anchor, positive, negative], which should add an axis of length 3 after
# the leading one; a 4-D shape in the recorded output would suggest it came
# from an earlier flattened variant - TODO re-run and confirm.
print('train X shape:', np.shape(tr_triplets))
print('test X shape:', np.shape(ts_triplets))

train X shape: (162600, 28, 28, 1)
test X shape: (26730, 28, 28, 1)


In [10]:
# Persist both triplet arrays as .npy files under the dataset root.
# np.save does not create missing parent directories, so make sure the
# output folder exists first (no-op when it is already there).
triplet_out_dir = DATASET_ROOT_DIR + 'mnist-triplet-loss/'
os.makedirs(triplet_out_dir, exist_ok=True)

np.save(triplet_out_dir + 'tr_triplets.npy', tr_triplets)
np.save(triplet_out_dir + 'ts_triplets.npy', ts_triplets)