In [None]:
import itertools
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics import pairwise_distances

In [None]:
def gen_sparse_matrix(X):
    """Return a CSR matrix holding ``X`` with every negative entry set to zero.

    Parameters
    ----------
    X : array-like
        Dense numeric data (NumPy array or nested sequence). It is never
        modified; the clipping is done on a freshly allocated array.

    Returns
    -------
    scipy.sparse.csr_matrix
        Sparse copy of ``X`` in which all negative values have become
        (implicit) zeros.
    """
    # np.maximum allocates its result, so the caller's array keeps its
    # original (possibly negative) values instead of being clipped in place.
    X_nonneg = np.maximum(np.asarray(X), 0)
    return csr_matrix(X_nonneg)

In [None]:
def _sparse_rows_sq_euclidean(x_cols, x_vals, y_cols, y_vals):
    """Squared Euclidean distance between two sparse rows.

    Each row is given as parallel arrays of column indices (sorted in
    increasing order) and the values stored at those columns.
    """
    k_x, k_y = 0, 0
    n_x, n_y = len(x_cols), len(y_cols)
    sq_dist = 0.0
    # Two-pointer merge over the sorted column indices of both rows.
    while k_x < n_x and k_y < n_y:
        col_x, col_y = x_cols[k_x], y_cols[k_y]
        if col_x == col_y:
            # Column stored in both rows: regular squared difference.
            sq_dist += (x_vals[k_x] - y_vals[k_y]) ** 2
            k_x += 1
            k_y += 1
        elif col_x < col_y:
            # Column stored only in the X row: the Y row holds an implicit 0.
            sq_dist += x_vals[k_x] ** 2
            k_x += 1
        else:
            # Column stored only in the Y row: the X row holds an implicit 0.
            sq_dist += y_vals[k_y] ** 2
            k_y += 1
    # Whatever is left in either row faces implicit zeros in the other one.
    sq_dist += np.dot(x_vals[k_x:], x_vals[k_x:])
    sq_dist += np.dot(y_vals[k_y:], y_vals[k_y:])
    return sq_dist


def dist_sparse_sparse(X, Y, metric="euclidean"):
    """Pairwise Euclidean distances between the rows of ``X`` and ``Y``.

    Both inputs are converted with ``gen_sparse_matrix`` (which zeroes the
    negative entries) and the distances are computed directly on the CSR
    ``indptr`` / ``indices`` / ``data`` arrays, visiting only stored entries.

    Parameters
    ----------
    X, Y : array-like
        Dense inputs accepted by ``gen_sparse_matrix``; they must have the
        same number of columns.
    metric : str, default="euclidean"
        Only ``"euclidean"`` is implemented.

    Returns
    -------
    numpy.ndarray of shape (n_rows_X, n_rows_Y)
        ``distances[i, j]`` is the distance between row ``i`` of the converted
        ``X`` and row ``j`` of the converted ``Y``.

    Raises
    ------
    ValueError
        If ``metric`` is not ``"euclidean"`` or the column counts differ.
    """
    if metric != "euclidean":
        raise ValueError(
            f"Unsupported metric {metric!r}: only 'euclidean' is implemented."
        )

    X_csr = gen_sparse_matrix(X)
    Y_csr = gen_sparse_matrix(Y)
    if X_csr.shape[1] != Y_csr.shape[1]:
        raise ValueError(
            "X and Y must have the same number of features, got "
            f"{X_csr.shape[1]} and {Y_csr.shape[1]}."
        )

    # The merge in the helper relies on increasing column indices per row.
    X_csr.sort_indices()
    Y_csr.sort_indices()

    n_X, n_Y = X_csr.shape[0], Y_csr.shape[0]
    distances = np.empty(shape=(n_X, n_Y))

    # Accumulate in float64 so narrow integer dtypes cannot overflow on squaring.
    x_data = X_csr.data.astype(np.float64)
    y_data = Y_csr.data.astype(np.float64)

    # Row views of Y are reused for every row of X, so slice them only once.
    y_rows = [
        (Y_csr.indices[start:end], y_data[start:end])
        for start, end in zip(Y_csr.indptr[:-1], Y_csr.indptr[1:])
    ]

    for i in range(n_X):
        # indptr[i]:indptr[i + 1] delimits row i inside indices/data.
        start, end = X_csr.indptr[i], X_csr.indptr[i + 1]
        x_cols = X_csr.indices[start:end]
        x_vals = x_data[start:end]
        for j, (y_cols, y_vals) in enumerate(y_rows):
            distances[i, j] = np.sqrt(
                _sparse_rows_sq_euclidean(x_cols, x_vals, y_cols, y_vals)
            )

    return distances

In [None]:
def test_correctness(n_samples, n_features, metric="euclidean", seed=0):
    """Check ``dist_sparse_sparse`` against scikit-learn's ``pairwise_distances``.

    Parameters
    ----------
    n_samples : int
        Number of rows generated for each of ``X`` and ``Y``.
    n_features : int
        Number of columns of ``X`` and ``Y``.
    metric : str, default="euclidean"
        Metric name forwarded to both implementations.
    seed : int or None, default=0
        Seed for the random generator, so a failing case can be reproduced.

    Raises
    ------
    AssertionError
        If the two distance matrices differ beyond the tolerances.
    """
    rng = np.random.default_rng(seed)
    # dist_sparse_sparse zeroes negative entries (via gen_sparse_matrix), so the
    # negatives are clipped here first: the reference must see the same data.
    X = np.maximum(rng.integers(-10, 10, size=(n_samples, n_features)), 0)
    Y = np.maximum(rng.integers(-10, 10, size=(n_samples, n_features)), 0)

    expected_distances = pairwise_distances(X, Y, metric=metric)
    actual_distances = dist_sparse_sparse(X, Y, metric=metric)

    # A small atol keeps exact-zero distances from failing on float round-off.
    np.testing.assert_allclose(
        actual_distances, expected_distances, rtol=1e-04, atol=1e-08
    )

In [None]:
# Run the correctness check on every (n_samples, n_features) combination,
# with n_features varying fastest.
for n_samples in (10, 100, 1000, 10000):
    for n_features in (10, 100):
        print(f"(n_samples, n_features): ({n_samples}, {n_features})")
        test_correctness(n_samples, n_features)