# Import thư viện cần thiết

In [1]:
import numpy as np
from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection
from sklearn.metrics import pairwise_distances
import time

# Tạo dữ liệu mẫu

In [2]:
# Experiment parameters
n_samples = 1000  # number of data points (n)
n_features = 10000  # original, high dimensionality (D)
target_dim = 100  # reduced dimensionality (k << D)

# Seed the data generator so that X -- and therefore every distance and error
# figure derived from it -- is identical on each Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# Generate random data, uniform on [0, 1)
X = rng.random((n_samples, n_features))

# If the data is heavy-tailed (as in the PDF), apply term weighting first.
# Example, square-root weighting: X = np.sqrt(X)  # or 1 + np.log(X + 1) for log weighting

# Compute original pairwise distances

In [3]:
# Pairwise Euclidean distances between all rows of X in the original space
dist_original = pairwise_distances(X, metric="euclidean")
# Keep only strictly positive distances: this drops the zero self-distances on
# the diagonal so the relative error below never divides by zero
mask = np.greater(dist_original, 0)

# Implement Gaussian Random Projection

In [4]:
# Gaussian random projection (the conventional, dense variant)
grp = GaussianRandomProjection(n_components=target_dim, random_state=42)  # fixed random_state for reproducibility

# Time fit + transform with perf_counter(): a monotonic, high-resolution clock
# meant for measuring short intervals. time.time() follows the system wall
# clock and can jump if that clock is adjusted mid-measurement.
start_time = time.perf_counter()
X_grp = grp.fit_transform(X)
time_grp = time.perf_counter() - start_time

# Distances in the projected space, compared against the original ones
dist_grp = pairwise_distances(X_grp)
# Mean relative error over the pairs selected by `mask`
error_grp = np.mean(np.abs(dist_grp[mask] - dist_original[mask]) / dist_original[mask])

# Implement Sparse Random Projection

In [5]:
# Sparse random projection
density = 1 / np.sqrt(n_features)  # "very sparse" setting as in the PDF (s = sqrt(D))
# Alternatively, density = 1/3 gives the sparse Achlioptas setting (s = 3)

srp = SparseRandomProjection(n_components=target_dim, density=density, random_state=42)

# Time fit + transform with perf_counter(): a monotonic, high-resolution clock
# meant for measuring short intervals. time.time() follows the system wall
# clock and can jump if that clock is adjusted mid-measurement.
start_time = time.perf_counter()
X_srp = srp.fit_transform(X)
time_srp = time.perf_counter() - start_time

# Distances in the projected space, compared against the original ones
dist_srp = pairwise_distances(X_srp)
# Mean relative error over the pairs selected by `mask`
error_srp = np.mean(np.abs(dist_srp[mask] - dist_original[mask]) / dist_original[mask])

# Output

In [17]:
# Report the elapsed fit/transform time and the mean relative distance error
# for each projection, one line per method.
summary_lines = (
    f"Gaussian: Time = {time_grp:.4f}s, Mean Relative Error = {error_grp:.4f}",
    f"Sparse (density={density:.4f}): Time = {time_srp:.4f}s, Mean Relative Error = {error_srp:.4f}",
)
for summary_line in summary_lines:
    print(summary_line)

Gaussian: Time = 0.0867s, Mean Relative Error = 0.0560
Sparse (density=0.0100): Time = 0.1543s, Mean Relative Error = 0.0563
