In [1]:
import numpy as np

# Dummy dense 5000 x 100 array of uniform random numbers in which every value
# below 0.5 (roughly half of the entries) is replaced by NaN, simulating
# randomly distributed missing values.
X = np.random.rand(5000, 100)
X[X < 0.5] = np.nan

# Sparse counterpart for the sparse-only code path: a random 500000 x 1000
# matrix in CSR format with density 0.05.
# NOTE: this import binds the bare name `random` to scipy.sparse.random.
from scipy.sparse import random
X_sparse = random(500000, 1000, density=0.05, format='csr')

In [2]:
def random_projections(X, n_components=100, random_state=None):
    """Project a dense matrix onto random Gaussian directions, ignoring NaNs.

    Every output column is the dot product of the rows of ``X`` with one
    column of a random matrix drawn from the standard normal distribution.
    NaN entries contribute nothing to the sums (they are treated as zeros,
    the same convention as ``np.nansum``), so a row made only of NaNs maps
    to a row of zeros.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Dense input, possibly containing NaN for missing values. It is not
        modified.
    n_components : int, default=100
        Number of random directions, i.e. number of output columns.
    random_state : None, int, or object with a ``normal`` method, default=None
        ``None`` draws from the global ``np.random`` state (so a prior
        ``np.random.seed`` call still applies). An int seeds a fresh
        ``np.random.RandomState``. An existing ``RandomState`` / ``Generator``
        is used as-is.

    Returns
    -------
    ndarray of shape (n_samples, n_components)
        The projected data, free of NaNs as long as ``X`` holds only finite
        values and NaNs.

    Notes
    -----
    The projection is computed as a single matrix product on a zero-filled
    copy of ``X`` instead of one ``np.nansum`` pass per component. This avoids
    allocating an (n_samples, n_features) temporary for every component; the
    result may differ from the column-by-column sum only by floating-point
    rounding.
    """
    _, n_features = X.shape

    if random_state is None:
        rng = np.random  # global legacy state, keeps np.random.seed() effective
    elif hasattr(random_state, "normal"):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)
    components = rng.normal(size=(n_features, n_components))

    # Replacing NaN by 0 before multiplying is equivalent to skipping the
    # missing entries in each row-wise sum.
    X_filled = np.where(np.isnan(X), 0.0, X)
    return np.dot(X_filled, components)

def random_projections_sparse(X, n_components=100, random_state=None):
    """Project a matrix onto random Gaussian directions via ``X.dot``.

    A random (n_features, n_components) matrix is drawn from the standard
    normal distribution and right-multiplied onto ``X``. Unlike
    ``random_projections`` there is no special handling of NaN: the product
    is whatever ``X.dot`` computes.

    Parameters
    ----------
    X : object with ``shape`` of length 2 and a ``dot`` method
        Intended for scipy.sparse matrices of shape (n_samples, n_features).
    n_components : int, default=100
        Number of random directions, i.e. number of output columns.
    random_state : None, int, or object with a ``normal`` method, default=None
        ``None`` draws from the global ``np.random`` state (so a prior
        ``np.random.seed`` call still applies). An int seeds a fresh
        ``np.random.RandomState``. An existing ``RandomState`` / ``Generator``
        is used as-is.

    Returns
    -------
    The result of ``X.dot(components)``, with shape
    (n_samples, n_components).
    """
    _, n_features = X.shape

    if random_state is None:
        rng = np.random  # global legacy state, keeps np.random.seed() effective
    elif hasattr(random_state, "normal"):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)
    components = rng.normal(size=(n_features, n_components))

    return X.dot(components)

In [3]:
# Project the dense, NaN-containing X onto 10 random components.
X_new = random_projections(X, n_components=10)

In [5]:
# Sanity check: one row per sample, one column per requested component.
X_new.shape

(5000, 10)

In [6]:
# Same projection on the sparse matrix, again with 10 random components.
X_new_sparse = random_projections_sparse(X_sparse, n_components=10)

In [9]:
# Sanity check: one row per sample, one column per requested component.
X_new_sparse.shape

(500000, 10)