In [1]:
import os
import glob
import time
import shutil

import numpy as np

import hapod as hp


In [None]:
# Memory budget: derive the widest matrix a single SVD may hold and, from
# that, the widest column chunk the HAPOD is allowed to work on.
memory_avail_total = hp.get_memory_size()
memory_forbidden = 1 << 30  # same value as 2**30; reported below as 1.00 "GB"
print(f"total available memory is {memory_avail_total / 2**30:.2f} GB")
print(f"but {memory_forbidden / 2**30:.2f} GB will be made unavailable")

# NOTE(review): `dtype` is only echoed in the message below and is not passed
# to `hp.get_max_svd_columns` -- presumably the helper assumes float64; confirm.
dtype = np.float64
n_rows = 3_600_000

# Whatever is left after the reserved amount is the budget handed to the SVD.
memory_limit_svd = memory_avail_total - memory_forbidden
n_svd_max_cols = hp.get_max_svd_columns(n_rows, memory_limit=memory_limit_svd)
print(f"the largest matrix for SVD is {n_rows, n_svd_max_cols} using {dtype}")

# A chunk gets half of the SVD width (presumably so that two chunks can be
# merged into one SVD-sized matrix -- TODO confirm against hapod).
n_chunk_max_cols = n_svd_max_cols // 2
print(f"the hapod can use chunks of {n_chunk_max_cols} columns")


total available memory is 15.56 GB
but 1.00 GB will be made unavailable
the largest matrix for SVD is (3600000, 124) using <class 'numpy.float64'>
the hapod can use chunks of 62 columns


In [3]:
# Size of the simulated snapshot matrix and the number of column chunks
# needed to process it within the per-chunk column limit.
n_cols = 1_000
snapshots_shape = (n_rows, n_cols)
snapshots_matrix_memory = hp.get_matrix_memory_footprint(snapshots_shape)
print(f"a snapshots matrix of size {n_rows, n_cols} would use {snapshots_matrix_memory / 2**30:.2f} GB of memory")

# Chunk count as computed by hapod for the given per-chunk column limit.
n_chunks = hp.get_n_chunks_balanced(
    n_cols,
    n_chunk_max_cols=n_chunk_max_cols,
)
print(f"for a balanced, full, merge tree, will need {n_chunks} chunks with maximum size {n_chunk_max_cols} >= {n_cols / n_chunks:.3f} average")


a snapshots matrix of size (3600000, 1000) would use 26.82 GB of memory
for a balanced, full, merge tree, will need 32 chunks with maximum size 62 >= 31.250 average


In [4]:
# Root directory for every on-disk artefact of this notebook (snapshots,
# chunks, HAPOD temporaries, results). The location can be overridden with the
# HAPOD_WORK_DIR environment variable so the notebook also runs on machines
# that lack the original scratch path; the default is unchanged.
work_dir = os.environ.get("HAPOD_WORK_DIR", "/scratch/lfabris/hapod_test")
os.makedirs(work_dir, exist_ok=True)


In [5]:
# Simulate the snapshot matrix on disk as one ``.npy`` file per column, each
# holding an (n_rows, 1) array of uniform random numbers.
#
# refresh_snapshots = True  -> wipe and regenerate the snapshot files
# refresh_snapshots = False -> reuse the files already present, if any
refresh_snapshots = True
snapshots_seed = 0  # fixed seed so that regenerated snapshots are reproducible
snapshots_dir = os.path.join(work_dir, "snapshots")
print(f"simulating a snapshot matrix with size {(n_rows, n_cols)} under {snapshots_dir}")

if not os.path.isdir(snapshots_dir) or refresh_snapshots:
    print(f"create snapshots under {snapshots_dir}")
    # This branch is also taken on the very first run, when the directory does
    # not exist yet; an unconditional rmtree would raise FileNotFoundError.
    if os.path.isdir(snapshots_dir):
        shutil.rmtree(snapshots_dir)
    os.makedirs(snapshots_dir, exist_ok=True)

    print(
        f"storing {snapshots_matrix_memory / 2**30:.3f} GB worth of snapshots"
    )

    rng = np.random.default_rng(snapshots_seed)

    # Accumulate wall-clock time: start from -t0, add t1 once the loop is done.
    elapsed_snapshots = -time.perf_counter()
    snapshots_fnames = []
    for i in range(n_cols):
        snapshot_fname = os.path.join(snapshots_dir, f"snapshot_{i:04d}.npy")
        np.save(snapshot_fname, rng.random((n_rows, 1)))

        snapshots_fnames.append(snapshot_fname)
    elapsed_snapshots += time.perf_counter()
    print(f"created {len(snapshots_fnames)} snapshot files in {elapsed_snapshots:.3f}")
else:
    # glob returns files in arbitrary order; sort so that the column order
    # matches the one produced by the creation branch (the zero-padded index
    # sorts lexicographically for up to 10000 snapshots).
    snapshots_fnames = sorted(glob.glob(os.path.join(snapshots_dir, "*.npy")))
    print(f"found {len(snapshots_fnames)} snapshot files in {snapshots_dir}")
    

simulating a snapshot matrix with size (3600000, 1000) under /scratch/lfabris/hapod_test/snapshots
create snapshots under /scratch/lfabris/hapod_test/snapshots
storing 26.822 GB worth of snapshots
created 1000 snapshot files in 370.720


In [6]:
# Group the per-column snapshot files into ``n_chunks`` column chunks stored
# under ``chunks_dir``; these chunk files are the input of the HAPOD.
#
# refresh_chunks = True  -> wipe and rebuild the chunk files
# refresh_chunks = False -> reuse the chunk files already present, if any
refresh_chunks = True
chunks_dir = os.path.join(work_dir, "chunks")
print(f"simulating chunks of maximum size {n_rows, n_chunk_max_cols} under {chunks_dir}")

if not os.path.isdir(chunks_dir) or refresh_chunks:
    print(f"create chunks under {chunks_dir}")
    # This branch is also taken on the very first run, when the directory does
    # not exist yet; an unconditional rmtree would raise FileNotFoundError.
    if os.path.isdir(chunks_dir):
        shutil.rmtree(chunks_dir)
    os.makedirs(chunks_dir, exist_ok=True)

    # Accumulate wall-clock time: start from -t0, add t1 once chunking is done.
    elapsed_chunks = -time.perf_counter()
    chunks_fnames = hp.make_chunks(
        snapshots_fnames,
        chunks_dir,
        n_chunks=n_chunks,
    )
    elapsed_chunks += time.perf_counter()
    print(f"created {len(chunks_fnames)} column chunks files in {elapsed_chunks:.3f}")
else:
    # glob returns files in arbitrary order; sort so the reused chunk list is
    # deterministic from run to run.
    chunks_fnames = sorted(glob.glob(os.path.join(chunks_dir, "*.npy")))
    print(f"found {len(chunks_fnames)} column chunks files in {chunks_dir}")


simulating chunks of maximum size (3600000, 62) under /scratch/lfabris/hapod_test/chunks
create chunks under /scratch/lfabris/hapod_test/chunks
created 32 column chunks files in 812.720


In [7]:
# Run the HAPOD over the chunk files, time it, report the shapes of the two
# returned arrays and store them next to the other artefacts in work_dir.
hapod_tmp_dir = os.path.join(work_dir, "tmp")
# NOTE(review): the temporary directory is not created here -- presumably
# hp.hapod takes care of it; confirm.

hapod_start = time.perf_counter()
Uu, ss = hp.hapod(
    chunks_fnames,
    rank_max=n_chunk_max_cols,
    temp_work_dir=hapod_tmp_dir,
    verbose=True,
)
elapsed_hapod = time.perf_counter() - hapod_start

print(f"finished hapod in {elapsed_hapod:.3f}")
print(f"    U.shape {Uu.shape}")
print(f"    ss.shape {ss.shape}")

# Persist both results under work_dir.
for _out_name, _out_array in (("U.npy", Uu), ("s.npy", ss)):
    np.save(os.path.join(work_dir, _out_name), _out_array)


Xs
    /scratch/lfabris/hapod_test/chunks/chunk_0000.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0001.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0002.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0003.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0004.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0005.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0006.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0007.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0008.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0009.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0010.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0011.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0012.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0013.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0014.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0015.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0016.npy
    /scratch/lfabris/hapod_test/chunks/chunk_0017.npy
    /scratch/lfabris/hapo