In [1]:
import sys
import os

# 프로젝트 루트 디렉토리 경로를 추가
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [2]:
import jax
import jax.numpy as jnp
import time
#disable jax JIT
#jax.config.update("jax_disable_jit", True)

from tqdm.autonotebook import trange
from functools import partial
from JAxtar.hash import hash_func_builder
from puzzle.slidepuzzle import SlidePuzzle
from heuristic.slidepuzzle_heuristic import SlidePuzzleHeuristic
from JAxtar.hash import HashTable

  from tqdm.autonotebook import trange
2024-07-16 04:44:52.589098: W external/xla/xla/service/gpu/nvptx_compiler.cc:765] The NVIDIA driver's CUDA version is 12.2 which is older than the ptxas CUDA version (12.5.82). Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


In [3]:
count = int(1e6)
puzzle = SlidePuzzle(4)
hash_func = hash_func_builder(puzzle.State)
sample = jax.vmap(puzzle.get_initial_state)(key=jax.random.split(jax.random.PRNGKey(2),count))
new_sample = jax.vmap(puzzle.get_initial_state)(key=jax.random.split(jax.random.PRNGKey(1),count))
table = HashTable.make_lookup_table(puzzle.State, 1, int(1e7))

lookup = jax.jit(partial(HashTable.lookup, hash_func))
start = time.time()
idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, sample)
print(time.time()-start)
#print(seeds, idx, table_idx, found)

0.4048776626586914


In [4]:
batch = 10000
parallel_insert = jax.jit(partial(HashTable.parallel_insert, hash_func))
for i in range(10):
    inserteds = []
    sample = jax.vmap(puzzle.get_initial_state)(key=jax.random.split(jax.random.PRNGKey(i + 256),count))
    idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, sample)
    same_ratio = jnp.mean(found)
    for j in trange(0, count, batch):
        table, inserted = parallel_insert(table, sample[j:j+batch])
        inserteds.append(inserted)
    inserteds = jnp.concatenate(inserteds)
    idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, sample)
    print(jnp.mean(found), same_ratio, jnp.mean(inserteds))

100%|██████████| 100/100 [00:01<00:00, 68.13it/s]


1.0 0.0 1.0


100%|██████████| 100/100 [00:00<00:00, 595.20it/s]


1.0 0.0 1.0


100%|██████████| 100/100 [00:00<00:00, 577.40it/s]


1.0 0.0 1.0


100%|██████████| 100/100 [00:00<00:00, 579.69it/s]


1.0 1e-06 0.999999


100%|██████████| 100/100 [00:00<00:00, 534.25it/s]


1.0 0.0 1.0


100%|██████████| 100/100 [00:00<00:00, 538.81it/s]


1.0 2e-06 0.999998


100%|██████████| 100/100 [00:00<00:00, 508.19it/s]


1.0 0.0 1.0


100%|██████████| 100/100 [00:00<00:00, 493.83it/s]


1.0 1e-06 0.999998


100%|██████████| 100/100 [00:00<00:00, 467.84it/s]


1.0 1e-06 0.999999


100%|██████████| 100/100 [00:00<00:00, 447.61it/s]

1.0 0.0 1.0





In [5]:
another_sample = jax.vmap(puzzle.get_initial_state)(key=jax.random.split(jax.random.PRNGKey(123),count))
idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, another_sample)
print(jnp.sum(found) / count)

1e-06


In [6]:
start = time.time()
inserteds = []
for i in trange(count // batch):
    table, inserted = parallel_insert(table, another_sample[i*batch:(i+1)*batch])
    inserteds.append(inserted)
print(jnp.sum(jnp.concatenate(inserteds)) / count)
print("insert time:", time.time()-start)
start = time.time()
idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, sample)
print("check time:", time.time()-start)
print(jnp.sum(found) / count)
start = time.time()
idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, new_sample)
print("check time:", time.time()-start)
print(jnp.sum(found) / count)
idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, another_sample)
print("check time:", time.time()-start)
print(jnp.sum(found) / count)

100%|██████████| 100/100 [00:00<00:00, 399.26it/s]

0.999999
insert time: 0.25668883323669434
check time: 0.010768651962280273
1.0
check time: 0.012304544448852539
1e-06
check time: 0.025035381317138672
1.0



