In [1]:
import sys
import os

# 프로젝트 루트 디렉토리 경로를 추가
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [2]:
import jax
import jax.numpy as jnp
import time
#disable jax JIT
#jax.config.update("jax_disable_jit", True)

from tqdm.autonotebook import trange
from functools import partial
from JAxtar.hash import hash_func_builder
from puzzle.slidepuzzle import SlidePuzzle
from heuristic.slidepuzzle_heuristic import SlidePuzzleHeuristic
from JAxtar.hash import HashTable

  from tqdm.autonotebook import trange


In [3]:
def _check_equal(state1, state2):
                tree_equal = jax.tree.map(lambda x, y: jnp.all(x == y), state1, state2)
                return jax.tree_util.tree_reduce(jnp.logical_and, tree_equal)

In [4]:
# Small benchmark: `count` random SlidePuzzle(4) states looked up in a freshly built table.
count = 1000
puzzle = SlidePuzzle(4)
hash_func = hash_func_builder(puzzle.State)
sample = jax.vmap(puzzle.get_initial_state)(key=jax.random.split(jax.random.PRNGKey(2),count))
new_sample = jax.vmap(puzzle.get_initial_state)(key=jax.random.split(jax.random.PRNGKey(1),count))
# NOTE(review): the meaning of the `1` and `int(1e4)` arguments is defined by
# HashTable.build (presumably a seed and a capacity) -- confirm there.
table = HashTable.build(puzzle.State, 1, int(1e4))

lookup = jax.jit(partial(HashTable.lookup, hash_func))
start = time.perf_counter()
idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, sample)
# JAX dispatches work asynchronously, so wait for the results before reading
# the clock; otherwise only the dispatch time is measured. This first call
# also includes tracing/compilation of `lookup`.
jax.block_until_ready((idx, table_idx, found))
print(time.perf_counter()-start)

2024-10-17 00:56:08.026712: W external/xla/xla/service/gpu/nvptx_compiler.cc:930] The NVIDIA driver's CUDA version is 12.2 which is older than the PTX compiler version 12.6.77. Because the driver is older than the PTX compiler version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


0.3649106025695801


In [5]:
# Insert ten rounds of fresh samples and verify that every inserted state can
# be looked up afterwards.
batch = 4000
parallel_insert = jax.jit(partial(HashTable.parallel_insert, hash_func))
for i in range(10):
    sample = jax.vmap(puzzle.get_initial_state)(key=jax.random.split(jax.random.PRNGKey(i + 256),count))
    # Lookup results before insertion, kept for the diagnostics below.
    fidx, ftable_idx, old_found = jax.vmap(lookup, in_axes=(None, 0))(table, sample)
    batched_sample, filled = HashTable.make_batched(puzzle.State, sample, batch)
    start = time.perf_counter()
    table, inserted, _, _ = parallel_insert(table, batched_sample, filled)
    # JAX dispatches asynchronously: block on the outputs so the timer covers
    # the actual insertion rather than just the dispatch.
    jax.block_until_ready((table, inserted))
    print(time.perf_counter()-start)
    idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, sample)
    # Visit only the samples that were not found after insertion. A distinct
    # loop variable avoids shadowing the outer round counter `i`.
    missing = jnp.flatnonzero(jnp.logical_not(found)).tolist()
    for miss in missing:
        print(fidx[miss], ftable_idx[miss])
        print(table.table[fidx[miss], ftable_idx[miss]])
        print(idx[miss], table_idx[miss])
        print(sample[miss])
        print(table.table[idx[miss], table_idx[miss]])
        s = sample[miss]
        # Brute-force comparison of the missing state against every table slot.
        equal = jax.vmap(jax.vmap(_check_equal, in_axes=(0, None)), in_axes=(0, None))(table.table, s)
        # Check for a match before indexing into the (possibly empty) match
        # list, so a true miss reports "not found" instead of an index error.
        assert jnp.any(equal), f"not found {s}" 
        args = jnp.argwhere(equal)
        arg_sample = args[0]
        print(arg_sample)
        print(table.table[arg_sample[0], arg_sample[1]])
    print(jnp.mean(found), jnp.mean(old_found), jnp.mean(inserted))

1.7550759315490723
1.0 0.0 0.25
1.7681143283843994
1.0 0.0 0.25
0.0022423267364501953
1.0 0.0 0.25
0.0030078887939453125
1.0 0.0 0.25
0.002910614013671875
1.0 0.0 0.25
0.002827167510986328
1.0 0.0 0.25
0.0030167102813720703
1.0 0.0 0.25
0.003612995147705078
1.0 0.0 0.25
0.0043795108795166016
1.0 0.0 0.25
0.003906965255737305
1.0 0.0 0.25


In [6]:
# Large benchmark: same setup as the small one, with 1e6 samples and a larger table.
count = int(1e6)
puzzle = SlidePuzzle(4)
hash_func = hash_func_builder(puzzle.State)
sample = jax.vmap(puzzle.get_initial_state)(key=jax.random.split(jax.random.PRNGKey(2),count))
new_sample = jax.vmap(puzzle.get_initial_state)(key=jax.random.split(jax.random.PRNGKey(1),count))
# NOTE(review): the meaning of the `1` and `int(1e7)` arguments is defined by
# HashTable.build (presumably a seed and a capacity) -- confirm there.
table = HashTable.build(puzzle.State, 1, int(1e7))

lookup = jax.jit(partial(HashTable.lookup, hash_func))
start = time.perf_counter()
idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, sample)
# JAX dispatches work asynchronously, so wait for the results before reading
# the clock; otherwise only the dispatch time is measured. `lookup` is a new
# jitted function here, so this call also includes tracing/compilation.
jax.block_until_ready((idx, table_idx, found))
print(time.perf_counter()-start)

0.36997246742248535


In [7]:
# Insert ten rounds of `count` fresh samples in fixed-size chunks and report,
# per round: fraction found after insertion, fraction already present before
# insertion, and mean of the `inserted` flags.
batch = 100000
parallel_insert = jax.jit(partial(HashTable.parallel_insert, hash_func))
# The mask has a fixed length of `batch`, so every slice must be exactly that
# long; a ragged final slice would not match it.
assert count % batch == 0, "count must be a multiple of batch"
# Each chunk is passed with an all-True mask; build it once instead of once
# per chunk.
all_filled = jnp.ones(batch, dtype=jnp.bool_)
for i in range(10):
    inserteds = []
    sample = jax.vmap(puzzle.get_initial_state)(key=jax.random.split(jax.random.PRNGKey(i + 256),count))
    # Fraction of this round's samples that were already in the table.
    idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, sample)
    same_ratio = jnp.mean(found)
    for j in trange(0, count, batch):
        # Only the updated table and the per-item inserted flags are used.
        table, inserted, _, _ = parallel_insert(table, sample[j:j+batch], all_filled)
        inserteds.append(inserted)
    inserteds = jnp.concatenate(inserteds)
    idx, table_idx, found = jax.vmap(lookup, in_axes=(None, 0))(table, sample)
    print(jnp.mean(found), same_ratio, jnp.mean(inserteds))

100%|██████████| 10/10 [00:05<00:00,  1.85it/s]


1.0 0.0 1.0


100%|██████████| 10/10 [00:00<00:00, 171.17it/s]

1.0 0.0 1.0



100%|██████████| 10/10 [00:00<00:00, 241.48it/s]


1.0 0.0 1.0


100%|██████████| 10/10 [00:00<00:00, 233.34it/s]


1.0 2e-06 0.999998


100%|██████████| 10/10 [00:00<00:00, 201.23it/s]


1.0 1e-06 0.999999


100%|██████████| 10/10 [00:00<00:00, 195.59it/s]


1.0 0.0 1.0


100%|██████████| 10/10 [00:00<00:00, 176.04it/s]


1.0 1e-06 0.999999


100%|██████████| 10/10 [00:00<00:00, 170.51it/s]


1.0 0.0 1.0


100%|██████████| 10/10 [00:00<00:00, 152.52it/s]


1.0 1e-06 0.999999


100%|██████████| 10/10 [00:00<00:00, 138.88it/s]

1.0 1e-06 0.999999



