In [2]:
# Load IPython's autoreload extension and set mode 2, so imported modules are
# reloaded automatically before each cell executes.
%load_ext autoreload
%autoreload 2
# Move the working directory up one level; presumably so the relative
# "data/..." and "notebook/..." paths used in later cells resolve — TODO confirm.
# NOTE(review): `%cd ..` is relative to the current directory, so re-running
# this cell climbs one more level each time. Run it once per kernel session.
%cd ..

import numpy as np
from pathlib import Path
import logging
from latice.index.chroma_db import (
    ChromaLatentVectorDatabaseConfig,
    ChromaLatentVectorDatabase,
)
from latice.index.raw_dp_indexer import RawDiffractionPatternIndexer, RawIndexerConfig

# Configure the root logger: INFO and above, each record prefixed with
# timestamp, logger name and level.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

In [3]:
# Settings forwarded to RawIndexerConfig in a later cell.
image_size = (128, 128)
batch_size = 32
top_n = 10
orientation_threshold = 3.0
random_seed = 42

# Input files, given relative to the current working directory.
pattern_path = Path("data/N=100_noised.npy")
angles_path = Path("data/anglefile_N=100.txt")

# Vector dimension handed to the database config: one entry per image pixel.
height, width = image_size
raw_dimension = height * width
print(f"Raw pattern dimension: {raw_dimension}")

Raw pattern dimension: 16384


In [4]:
# Database configuration: persisted under notebook/.chroma_db_raw, with the
# vector dimension taken from the raw pattern size computed above.
chroma_raw_db_config = ChromaLatentVectorDatabaseConfig(
    dimension=raw_dimension,
    persist_directory="notebook/.chroma_db_raw",
    add_batch_size=300,
)

# Database handle that is passed to the raw-pattern indexer.
chroma_raw_db = ChromaLatentVectorDatabase(config=chroma_raw_db_config)

2025-04-26 23:17:35,963 - chromadb.telemetry.product.posthog - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2025-04-26 23:17:36,054 - latice.index.chroma_db - INFO - Created persistent ChromaDB at notebook/.chroma_db_raw
2025-04-26 23:17:36,056 - latice.index.chroma_db - INFO - Retrieved existing collection 'latent_vectors'


In [5]:
# Collect the settings from the configuration cell into the indexer config.
raw_indexer_config = RawIndexerConfig(
    # input files
    pattern_path=pattern_path,
    angles_path=angles_path,
    # pattern handling
    image_size=image_size,
    batch_size=batch_size,
    random_seed=random_seed,
    # query defaults
    top_n=top_n,
    orientation_threshold=orientation_threshold,
)

# Hand the indexer the database created above.
raw_indexer = RawDiffractionPatternIndexer(
    db=chroma_raw_db,
    config=raw_indexer_config,
)

2025-04-26 23:17:36,065 - latice.index.raw_dp_indexer - INFO - Using provided ChromaLatentVectorDatabase instance.
2025-04-26 23:17:36,066 - latice.index.raw_dp_indexer - INFO - Raw index dimensionality: 16384


In [6]:
# Build the raw-pattern index only when the persistent collection is empty.
# A full build is expensive (the logged run below took several hours to add
# 333227 patterns), so an already-populated database is reused as-is. This
# keeps the notebook runnable top to bottom on both a fresh and a populated DB.
# NOTE(review): a partially populated collection (e.g. from an interrupted
# build) also counts as "built" here — clear the persist directory to rebuild.
existing_count = raw_indexer.db.get_count()
if existing_count == 0:
    # Let build errors propagate instead of printing them: later cells cannot
    # usefully query an index that failed to build.
    raw_indexer.build_dictionary()
    print(f"Index built successfully with {raw_indexer.db.get_count()} raw patterns.")
else:
    print(f"Reusing existing index with {existing_count} raw patterns.")

2025-04-26 12:50:55,313 - latice.index.raw_dp_indexer - INFO - Loading raw patterns from data/N=100_noised.npy using memory map
2025-04-26 12:50:55,320 - latice.index.raw_dp_indexer - INFO - Loading orientations from data/anglefile_N=100.txt
2025-04-26 12:50:55,789 - latice.index.raw_dp_indexer - INFO - Processing and adding patterns to DB in batches...


Output()

2025-04-26 18:36:39,682 - latice.index.raw_dp_indexer - INFO - Finished adding 333227 patterns to the database. Final DB count: 333227.


Index built successfully with 333227 raw patterns.


In [7]:
import time

# Time a batch query using only the first five sample patterns.
batch_patterns_np = np.load("data/sample_pattern.npy")[:5]
n_patterns = len(batch_patterns_np)

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be distorted by system clock adjustments as time.time() can.
start_time = time.perf_counter()
orientation_results_batch = raw_indexer.index_patterns_batch(
    batch_patterns_np,
    top_n=1,  # differs from the configured top_n (10)
    # Reuse the value from the configuration cell rather than repeating the
    # literal, so the two cannot silently diverge.
    orientation_threshold=orientation_threshold,
)
end_time = time.perf_counter()
elapsed = end_time - start_time

print(f"Indexed {n_patterns} patterns in {elapsed:.4f} seconds")
# Skip the average when the sample file yielded no patterns, avoiding a
# ZeroDivisionError.
if n_patterns:
    print(f"Average time per pattern: {elapsed / n_patterns:.4f} seconds")


Output()

Indexed 5 patterns in 90.1886 seconds
Average time per pattern: 18.0377 seconds


In [7]:
# Display the list of OrientationResult objects returned by the batch query.
# NOTE(review): the repr prints every field of every result (query vector,
# candidate orientations, distances), so the output grows with batch size.
orientation_results_batch

[OrientationResult(query_vector=array([0., 0., 0., ..., 0., 0., 0.], shape=(16384,), dtype=float32), best_orientation=array([ 55.55121033,  58.85677405, 325.55121033]), candidate_orientations=array([[ 55.55121033,  58.85677405, 325.55121033],
        [182.57244414,  44.95050913, 133.25720072]]), distances=array([-1.43051147e-06,  1.33022039e-01]), mean_orientation=None, success=False, similar_indices=array([1])),
 OrientationResult(query_vector=array([0., 0., 0., ..., 0., 0., 0.], shape=(16384,), dtype=float32), best_orientation=array([ 55.05679817,  59.1744195 , 325.05679817]), candidate_orientations=array([[ 55.05679817,  59.1744195 , 325.05679817],
        [181.32338764,  44.75745748, 134.31037252]]), distances=array([-1.35898590e-05,  1.43268653e-01]), mean_orientation=None, success=False, similar_indices=array([1])),
 OrientationResult(query_vector=array([0., 0., 0., ..., 0., 0., 0.], shape=(16384,), dtype=float32), best_orientation=array([ 54.557532  ,  59.47748504, 324.557532  ]