# 🏛️ Content-Based Image Retrieval with CLIP + ChromaDB

This notebook demonstrates how to use the project in a Google Colab environment.

Steps:
1. Install dependencies
2. Clone repo & import modules
3. Prepare dataset
4. Build embeddings & ChromaDB collection
5. Run retrieval and visualize results

In [None]:
# Step 1: Install dependencies
!pip install -r requirements.txt

In [None]:
# Step 2: Imports
import os, glob, random
from dataset_prep import prepare_dataset
from utils import get_files_path, plot_results
from retrieval import create_collection, add_embedding, search

In [None]:
# Step 3: Prepare dataset
ROOT = "data"
TRAIN_PATH = f"{ROOT}/train"
src_zip = "archive.zip"  # make sure dataset zip is uploaded
extract_folder = "dataset_raw"

# NOTE(review): presumably unpacks `src_zip` into `extract_folder` and lays out
# one sub-folder per class under TRAIN_PATH — confirm in dataset_prep.
prepare_dataset(src_zip, extract_folder, TRAIN_PATH)

# Only visible sub-directories count as classes. A plain os.listdir() would
# also pick up stray files and hidden folders (e.g. `.ipynb_checkpoints`),
# which inflates the class count and yields "classes" with no images.
class_names = sorted(
    entry
    for entry in os.listdir(TRAIN_PATH)
    if not entry.startswith(".")
    and os.path.isdir(os.path.join(TRAIN_PATH, entry))
)
print("There are", len(class_names), "classes.")

In [None]:
# Step 4: Build collection
COLLECTION_NAME = "l2_collection"
DISTANCE_SPACE = "l2"

# Paths returned by the project helper for the classes found under TRAIN_PATH.
files_path = get_files_path(path=TRAIN_PATH, class_names=class_names)

# Create the collection first, then populate it from `files_path`.
l2_collection = create_collection(name=COLLECTION_NAME, space=DISTANCE_SPACE)
add_embedding(l2_collection, files_path)

In [None]:
# Step 5: Test retrieval
N_RESULTS = 5  # number of results requested from search()

# Fail with a descriptive message instead of the bare IndexError that
# random.choice() raises on an empty sequence.
if not class_names:
    raise ValueError(f"No classes found under {TRAIN_PATH!r}; run the dataset preparation step first.")
random_class = random.choice(class_names)

# Pick one file from the chosen class folder to use as the query image.
test_files = glob.glob(f"{TRAIN_PATH}/{random_class}/*.*")
if not test_files:
    raise FileNotFoundError(f"No files found for class {random_class!r} under {TRAIN_PATH!r}.")
test_path = random.choice(test_files)
print("Chosen class:", random_class)
print("Testing image:", test_path)

# Query the collection built in Step 4 and show the query next to its matches.
results = search(test_path, l2_collection, n_results=N_RESULTS)
plot_results(test_path, results)