In [None]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.6 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.35.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.35.0-py3-none-any.whl.metadata (2.4 kB)
Collecting opentelemetry-sdk>=1.2.0 (from chromadb)
  Downloading opentelemetry_sdk-1.35.0-py3-none-any.whl.metadata (1.5 k

In [None]:
!pip install transformers torchvision torch pillow tmm

Collecting tmm
  Downloading tmm-0.2.0-py3-none-any.whl.metadata (2.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cu

In [None]:
from transformers import AutoImageProcessor, AutoModel
from PIL import Image
import torch
import torch.nn.functional as F

processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
model = AutoModel.from_pretrained("facebook/dinov2-base")
model.eval()

def get_dino_embedding(image_path):
    print("Image path: ", image_path)
    image = Image.open(image_path).convert("RGB")
    inputs = processor(images=image, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)
        embedding = outputs.last_hidden_state[:, 0, :].squeeze()
        normalized = torch.nn.functional.normalize(embedding, dim=0)
        return normalized.detach().cpu().numpy()

In [None]:
embedding = get_dino_embedding("/content/m1.jpg")
print("Shape: ", embedding.shape)
print("Embedding: ", embedding[:10])

Image path:  /content/m1.jpg
Shape:  (768,)
Embedding:  [-0.01656391 -0.03419908  0.01063037  0.0423287   0.07476431 -0.01908791
 -0.06424918 -0.03269591 -0.03867481 -0.02863607]


In [None]:
embedding2 = get_dino_embedding("/content/m4.jpg")
print("Shape: ", embedding2.shape)
print("Embedding: ", embedding2[:10])

Image path:  /content/m4.jpg
Shape:  (768,)
Embedding:  [-0.00243679 -0.0261363   0.03868091 -0.01036552  0.04572733  0.00226297
 -0.03249563 -0.00886352 -0.07982017 -0.07103857]


In [None]:
import chromadb
from chromadb.config import Settings
import os

os.environ["CHROMADB_TELEMETRY_ENABLED"] = "false"
client = chromadb.Client()

In [None]:
collection = client.get_or_create_collection(name="cow_muzzle_embeddings")

In [None]:
# collection.delete(where_document={"$contains": "I"})

In [None]:
print("Collection count: ", collection.count())

Collection count:  0


In [None]:
image_folder = "/content/muzzles"
image_files = [f for f in os.listdir(image_folder) if f.endswith(('.jpg', '.png', '.jpeg'))]

for idx, filename in enumerate(image_files):
    image_path = os.path.join(image_folder, filename)
    print(f"Processing: {image_path}")
    vector = get_dino_embedding(image_path)
    collection.add(
        ids=[f"img_{idx}"],
        embeddings=[vector],
        metadatas=[{"filename": filename}],
        documents=[image_path]
    )
    print(f"✅ Stored: {filename}")

Processing: /content/muzzles/m2.jpg
Image path:  /content/muzzles/m2.jpg
✅ Stored: m2.jpg
Processing: /content/muzzles/m1.jpg
Image path:  /content/muzzles/m1.jpg
✅ Stored: m1.jpg
Processing: /content/muzzles/m4.jpg
Image path:  /content/muzzles/m4.jpg
✅ Stored: m4.jpg
Processing: /content/muzzles/IMG20231118114156.jpg
Image path:  /content/muzzles/IMG20231118114156.jpg
✅ Stored: IMG20231118114156.jpg


In [None]:
query_vector = get_dino_embedding("/content/cow_muzzle.jpg")
results = collection.query(query_embeddings=[query_vector], n_results=1)

if results["distances"] and results["documents"][0]:
    match_score = 1 - results["distances"][0][0]  # cosine similarity
    percentage = match_score * 100

    print(f"Match Score: {percentage:.2f}%")
    print(f"Match Document: {results['documents'][0][0]}")

    if percentage > 85:
        print("Same cow (Registered)")
    elif percentage > 75:
        print("Possibly same cow, manual review suggested")
    else:
        print("New cow — not in database")
else:
    print("Error: No results found")

Image path:  /content/cow_muzzle.jpg
Match Score: 58.82%
Match Document: /content/muzzles/m1.jpg
New cow — not in database
