In [1]:
%pwd

'c:\\Users\\HP\\Desktop\\E-Vision-Projects\\Product_Count_API\\research'

In [2]:
import os 
%cd ..
os.getcwd()

c:\Users\HP\Desktop\E-Vision-Projects\Product_Count_API


'c:\\Users\\HP\\Desktop\\E-Vision-Projects\\Product_Count_API'

In [3]:
import torch
import torchvision.transforms as transforms
from torchvision import models
import cv2
import chromadb
import numpy as np
import glob
from PIL import Image

In [5]:
# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path="./data/chroma_product_db")  # Persistent storage

# Create a collection for storing product embeddings
# collection = chroma_client.get_or_create_collection(name="product_embeddings")

collection =chroma_client.get_or_create_collection(
    name="collection_name",
    metadata={"hnsw:space": "cosine"} # l2 is the default
)

In [6]:
# Load pre-trained ResNet50 for feature extraction
class FeatureExtractor(torch.nn.Module):
    def __init__(self):
        super(FeatureExtractor, self).__init__()
        model = models.resnet50(pretrained=True)
        self.feature_extractor = torch.nn.Sequential(*list(model.children())[:-1])  # Remove classification layer

    def forward(self, x):
        x = self.feature_extractor(x)
        return x.view(x.size(0), -1)  # Flatten output

# Initialize model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
feature_extractor = FeatureExtractor().to(device).eval()

# Image transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

def extract_cnn_features(image):
    """Extracts feature embeddings using ResNet50."""
    image = Image.open(image).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)  # Transform & Add batch dim

    with torch.no_grad():
        features = feature_extractor(image).cpu().numpy().flatten()

    return features  # Return feature embedding



In [7]:
def store_embedding_chromadb(product_name, embedding):
    """Stores multiple embeddings for the same product in ChromaDB."""
    collection.add(
        ids=[f"{product_name}_{np.random.randint(100000)}"],  # Unique ID for each image
        embeddings=[embedding.tolist()],  # Convert to list for ChromaDB
        metadatas=[{"product_name": product_name}]  # Store metadata
    )

In [8]:
def retrieve_best_match_chromadb(query_embedding, threshold=0.5):
    """Finds the best matching product using ChromaDB similarity search."""
    results = collection.query(
        query_embeddings=[query_embedding.tolist()], 
        n_results=1  # Get the best match
    )
    
    # print(results["distances"][0][0])
    # # Check if the best match is above the threshold
    # if results["distances"][0][0] > threshold:
    #     return "New Product"

    # return results["metadatas"][0][0]["product_name"]
    return results

In [9]:
image_lis = glob.glob(os.path.join('./data/db/','*/*.jpg'))

In [10]:
db_dict ={'1000':'7-oil-red', '1001':'7-oil-green', '1002':'7-oil-black', '1003':'7-oil-brown', '1004':'7-oil-yellow', 
          '1005':'7-oil-orange', '1006':'fathima-kesha-wardhani', '1007':'nawarathna-oil-box-green',
          '1008':'nawarathna-oil-box-red', '1009':'janet-hair-fall-red', '1010':'janet-hair-fall-blue',
          '1011':'bread-growth', '1012':'7-oil-white', '1013':'castor-oil', '1014':'hair-care-oil-blue', '1015':'jasmin-coconut-hari-oil',
          '1016':'chandanalepa-box', '1017':'pears-baby-cream','1018':'parachuti-hail-oil',
          '1019':'amla-hurbal-hail-oil'
          }

In [11]:
for i, image in enumerate(image_lis):
    encodding = extract_cnn_features(image)
    image_name = db_dict[image.split('\\')[-2]]
    store_embedding_chromadb(image_name, encodding)

In [12]:
# def delete_product_by_name(product_name):
#     """Deletes all embeddings for a given product name."""
#     results = collection.get(where={"product_name": product_name})  # Retrieve all matching entries
#     ids_to_delete = results["ids"]

#     if ids_to_delete:
#         collection.delete(ids=ids_to_delete)
#         print(f"Deleted all embeddings for '{product_name}'.")
#     else:
#         print(f"No embeddings found for '{product_name}'.")

# # Example Usage
# delete_product_by_name("Coca Cola 500ml")

In [13]:
query_img = './data/test_output/cropped_image_5.jpg'
# Example: Match a new product image
query_embedding = extract_cnn_features(query_img)
matched_product = retrieve_best_match_chromadb(query_embedding)
print("Best Match:", matched_product)

Best Match: {'ids': [['7-oil-red_9348']], 'embeddings': None, 'documents': [[None]], 'uris': None, 'data': None, 'metadatas': [[{'product_name': '7-oil-red'}]], 'distances': [[0.06010927039596747]], 'included': [<IncludeEnum.distances: 'distances'>, <IncludeEnum.documents: 'documents'>, <IncludeEnum.metadatas: 'metadatas'>]}


In [14]:
matched_product["metadatas"][0][0]["product_name"]

'7-oil-red'

In [15]:
query_img_lis = glob.glob(os.path.join('./data/test_output/','*.jpg'))

In [16]:
for i, img in enumerate(query_img_lis):
    encodding = extract_cnn_features(img)
    matched_product = retrieve_best_match_chromadb(encodding)
    print(f"Image {i+1}: {matched_product['metadatas'][0][0]['product_name']} distance: {matched_product['distances'][0][0]}")

Image 1: castor-oil distance: 0.15015782217421958
Image 2: 7-oil-black distance: 0.058799279841256435
Image 3: 7-oil-red distance: 0.011855254166439955
Image 4: 7-oil-orange distance: 0.07780938422863726
Image 5: 7-oil-yellow distance: 0.1911783526504831
Image 6: 7-oil-white distance: 0.1256673079559908
Image 7: jasmin-coconut-hari-oil distance: 0.09266074760966059
Image 8: 7-oil-orange distance: 0.017132066986998873
Image 9: amla-hurbal-hail-oil distance: 0.18190745926944385
Image 10: 7-oil-red distance: 0.06292963711472566
Image 11: 7-oil-black distance: 0.040860480607634586
Image 12: 7-oil-black distance: 0.062265289360884934
Image 13: 7-oil-yellow distance: 0.10759544869670212
Image 14: 7-oil-red distance: 0.06010927039596747
Image 15: janet-hair-fall-red distance: 0.14080361519810347
Image 16: 7-oil-black distance: 0.06671115551480289
Image 17: 7-oil-yellow distance: 0.07611340348244666
Image 18: jasmin-coconut-hari-oil distance: 0.14856678033864423
