In [2]:
import os
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

# Pick the compute device: GPU when CUDA is usable, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the feature extractor: a pre-trained ResNet-101 with its last child
# module (the fully connected classifier) dropped, moved to `device`.
# NOTE(review): `pretrained=True` is deprecated in torchvision >= 0.13 in favour
# of the `weights=` argument — confirm the installed version before migrating.
backbone_layers = list(models.resnet101(pretrained=True).children())
model = torch.nn.Sequential(*backbone_layers[:-1]).to(device)
# Switch to evaluation mode so layers such as batch-norm behave deterministically
model.eval()

# Preprocessing applied to every image before it is fed to the network:
# resize, centre-crop to 224x224, convert to a tensor, then normalise per channel.
preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
preprocess = transforms.Compose(preprocess_steps)

# Function to extract features
def extract_features(img_path, model, device):
    """Compute the feature vector of a single image.

    The image is opened, converted to RGB, run through the module-level
    `preprocess` pipeline and passed through `model` without gradient tracking.

    Args:
        img_path: Path of the image file to read.
        model: Callable network that maps a batch tensor to features.
        device: Torch device on which the forward pass is executed.

    Returns:
        A 2-D numpy array with one row (the batch dimension is kept), holding
        the flattened model output for the image.
    """
    # The context manager releases the file handle even if decoding fails;
    # `convert` returns an independent, fully loaded copy that outlives it.
    with Image.open(img_path) as img:
        rgb_img = img.convert('RGB')

    # Add the batch dimension and move the tensor to the model's device
    batch_t = torch.unsqueeze(preprocess(rgb_img), 0).to(device)

    with torch.no_grad():
        features = torch.flatten(model(batch_t), 1)
    return features.cpu().numpy()  # Move the features back to CPU for further processing or storage



In [3]:
# Load images from a folder
# Raw string: the Windows backslashes are taken literally instead of relying on
# unrecognised escape sequences (which newer Python versions warn about).
folder_path = r'Y:\SIH-main\sih\sih dataset-labels\sih dataset\prescription'  # Replace with the path to your folder
# Sorted so that the position of each path (later used as its point ID) is the
# same on every run; os.listdir returns entries in arbitrary order.
image_paths = sorted(
    os.path.join(folder_path, f)
    for f in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, f))
)

# Extract features for all images in the folder
features_list = [extract_features(image_path, model, device) for image_path in image_paths]

In [4]:
# Length of one feature vector: each entry of features_list is a one-row 2-D
# array, so [0] selects that single row.
len(features_list[0][0])

2048

In [5]:
# Number of images for which features were extracted
len(features_list)

11

In [6]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

# In-memory (local mode) Qdrant instance: no server needs to be running, and
# the stored data does not outlive this Python process.
client = QdrantClient(":memory:")

# Create a collection (any existing collection with this name is replaced).
# size=2048 matches the length of the feature vectors extracted above;
# similarity between vectors is measured with cosine distance.
client.recreate_collection(
    collection_name="my_collection",
    vectors_config=VectorParams(size=2048, distance=Distance.COSINE),
)



True

In [7]:
from qdrant_client.http.models import PointStruct
import numpy as np

# Assume `features_list` is a list of numpy arrays with your extracted features
# and `image_paths` is a list of corresponding image paths

# Helper used to turn a feature array into the plain list Qdrant expects
def to_float_list(feature_vector):
    """Return `feature_vector` as a flat Python list of float32-rounded values.

    Non-array inputs are first wrapped in a numpy array; arrays of any shape
    are flattened in row-major order.
    """
    if isinstance(feature_vector, np.ndarray):
        arr = feature_vector
    else:
        arr = np.array(feature_vector)
    # copy=False: cast only when the dtype is not already float32
    as_float32 = arr.astype(np.float32, copy=False)
    return as_float32.ravel().tolist()

# Store the features in Qdrant: one point per image, identified by its position
# in `image_paths`, with the source path kept as payload so that search hits
# can be mapped back to files.
client.upsert(
    collection_name="my_collection",
    points=[
        PointStruct(
            id=idx,  # Integer point ID (list position); it is not converted to a string
            vector=to_float_list(feature),  # Convert feature vector to a list of floats
            payload={"image_path": path},
        )
        # zip pairs each path with its feature directly instead of indexing by position
        for idx, (path, feature) in enumerate(zip(image_paths, features_list))
    ],
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [8]:
# Extract features from the input image
# Raw string: the Windows backslashes are taken literally, so no mix of
# unrecognised escapes and doubled backslashes is needed.
input_image_path = r'Y:\SIH-main\sih\sih dataset-labels\sih dataset\prescription\ap10.png'  # Replace with your input image path
input_features = extract_features(input_image_path, model, device)

# Search for similar images in Qdrant
search_result = client.search(
    collection_name='my_collection',
    # Same conversion helper as used when the vectors were stored
    query_vector=to_float_list(input_features),
    limit=5,  # Number of similar images to retrieve
)

# Extract the paths of the most similar images (same order as the search hits)
similar_image_paths = [hit.payload['image_path'] for hit in search_result]


In [11]:
import pandas as pd

def display_search_results(search_results):
    """Print the search hits as a table indexed by point ID.

    Args:
        search_results: Iterable of hits, each exposing `id`, `score` and a
            `payload` mapping that contains an 'image_path' entry.

    Returns:
        None. The table is written to standard output; an empty iterable
        prints an empty table instead of raising.
    """
    columns = ['ID', 'Score', 'Image Path']

    # One row per hit, in the order the hits were returned
    rows = [
        (hit.id, hit.score, hit.payload['image_path'])
        for hit in search_results
    ]

    # Passing `columns` explicitly keeps the 'ID' column present even when
    # there are no rows, so set_index cannot fail on an empty result.
    table = pd.DataFrame(rows, columns=columns).set_index('ID')

    # Display the DataFrame as a table
    print(table)

# Show the hits returned by the similarity search as a table
display_search_results(search_result)

       Score                                         Image Path
ID                                                             
2   1.000000  Y:\SIH-main\sih\sih dataset-labels\sih dataset...
8   0.959605  Y:\SIH-main\sih\sih dataset-labels\sih dataset...
10  0.948138  Y:\SIH-main\sih\sih dataset-labels\sih dataset...
9   0.910915  Y:\SIH-main\sih\sih dataset-labels\sih dataset...
5   0.904244  Y:\SIH-main\sih\sih dataset-labels\sih dataset...


In [62]:
# Inspect the raw search hits, including their full payloads
search_result

[ScoredPoint(id=2, version=0, score=1.0000000034742436, payload={'image_path': 'Y:\\SIH-main\\sih\\sih dataset-labels\\sih dataset\\prescription\\ap10.png'}, vector=None, shard_key=None),
 ScoredPoint(id=8, version=0, score=0.959605218278751, payload={'image_path': 'Y:\\SIH-main\\sih\\sih dataset-labels\\sih dataset\\prescription\\ap7.png'}, vector=None, shard_key=None),
 ScoredPoint(id=10, version=0, score=0.9481375430706294, payload={'image_path': 'Y:\\SIH-main\\sih\\sih dataset-labels\\sih dataset\\prescription\\ap9.png'}, vector=None, shard_key=None),
 ScoredPoint(id=9, version=0, score=0.9109149692319604, payload={'image_path': 'Y:\\SIH-main\\sih\\sih dataset-labels\\sih dataset\\prescription\\ap8.png'}, vector=None, shard_key=None),
 ScoredPoint(id=5, version=0, score=0.9042442729065638, payload={'image_path': 'Y:\\SIH-main\\sih\\sih dataset-labels\\sih dataset\\prescription\\ap4.png'}, vector=None, shard_key=None)]