In [1]:
import os
from pathlib import Path
from urllib.parse import quote_plus

import requests
from azure.storage.blob import BlobServiceClient
from dotenv import load_dotenv
from more_itertools import chunked
from pymongo import MongoClient
from pymongo.server_api import ServerApi

In [2]:
# Azure Blob Storage configuration.
# The connection string embeds the storage account key, so it is read from the
# environment (or a local .env file) rather than being committed in the notebook.
# NOTE(review): the account key that used to be hard-coded here has been exposed
# and should be rotated.
load_dotenv()
connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
if not connect_str:
    raise RuntimeError(
        "AZURE_STORAGE_CONNECTION_STRING is not set; "
        "define it in the environment or in a .env file."
    )
container_name = 'images800'
# Raw string: the backslashes in this Windows path must not be parsed as escape sequences.
local_path = r"D:\AIML\gimmick\Scene-Sense\sample_images"

In [3]:
# Account-level Azure Blob Storage client, built from the connection string
blob_service_client = BlobServiceClient.from_connection_string(conn_str=connect_str)

In [4]:
# Load the .env file (if any) and read the MongoDB credentials from the environment.
# Either value is None when its variable is not defined.
load_dotenv()
username = os.environ.get("MONGO_USERNAME")
password = os.environ.get("MONGO_PASSWORD")

In [5]:
# Build the MongoDB Atlas connection URI from the credentials loaded above.
# Fail early with a clear message instead of sending a "None:None" login.
if not username or not password:
    raise RuntimeError(
        "MONGO_USERNAME and MONGO_PASSWORD must be set in the environment or in a .env file."
    )

# Credentials embedded in a MongoDB URI must be percent-escaped, otherwise
# characters such as '@', ':' or '/' in the password corrupt the URI.
# The environment values are therefore expected to be the raw (unescaped) credentials.
uri = (
    f'mongodb+srv://{quote_plus(username)}:{quote_plus(password)}'
    '@scene-sense.9km2ony.mongodb.net/?retryWrites=true&w=majority'
)

# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))

In [6]:
# Verify connectivity by issuing a ping command against the admin database.
# Any failure is printed rather than raised, so later cells still run.
success_message = "Pinged your deployment. You successfully connected to MongoDB!"
try:
    client.admin.command('ping')
    print(success_message)
except Exception as ping_error:
    print(ping_error)

Pinged your deployment. You successfully connected to MongoDB!


In [7]:
# Handles for the database and the collection that stores image embeddings
db = client.get_database('scene-sense')
embeddings_collection = db.get_collection('sample-images-embeddings')

In [8]:
# Function to upload image to Azure and get URL
def upload_image_to_azure_and_get_url(filename: str, overwrite: bool = False) -> str:
    """Upload one file from ``local_path`` to the blob container and return its URL.

    Args:
        filename: Name of the file inside ``local_path``; also used as the blob name.
        overwrite: Forwarded to ``upload_blob``. With the default ``False`` the
            upload raises if a blob with this name already exists (the original
            behaviour); pass ``True`` to replace it, e.g. when re-running the notebook.

    Returns:
        The URL of the uploaded blob as reported by the blob client.
    """
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    blob_client = blob_service_client.get_blob_client(container_name, filename)
    with open(os.path.join(local_path, filename), "rb") as data:
        blob_client.upload_blob(data, overwrite=overwrite)
    # Let the SDK supply the URL instead of assembling it by hand: it uses the
    # account's real endpoint and URL-encodes the blob name.
    return blob_client.url

In [9]:
# Endpoint of the image-embedding service; the batches of image URLs are POSTed here.
# NOTE(review): this is a plain-HTTP URL with a hard-coded IP and port — confirm
# that is intended before sharing the notebook.
api_endpoint = "http://148.113.143.16:9999/image_embeddings/"

In [14]:
def upload_images_to_azure(directory: str, overwrite: bool = False):
    """Upload every image file found directly inside ``directory`` to the blob container.

    A file counts as an image when its extension (compared case-insensitively)
    is one of .jpg, .png, .jpeg, .tiff, .bmp or .gif. Each blob is named after
    its file. Sub-directories are not traversed.

    Args:
        directory: Folder whose image files are uploaded.
        overwrite: Forwarded to ``upload_blob``. With the default ``False`` an
            upload raises if the blob already exists (the original behaviour);
            pass ``True`` to replace existing blobs when re-running the notebook.
    """
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)

    # Define image extensions
    img_extensions = {'.jpg', '.png', '.jpeg', '.tiff', '.bmp', '.gif'}

    for file in os.listdir(directory):
        filepath = os.path.join(directory, file)
        extension = os.path.splitext(file)[1]
        if extension.lower() not in img_extensions:
            continue
        # os.listdir also returns directories; one named like an image cannot be opened
        if not os.path.isfile(filepath):
            continue
        blob_client = blob_service_client.get_blob_client(container_name, file)
        with open(filepath, "rb") as data:
            blob_client.upload_blob(data, overwrite=overwrite)
    print("Uploaded images to Azure Blob Storage")

In [11]:
# Push every image found in the local sample folder to the blob container
upload_images_to_azure(directory=local_path)

Uploaded images to Azure Blob Storage


In [15]:
def get_image_urls_from_azure_container(connection_string, container_name):
    """Return one URL per blob in the container.

    Each URL is the container client's URL followed by ``/`` and the blob name,
    in the order the blobs are listed.
    """
    service = BlobServiceClient.from_connection_string(connection_string)
    container = service.get_container_client(container_name)
    return [f"{container.url}/{blob.name}" for blob in container.list_blobs()]

In [16]:
class ImageUrl:
    """Thin wrapper around a single image URL that can serialise itself to a dict."""

    def __init__(self, url):
        # Stored exactly as given; no validation is performed.
        self.url = url

    def to_dict(self):
        """Return the wrapped URL as ``{'url': <url>}``."""
        return dict(url=self.url)

In [17]:
# List the URL of every blob currently stored in the container
image_urls = get_image_urls_from_azure_container(
    connection_string=connect_str,
    container_name=container_name,
)

In [18]:
# Wrap each URL in ImageUrl and serialise it straight away, so the result is a
# list of {'url': ...} dicts (not ImageUrl instances)
image_url_objects = list(map(lambda address: ImageUrl(address).to_dict(), image_urls))

In [19]:
# Split the URL payloads into batches of at most 1000 entries each
chunks = [*chunked(image_url_objects, 1000)]

In [None]:
# For each batch of image URLs: request embeddings from the API and store one
# {'url', 'embedding'} document per image in MongoDB.
for chunk in chunks:
    # Send a POST request to the API endpoint.
    # timeout=(connect, read) in seconds: without it a stalled server would hang
    # the notebook forever. The read timeout is generous because a batch can be large.
    try:
        response = requests.post(api_endpoint, json=chunk, timeout=(10, 600))
    except requests.RequestException as request_error:
        print(f"Request to the embedding API failed: {request_error}")
        continue

    if response.status_code != 200:
        print(f"Failed to process image URLs. Status code: {response.status_code}, Response: {response.text}")
        continue

    # Retrieve embeddings from response
    embeddings = response.json().get('image_embeddings', [])

    # zip() would silently drop or misalign entries if the API returned a
    # different number of embeddings than URLs sent, so skip such a batch.
    if len(embeddings) != len(chunk):
        print(f"Skipping batch: sent {len(chunk)} URLs but received {len(embeddings)} embeddings.")
        continue

    # Each chunk entry is a {'url': ...} dict, so the URL is extracted from it
    documents = [
        {'url': entry['url'], 'embedding': embedding}
        for entry, embedding in zip(chunk, embeddings)
    ]

    # One bulk insert per batch instead of one round trip per document;
    # insert_many rejects an empty list, hence the guard.
    if documents:
        embeddings_collection.insert_many(documents)