In [2]:
from pymongo import MongoClient

from sentence_transformers import SentenceTransformer

import os, certifi

from PIL import Image
import requests
from io import BytesIO

from dotenv import load_dotenv

# Load secrets/configuration from the project's local env file into os.environ.
load_dotenv(dotenv_path="../.env.local")

# Re-export OPENAI_API_KEY only when it is actually defined. Assigning None to
# os.environ raises TypeError, which previously broke this whole cell whenever
# the key was absent from the environment.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key is not None:
    os.environ["OPENAI_API_KEY"] = _openai_api_key

# CLIP checkpoint loaded through sentence-transformers; emb_img() uses it to
# encode both PIL images and plain text.
model = SentenceTransformer("clip-ViT-L-14")

# Request headers carrying the TMDB bearer token.
# NOTE(review): if TMDB_API_KEY is unset this renders as "Bearer None" — confirm
# the key is present before these headers are used.
headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {os.getenv('TMDB_API_KEY')}",
}



In [3]:
def emb_img(obj, type="url", timeout=10):
    """Encode an image or a text string with the notebook-level ``model``.

    Parameters
    ----------
    obj
        Input to embed; how it is read depends on ``type``:

        * ``"url"``  - URL of an image, downloaded with ``requests.get``.
        * ``"file"`` - object exposing ``.stream.read()`` that returns the raw
          image bytes (presumably an uploaded-file wrapper — confirm with caller).
        * ``"text"`` - value handed to ``model.encode`` unchanged.
    type : str
        One of ``"url"``, ``"file"`` or ``"text"``. (The name shadows the
        builtin but is kept because callers pass it by keyword.)
    timeout : float or tuple, optional
        Timeout in seconds forwarded to ``requests.get`` for the ``"url"`` case,
        so a stalled server cannot hang the notebook indefinitely.

    Returns
    -------
    The value returned by ``model.encode`` for the prepared input.

    Raises
    ------
    requests.HTTPError
        If the image URL answers with a 4xx/5xx status.
    ValueError
        If ``type`` is not one of the supported values.
    """
    if type == "url":
        response = requests.get(obj, timeout=timeout)
        # Fail here with the HTTP status rather than letting PIL choke on an
        # HTML error page.
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
        return model.encode(img)

    if type == "file":
        img = Image.open(BytesIO(obj.stream.read()))
        return model.encode(img)

    if type == "text":
        return model.encode(obj)

    # Previously an unknown type fell through and returned None silently.
    raise ValueError(
        f"Unsupported type {type!r}; expected 'url', 'file' or 'text'."
    )

def connect_to_mongo():
    """Create a MongoDB client from the ``MONGO_URI`` environment variable.

    The client is configured to verify TLS against certifi's CA bundle. After
    construction a ``ping`` command is sent; success prints a confirmation,
    failure prints the exception. The ping is best-effort: the client is
    returned in both cases.

    Returns
    -------
    MongoClient
        The constructed client.

    Raises
    ------
    RuntimeError
        If ``MONGO_URI`` is unset or empty. Without this check ``None`` would be
        passed as the host and the driver would fall back to its default host
        instead of the intended deployment.
    """
    mongo_uri = os.getenv("MONGO_URI")
    if not mongo_uri:
        raise RuntimeError(
            "MONGO_URI is not set; define it in the environment or in ../.env.local"
        )

    client = MongoClient(mongo_uri, tlsCAFile=certifi.where())

    # Send a ping to confirm a successful connection
    try:
        client.admin.command('ping')
        print("\nPinged your deployment. You successfully connected to MongoDB!")
    except Exception as e:
        # Deliberately non-fatal: report the problem and still hand back the client.
        print(e)

    return client

In [26]:
# Image URL used as the similarity-search query.
query = "https://m.media-amazon.com/images/M/MV5BMTM0NjUxMDk5MF5BMl5BanBnXkFtZTcwNDMxNDY3Mw@@._V1_.jpg"

# Open the MongoDB connection (pings the deployment and prints the outcome).
client = connect_to_mongo()

# Database and collection names are read from the environment.
cl_db = client[os.environ.get("CINELENS_DB")]
img_col = cl_db[os.environ.get("IMGS_COL")]

# Download the query image and encode it into an embedding.
query_emb = emb_img(query, "url")


Pinged your deployment. You successfully connected to MongoDB!


In [27]:
from dotenv import load_dotenv

load_dotenv(dotenv_path="../.env.local")

# Name of the vector search index. The pipeline previously passed the string
# literal "cl_index" and ignored this variable; the literal is kept only as a
# fallback for when IMGS_INDEX is not defined.
cl_index = os.getenv("IMGS_INDEX") or "cl_index"

# Number of matches to return, and the size of the approximate-nearest-neighbour
# candidate pool they are selected from. With candidates == limit the search has
# no room to oversample, so the pool is set to 20x the limit.
TOP_K = 5
NUM_CANDIDATES = 20 * TOP_K

pipeline = [
    {
        "$vectorSearch": {
            "index": cl_index,
            "path": "embeddings",
            "queryVector": query_emb.tolist(),
            "numCandidates": NUM_CANDIDATES,
            "limit": TOP_K,
        }
    },
    {
        # Keep only the image URL, its source, and the similarity score.
        "$project": {
            "_id": 0,
            "url": 1,
            "source": 1,
            "score": {
                "$meta": "vectorSearchScore"
            }
        }
    }
]

results = list(img_col.aggregate(pipeline))

In [28]:
# Show the vector search matches: each entry holds the projected url, source and score.
results

[{'url': 'https://image.tmdb.org/t/p/w500/mor5PrU9cQghuXp7qRw2LedyCRK.jpg',
  'source': 'the-shawshank-redemption',
  'score': 0.9476538896560669},
 {'url': 'https://image.tmdb.org/t/p/w500/yM67XrFH3l0GqDbnGR0Q6UjzJoO.jpg',
  'source': 'the-shawshank-redemption',
  'score': 0.906266450881958},
 {'url': 'https://image.tmdb.org/t/p/w500/avedvodAZUcwqevBfm8p4G2NziQ.jpg',
  'source': 'the-shawshank-redemption',
  'score': 0.8965779542922974},
 {'url': 'https://image.tmdb.org/t/p/w500/hchjeIPybPEgCPVBZgvFa5wHgVU.jpg',
  'source': 'the-shawshank-redemption',
  'score': 0.8898217678070068},
 {'url': 'https://image.tmdb.org/t/p/w500/kXfqcdQKsToO0OUXHcrrNCHDBzO.jpg',
  'source': 'the-shawshank-redemption',
  'score': 0.8851290941238403}]