In [7]:
import json
import base64
from PIL import Image
from rag_search.vector_db import VectorDatabase

In [None]:
#### LOAD API KEYS
# Each key is read from its own text file one directory above the notebook.
# The contents are stripped because text files commonly end with a trailing
# newline, which would otherwise become part of the key string.
with open("../keys/huggingface_key.txt", "r") as f:
    huggingface_key = f.read().strip()

# NOTE(review): openai_key is not referenced by any cell visible in this
# notebook section — confirm whether it is still needed.
with open("../keys/mvp_projects_key.txt", "r") as f:
    openai_key = f.read().strip()

In [None]:
#### INITIATE VECTOR CLASS
# Build the VectorDatabase instance that the later vectorize/search cells call.
# The four model arguments are identifier strings, each with a "local-" prefix;
# the Hugging Face key loaded earlier is forwarded unchanged.
# NOTE(review): "custom_db_path" looks like a non-default location — confirm
# whether the library's default save directory was intended instead.
vec = VectorDatabase(
    text_embedding_model="local-bge-base-en",
    image_embedding_model="local-clip-vit-base-patch32",
    response_model="local-mistral-3",
    captioning_model="local-blip-2",
    huggingface_key=huggingface_key,
    save_dir="custom_db_path",
)

In [None]:
#### VECTORIZE ALL FILES IN FOLDER
# Hand the data folder (path relative to the notebook's working directory)
# to the database's vectorize_folder method.
vec.vectorize_folder(folder_path="rag_search/data")

In [None]:
#### SEARCH FOR RESPONSE - WHOLE DATABASE
# Text-only query: the dict carries a single "text" entry.
query = {"text": "How has Microsoft's revenue grown in recent years?"}

# search_location=None means the search is not restricted (whole database).
response = vec.run_search(search_content=query, search_location=None)

# Pretty-print the response as indented JSON.
print(json.dumps(response, indent=2))

In [None]:
#### SEARCH FOR RESPONSE - SPECIFIC FILE
# Text-only query: the dict carries a single "text" entry.
query = {"text": "What is the company's strategy on returning value to shareholders?"}

# search_location is the path of one PDF, so the search targets that file.
response = vec.run_search(
    search_content=query,
    search_location="rag_search/data/Microsoft_FY25Q3_PressRelease.pdf",
)

# Pretty-print the response as indented JSON.
print(json.dumps(response, indent=2))

In [None]:
#### SEARCH FOR RESPONSE - SPECIFIC FOLDER
# Text-only query: the dict carries a single "text" entry.
query = {"text": "What are the key product pillars of Microsoft's business model?"}

# search_location is a folder path. Only one folder was vectorized in this
# notebook, so here it is the same as searching the whole database.
response = vec.run_search(
    search_content=query,
    search_location="rag_search/data",
)

# Pretty-print the response as indented JSON.
print(json.dumps(response, indent=2))

In [None]:
# Open the example chart with PIL. The bare `img` on the last line makes the
# notebook render the image as this cell's output.
img = Image.open("./example_graph.png")
img

In [None]:
#### SEARCH WITH AN IMAGE ATTACHED
# Read the example chart in binary mode and base64-encode its bytes into a
# UTF-8 string so it can be placed inside the query dict.
with open("./example_graph.png", "rb") as image_file:
    base64_image = base64.b64encode(image_file.read()).decode("utf-8")

# The query pairs a text question with a one-element list holding the encoded image.
query = {"text": "What drove this trend?", "image": [base64_image]}

# search_location=None: no restriction, same as the whole-database search.
response = vec.run_search(search_content=query, search_location=None)

# Pretty-print the response as indented JSON.
print(json.dumps(response, indent=2))