In [3]:
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import concurrent
import PyPDF2
import os
import pandas as pd
import base64

script_dir = os.path.dirname(os.path.abspath(__name__))

client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
dir_pdfs = 'pdfs'
print(dir_pdfs)
pdf_files = [os.path.join(dir_pdfs, f) for f in os.listdir(dir_pdfs)]
print(pdf_files)

pdfs
['pdfs\\finderAI_data.pdf']


## Create vector store

In [4]:
def create_vector_store(vector_store_name : str) -> dict:

    try: 
        vector_store = client.vector_stores.create(name = vector_store_name)
        details = {
            'id' : vector_store.id , 
            'name' : vector_store.name ,
            'created_at' : vector_store.created_at , 
            'vecotr_status' : vector_store.status,
            'file_content' : vector_store.file_counts.completed

        }

        print(f"Vecotr Store is created : {details}")
        return details
    
    except Exception as e :
        print(f"There is an error happend{e}")
        return{}


In [5]:
vector_store_name = "finder_vector_store2"
vector_store_details = create_vector_store(vector_store_name)


Vecotr Store is created : {'id': 'vs_69030ae5c2bc81919f2ea7b264869651', 'name': 'finder_vector_store2', 'created_at': 1761807078, 'vecotr_status': 'completed', 'file_content': 0}


## upload file to vector store

In [6]:
def upload_single_pdf(file_path: str, vector_store_id: str):
    file_name = os.path.basename(file_path)
    try:
        file_response = client.files.create(file=open(file_path, 'rb'), purpose="assistants")
        attach_response = client.vector_stores.files.create(
            vector_store_id=vector_store_id,
            file_id=file_response.id
        )
        return {"file": file_name, "status": "success"}
    except Exception as e:
        print(f"Error with {file_name}: {str(e)}")
        return {"file": file_name, "status": "failed", "error": str(e)}
    

def upload_pdf_files_to_vector_store(vector_store_id: str):
    pdf_files = [os.path.join(dir_pdfs, f) for f in os.listdir(dir_pdfs)]
    stats = {"total_files": len(pdf_files), "successful_uploads": 0, "failed_uploads": 0, "errors": []}
    
    print(f"{len(pdf_files)} PDF files to process. Uploading in parallel...")

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        futures = {executor.submit(upload_single_pdf, file_path, vector_store_id): file_path for file_path in pdf_files}
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(pdf_files)):
            result = future.result()
            if result["status"] == "success":
                stats["successful_uploads"] += 1
            else:
                stats["failed_uploads"] += 1
                stats["errors"].append(result)

    return stats

In [7]:
upload_pdf_files_to_vector_store(vector_store_details["id"])

1 PDF files to process. Uploading in parallel...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:04<00:00,  4.36s/it]


{'total_files': 1, 'successful_uploads': 1, 'failed_uploads': 0, 'errors': []}

## Test RAG

In [8]:
query = "ÿπÿ∑ŸäŸÜŸä ÿ¥ŸàŸäÿ© ÿ™ŸÅÿßÿµŸäŸÑ ÿπŸÜ ÿßŸÑÿ¥Ÿäÿ±ÿßÿ™ŸàŸÜ"
search_results = client.vector_stores.search(
    query = query ,
    max_num_results = 2 ,
    rewrite_query=True , 
    vector_store_id= vector_store_details["id"]

)

In [9]:
for result in search_results.data:
    for content_item in result.content:
        print(content_item.text)
        print("---------------------------")

ŸÇÿ≥ŸÖ ÿßŸÑŸÅŸÜÿßÿØŸÇ ŸÅŸä ÿØŸÖÿ¥ŸÇ 
 1.ŸÅŸÜÿØŸÇ ŸÅŸàÿ± ÿ≥Ÿäÿ≤ŸàŸÜ ÿØŸÖÿ¥ŸÇ )ÿ≥ÿßÿ®ŸÇÿßŸã(
ÿßŸÑÿπŸÜŸàÿßŸÜ: ÿ¥ÿßÿ±ÿπ ÿ¥ŸÉÿ±Ÿä ÿßŸÑŸÇŸàÿ™ŸÑŸäÿå ŸÉŸÅÿ±ÿ≥Ÿàÿ≥ÿ©ÿå ÿØŸÖÿ¥ŸÇ 
ÿßŸÑŸáÿßÿ™ŸÅ: 963+ 11 339 1000
ÿßŸÑÿ™ÿµŸÜŸäŸÅ: 5 ŸÜÿ¨ŸàŸÖ 
ÿ≥ŸÜÿ© ÿßÿßŸÑŸÅÿ™ÿ™ÿßÿ≠: 2005
ÿßŸÑŸÖÿ≤ÿßŸäÿß: ÿßŸÜŸÅÿµŸÑÿ™ ÿ•ÿØÿßÿ±ÿ© ŸÅŸàÿ± ÿ≥Ÿäÿ≤ŸàŸÜÿ≤ ÿßŸÑÿπÿßŸÑŸÖŸäÿ© ÿπŸÜ ÿßŸÑŸÅŸÜÿØŸÇ ŸÅŸä ŸäŸàŸÜŸäŸà 2019 ÿ®ÿ≥ÿ®ÿ® 
ÿßŸÑÿπŸÇŸàÿ®ÿßÿ™ ÿßÿ£ŸÑŸÖÿ±ŸäŸÉŸäÿ©ÿå ŸÑŸÉŸÜ ÿßŸÑŸÅŸÜÿØŸÇ ŸÖÿ≥ÿ™ŸÖÿ± ŸÅŸä ÿßŸÑÿπŸÖŸÑ ÿ®ÿßÿ≥ŸÖ ŸÅŸàÿ± ÿ≥Ÿäÿ≤ŸàŸÜÿ≤ ÿ≠ÿ™Ÿâ ÿßÿ¢ŸÑŸÜ. Ÿäÿ∂ŸÖ 
297 ÿ∫ÿ±ŸÅÿ©ÿå ŸÖÿ≥ÿ®ÿ≠ÿå ŸÖÿ∑ÿßÿπŸÖ ŸÖÿ™ÿπÿØÿØÿ©ÿå ÿ•ÿ∑ÿßŸÑŸÑÿ© ÿπŸÑŸâ ÿ¨ÿ®ŸÑ ŸÇÿßÿ≥ŸäŸàŸÜ.
ŸÖÿßŸÑÿ≠ÿ∏ÿßÿ™: ŸäŸèÿ≥ÿ™ÿÆÿØŸÖ ÿ®ŸÉÿ´ÿ±ÿ© ŸÖŸÜ ŸÇÿ®ŸÑ ÿßÿ£ŸÑŸÖŸÖ ÿßŸÑŸÖÿ™ÿ≠ÿØÿ© ŸàŸàŸÉÿßÿßŸÑÿ™Ÿáÿßÿå ŸàÿßŸÑ Ÿäÿ≤ÿßŸÑ Ÿäÿ≠ŸÖŸÑ ÿßÿ≥ŸÖ Ÿàÿ¥ÿπÿßÿ±
ŸÅŸàÿ± ÿ≥Ÿäÿ≤ŸàŸÜÿ≤ ÿ±ÿ∫ŸÖ ÿπÿØŸÖ ÿßŸÜÿ™ŸÖÿßÿ¶Ÿá ŸÑŸÑÿ≥ŸÑÿ≥ŸÑÿ© .
 2.ŸÅŸÜÿØŸÇ ÿßŸÑÿ®Ÿàÿßÿ®ÿßÿ™ ÿßŸÑÿ≥ÿ®ÿπ )ÿ¥Ÿäÿ±ÿßÿ™ŸàŸÜ ÿØŸÖÿ¥ŸÇ ÿ≥ÿßÿ®ŸÇÿßŸã(
ÿßŸÑÿπŸÜŸàÿßŸÜ: ÿ≥ÿßÿ≠ÿ© ŸäŸàÿ≥ŸÅ ÿßŸÑÿπÿ∏ŸÖÿ© ‚Äì ÿ¥ÿßÿ±ÿπ ŸÖŸäÿ≥ŸÑŸàŸÜÿå ÿØŸÖÿ¥ŸÇ 
ÿßŸÑŸáÿßÿ™

## Constants

In [7]:
MODEL_INSTRUCTIONS= '''
## Identity

You are **Kuality finderAI**, an AI assistant developed by **Kuality AI**.
Your purpose is to provide accurate information about facilities in Damascus, including hospitals, restaurants, hotels, doctors, beauty centers, event organizers, and other local services and businesse . You must retrieve all information **exclusively from the provided file** that contains facility data.

If someone asks for your name, respond:

> ‚ÄúI am Kuality finderAI, your assistant from Kuality AI.‚Äù

---

## Knowledge Base

Your knowledge is based **only on the provided file**.
You must **always search the file first** before answering any question.
Do not use or rely on external knowledge, assumptions, or general data.

---

## Scope Limitation

You must only answer questions directly related to facilities in **Damascus**.

* If a user asks for information that is not found in the file, politely inform them it‚Äôs unavailable.
* Never answer questions about other cities or general topics.

**Arabic:** "ÿπÿ∞ÿ±Ÿãÿßÿå ŸÑŸäÿ≥ ŸÑÿØŸä ŸÖÿπŸÑŸàŸÖÿßÿ™ ÿ≠ŸàŸÑ Ÿáÿ∞ÿß ÿßŸÑÿ∑ŸÑÿ®. ŸäŸÖŸÉŸÜŸÉ ÿßŸÑÿ®ÿ≠ÿ´ ŸÅŸä ÿßŸÑŸÖŸÑŸÅ ÿßŸÑŸÖÿ™ŸàŸÅÿ± ŸÑŸÑŸÖÿ≤ŸäÿØ ŸÖŸÜ ÿßŸÑÿ™ŸÅÿßÿµŸäŸÑ."
**English:** "Sorry, I do not have information on this request. You can check the provided file for more details."

---

## Data Contribution

If a user wants to **add or update information** in the file, kindly direct them to contact **Kuality AI**:

üåê [https://kuality.ai/](https://kuality.ai/)
üìß **[hello@kuality.ai](mailto:hello@kuality.ai)**

---

## Multi-language Response

* Always reply in the **same language** the user uses.
* Arabic question ‚Üí answer in Arabic.
* English question ‚Üí answer in English.
* Maintain consistency in language throughout the conversation.

---

## Response Style

* Always provide **complete, relevant, and well-structured information** from the file.
* Use clear paragraphs and include labels (e.g., ‚ÄúAddress‚Äù, ‚ÄúPhone‚Äù, ‚ÄúRating‚Äù) when possible.
* Summarize only if the user requests a short version.

---

## Tone of Voice

* Maintain a **professional, friendly, and helpful** tone.
* Be patient, respectful, and use simple formal language suitable for public use.

---

## Conversation Flow

* If the question is unclear, ask for clarification first.
* If multiple records exist, show the **most accurate and complete** one.
* Keep track of context for coherent follow-up responses.

---

## Critical Rules

* **Do not include any source references or file citations** (e.g., ` ` or similar).
* **Always search the provided file** before answering.
* **Never invent or guess information**.
* **Never include citations, references, or document identifiers.**
* Use **paragraphs** instead of bullet points unless needed for clarity.
* Respond with **clean, natural text only**.

'''


MODEL_DESCRIPTION = '''
finderAI is an AI assistant specialized in providing information about facilities in Damascus, 
including hospitals, restaurants, and hotels. It retrieves details exclusively from the provided file. 
If the requested information is not in the file, it politely informs the user that the information is unavailable. 
Responses are given in the same language as the user's question, either Arabic or English, and are always clear, professional, and helpful.
'''

MODEL_NAME = '''
FinderAI2
'''

vector_store_id = "vs_69030ae5c2bc81919f2ea7b264869651"

TOOLS = [
    {"type": "file_search"}
]


TOOLS_RESOURCES = {
    "file_search": {
        "vector_store_ids": [vector_store_id]
    }
}

In [4]:
from typing import Optional , List , Dict
from openai import OpenAI
def create_assistant(client : OpenAI , 
                    name : str ,
                    model : str , 
                    description : Optional[str] , 
                    instructions : Optional[str] , 
                    tools : Optional[List[Dict]] = None ,
                    tool_resources : Optional[Dict] = None
                    ) -> dict :
    
    try: 
        if tools is None : 
            tools = []
        

        my_assistant = client.beta.assistants.create(
            model = model , 
            description= description , 
            instructions= instructions , 
            name = name ,
            tools = tools , 
            tool_resources = tool_resources
        )

        print(f"The assistant was created sucessifully and the assistant details is {my_assistant}")
        return my_assistant 

    except Exception as e:
        print(f"We have an exception while create an assistant {e}")
        return None 

In [8]:
from openai import OpenAI 
from dotenv import load_dotenv
import os 
load_dotenv()

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
assistant = create_assistant(client , MODEL_NAME , "gpt-4o-mini" , MODEL_DESCRIPTION  , MODEL_INSTRUCTIONS ,TOOLS ,TOOLS_RESOURCES )

print(assistant)

The assistant was created sucessifully and the assistant details is Assistant(id='asst_vX6KS1ppBKQNMNlW1cS0uNhN', created_at=1761811220, description="\nfinderAI is an AI assistant specialized in providing information about facilities in Damascus, \nincluding hospitals, restaurants, and hotels. It retrieves details exclusively from the provided file. \nIf the requested information is not in the file, it politely informs the user that the information is unavailable. \nResponses are given in the same language as the user's question, either Arabic or English, and are always clear, professional, and helpful.\n", instructions='\n## Identity\n\nYou are **Kuality finderAI**, an AI assistant developed by **Kuality AI**.\nYour purpose is to provide accurate information about facilities in Damascus, including hospitals, restaurants, hotels, doctors, beauty centers, event organizers, and other local services and businesse . You must retrieve all information **exclusively from the provided file** t

In [9]:
from datetime import datetime

def print_assistant_info(my_assistants):
    print(f"Total assistants found: {len(my_assistants.data)}\n")
    for assistant in my_assistants.data:
        print("===================================================")
        print(f"Assistant Name: {assistant.name}")
        print(f"Assistant ID:   {assistant.id}")
        print(f"Model:          {assistant.model}")
        
        created_date = datetime.fromtimestamp(assistant.created_at)
        print(f"Created At:     {created_date.strftime('%Y-%m-%d %H:%M:%S')}")
        
        if assistant.instructions:
            print(f"Instructions (Preview): {assistant.instructions[:100]}...")
        
        if assistant.tools:
            tool_types = [tool.type for tool in assistant.tools]
            print(f"Tools:          {', '.join(tool_types)}")

        if assistant.tool_resources and assistant.tool_resources.file_search:
            print(f"Vector Store IDs: {assistant.tool_resources.file_search.vector_store_ids}")
            
        print("===================================================\n")

In [10]:
from openai import OpenAI
import os

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

my_assistants = client.beta.assistants.list(
    order="desc",
    limit=20
)



print_assistant_info(my_assistants)


Total assistants found: 3

Assistant Name: 
FinderAI2

Assistant ID:   asst_vX6KS1ppBKQNMNlW1cS0uNhN
Model:          gpt-4o-mini
Created At:     2025-10-30 11:00:20
Instructions (Preview): 
## Identity

You are **Kuality finderAI**, an AI assistant developed by **Kuality AI**.
Your purpos...
Tools:          file_search
Vector Store IDs: ['vs_69030ae5c2bc81919f2ea7b264869651']

Assistant Name: 
FinderAI

Assistant ID:   asst_tfZvPdpsCepi4EFeh74EfJxD
Model:          gpt-4o-mini
Created At:     2025-10-28 14:02:03
Instructions (Preview): 
# System Prompt - finderAI

## Identity

You are **finderAI**, an AI assistant specialized in provi...
Tools:          file_search
Vector Store IDs: ['vs_69007217a7348191943954f62697c89b']

Assistant Name: HBR-Assistants
Assistant ID:   asst_F9452PUM881YZJ4D9wF3Tvsy
Model:          gpt-4o-mini-2024-07-18
Created At:     2025-10-01 14:35:18
Instructions (Preview): # System Prompt - Hamdan bin Rashid Center AI Assistant

## Identity
You are the official AI