In [7]:
pip install indexify langchain langchain_openai

Collecting langchain
  Downloading langchain-0.1.16-py3-none-any.whl.metadata (13 kB)
Collecting langchain_openai
  Downloading langchain_openai-0.1.3-py3-none-any.whl.metadata (2.5 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Using cached SQLAlchemy-2.0.29-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.6 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)
  Downloading aiohttp-3.9.4-cp312-cp312-macosx_11_0_arm64.whl.metadata (7.5 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Using cached dataclasses_json-0.6.4-py3-none-any.whl.metadata (25 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Using cached jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-community<0.1,>=0.0.32 (from langchain)
  Downloading langchain_community-0.0.32-py3-none-any.whl.metadata (8.5 kB)
Collecting langchain-core<0.2.0,>=0.1.42 (from langchain)
  Downloading langchain_core-0.1.42-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain-text-splitters<0.1

### Download Indexify and the Yolo Extractor
```bash
curl https://www.tensorlake.ai | sh
```

1. Start the Server
   ```bash
    ./indexify server -d
    ```

2. Download and start the extractor
   ```bash
    indexify-extractor download hub://image/yolo
    indexify-extractor join-server yolo.yolo_extractor:YoloExtractor
    ```



In [100]:
from indexify import IndexifyClient
# Client built with no arguments; every later cell talks to Indexify through it.
# NOTE(review): presumably this targets the server started in the setup steps
# above — confirm the client's default address.
client = IndexifyClient()

In [101]:
# Register an extraction policy named "object_detection" that uses the
# 'tensorlake/yolo-extractor' extractor.
client.add_extraction_policy(extractor='tensorlake/yolo-extractor', name="object_detection")

In [102]:
# Keep the "ingestion" entry of the "ddls" returned by list_schemas();
# generatate_sql_from_question embeds this value in its SQL-generation prompt.
schema = client.list_schemas()["ddls"]["ingestion"]

In [115]:
# The remote file is a .jpg, so it is declared as image/jpeg (not image/png).
# The labels dict tags the image with its location.
response = client.ingest_remote_file("https://extractor-files.diptanu-6d5.workers.dev/images/Central_Park_Lake.jpg", "image/jpeg", {"location": "central park"})
# Remember the id assigned to this image so a later question can refer to it.
content_id = response['content_id']

In [116]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI


In [117]:
def ask(prompt, question):
    """Send `question` through `prompt` and a chat model and return the parsed reply.

    Args:
        prompt: Prompt template placed between the input mapping and the model.
        question: Value handed to the chain; it is exposed to the prompt under
            the "question" key.

    Returns:
        Whatever the chain yields after `StrOutputParser` has processed the
        model output.
    """
    llm = ChatOpenAI()
    # RunnablePassthrough forwards the invoke() argument unchanged as "question".
    inputs = {"question": RunnablePassthrough()}
    to_text = StrOutputParser()
    pipeline = inputs | prompt | llm | to_text
    return pipeline.invoke(question)
    
    

In [118]:
def generatate_sql_from_question(question):
    """Ask the chat model to write a SQL query that answers `question`.

    The prompt embeds the notebook-level `schema` value and leaves a single
    `{question}` placeholder that `ask` fills in.

    Args:
        question: Natural-language question about the ingested images.

    Returns:
        The model's reply as returned by `ask` (expected to be SQL text).
    """
    # The schema is inlined by the f-string *before* the text is parsed as a
    # prompt template, so any literal brace in it would be mistaken for a
    # template placeholder. Double the braces to keep them literal.
    schema_text = str(schema).replace("{", "{{").replace("}", "}}")
    template = f"""
    Images are stored in the database with the following schema:
    {schema_text}

    fyi. 

    Generate the SQL query based on the following question below:

    """ + "Question: {question}"
    prompt = ChatPromptTemplate.from_template(template)
    return ask(prompt, question)

def run_sql(query):
    """Run `query` through the Indexify client and return the result as text.

    Args:
        query: SQL string passed to `client.sql_query`.

    Returns:
        The pretty-printed `.result` of the query with every '{' and '}'
        character removed.
    """
    # pformat is not imported by any visible cell of the notebook; import it
    # here so the function works on a fresh kernel.
    from pprint import pformat

    query_result = client.sql_query(query)
    # Braces are stripped from the formatted text; presumably so it can be
    # embedded in a prompt template without creating placeholders.
    return pformat(query_result.result).replace('{', '').replace('}', '')


def answer_from_results(question, generated_sql, query_result):
    """Ask the chat model to answer `question` given the SQL that ran and its result.

    All three arguments are inlined into the prompt text, so the resulting
    template has no placeholders left; `question` is still passed to `ask`
    as the chain input.

    Args:
        question: The user's original question.
        generated_sql: The SQL query that was executed.
        query_result: Text form of what the query returned.

    Returns:
        The model's reply as returned by `ask`.
    """
    def _literal(value):
        # The text is parsed as a prompt template after interpolation, so
        # braces in user or model supplied text must be doubled to stay literal.
        return str(value).replace("{", "{{").replace("}", "}}")

    template = f"""
    The question user asked is:
    {_literal(question)}
    We ran a database query:  {_literal(generated_sql)}
    The query returned the result: {_literal(query_result)}

    FYI. 

    """
    prompt = ChatPromptTemplate.from_template(template)
    return ask(prompt, question)

def ask_question(question):
    """Answer `question` end to end: generate SQL, run it, then phrase the answer.

    Args:
        question: Natural-language question about the ingested images.

    Returns:
        The value produced by `answer_from_results` for this question.
    """
    generated_sql = generatate_sql_from_question(question)
    query_output = run_sql(generated_sql)
    return answer_from_results(question, generated_sql, query_output)
    


In [119]:
# Ask about the single image ingested earlier, identified by its content_id.
response = ask_question(f"how many people are in content_id: {content_id}?")
# Bare expression so the notebook displays the answer.
response


'There are 13 people in content_id: pibDeT6mnJCdKlOA.'

In [113]:
# Additional sample images, all hosted under the same base URL.
file_names=["skate.jpg", "congestion.jpg", "bushwick-bred.jpg", "141900.jpg", "132500.jpg", "123801.jpg","120701.jpg", "103701.jpg"]
file_urls = [f"https://extractor-files.diptanu-6d5.workers.dev/images/{file_name}" for file_name in file_names]
for file_url in file_urls:
    # Every file is a .jpg, so declare image/jpeg (not image/png); no labels are attached.
    client.ingest_remote_file(file_url, "image/jpeg", {})



In [120]:
# Ask a question that spans all ingested images rather than a single content_id.
response = ask_question("List all the content_id with boat and also print the number of boats in each of the photos?")
# print() renders the newlines in the answer instead of showing the string repr.
print(response)

Here are the content_id with the number of boats in each photo:
- Content_id: uqGr1N8l42o9kfDI, Number of boats: 8
- Content_id: Li50p9XKlhfLs9ke, Number of boats: 15
- Content_id: -K7LcspteAvmoO3s, Number of boats: 6
- Content_id: gBqaqxd7anhHY-yU, Number of boats: 18
- Content_id: pibDeT6mnJCdKlOA, Number of boats: 8
