In [68]:
import duckdb
from langchain.agents import create_agent
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool
from langchain_core.messages import SystemMessage
import threading
from dotenv import load_dotenv
import os
from pathlib import Path

load_dotenv()
thread_local = threading.local()

def get_db_connection():
    """Return this thread's DuckDB connection, opening it on first use.

    Candidate database files are read from the ``DB_PATHS`` environment
    variable as a colon-separated list and tried in order. The first path
    that opens is cached on ``thread_local`` (``conn`` and ``db_path``) and
    reused by later calls made from the same thread.

    Returns:
        The cached connection object produced by ``duckdb.connect``.

    Raises:
        RuntimeError: If ``DB_PATHS`` is not set, or if none of the listed
            paths could be opened.
    """
    # Fast path: a connection was already opened on this thread.
    if getattr(thread_local, "conn", None) is not None:
        return thread_local.conn

    raw_paths = os.getenv("DB_PATHS")
    if raw_paths is None:
        # Without this check the failure would surface as an opaque
        # "'NoneType' object has no attribute 'split'".
        raise RuntimeError(
            "DB_PATHS environment variable is not set; "
            "expected a colon-separated list of database paths"
        )

    for db_path in raw_paths.split(":"):
        try:
            conn = duckdb.connect(database=db_path, read_only=False)
        except Exception as e:
            # Best effort: report the failure and fall through to the next path.
            print(f" ! Failed to connect to {db_path}: {e}")
            continue
        thread_local.conn = conn
        thread_local.db_path = db_path
        print(f" * Database connected: {db_path}")
        return conn

    raise RuntimeError("Failed to connect to any database path")

def get_table_name():
    """Return the names of all tables in the ``main`` schema as a list."""
    query = "SELECT table_name FROM information_schema.tables WHERE table_schema='main';"
    rows = get_db_connection().execute(query).fetchall()
    # Each fetched row holds a single field: the table name.
    return [record[0] for record in rows]

def get_column_types():
    """Print and return ``(table_name, column_name, data_type)`` for every column.

    The rows come from ``information_schema.columns``; one
    ``- table.column: type`` line is printed per row, and the same
    triples are returned as a list.
    """
    query = "SELECT table_name, column_name, data_type FROM information_schema.columns"
    fetched = get_db_connection().execute(query).fetchall()

    schema_rows = []
    for record in fetched:
        schema_rows.append((record[0], record[1], record[2]))

    for tbl, col, dtype in schema_rows:
        print(f"- {tbl}.{col}: {dtype}")
    return schema_rows

def rel_db_relationship():
    """Return a Markdown description of table relationships and domain notes.

    The text is a fixed string: a list of join keys between tables followed
    by usage notes. Identifiers in it are meant to match the database's real
    table and column names (for example the geolocation join key is
    ``geolocation.geolocation_zip_code_prefix`` and the translation table is
    ``product_category_translation``), so keep them in sync with the schema.

    Returns:
        str: The Markdown text, starting with a newline.
    """
    return """
## Database Schema Relationships
- orders.customer_id = customers.customer_id
- orders.order_id = order_items.order_id
- orders.order_id = order_reviews.order_id
- orders.order_id = order_payments.order_id
- order_items.product_id = products.product_id
- order_items.seller_id = sellers.seller_id
- customers.customer_zip_code_prefix = geolocation.geolocation_zip_code_prefix
- sellers.seller_zip_code_prefix = geolocation.geolocation_zip_code_prefix

## Important Domain Notes
- customer_id is unique per order
- Use customer_unique_id to identify repeat customers
- Product categories are stored in Portuguese
- Use product_category_translation when English labels are required
"""

if __name__ == "__main__":
    # Open (or reuse) this thread's database connection up front.
    conn = get_db_connection()

    # Bind the list itself before printing: print() returns None, so
    # assigning its result would leave table_names empty of any data.
    table_names = get_table_name()
    print(f"Table names: {table_names}")

    # get_column_types() also prints one line per column as a side effect.
    column_types = get_column_types()
    rel_db_relationship_info = rel_db_relationship()

    # Chat model configuration for the agent.
    agent_name = "gpt-4o-mini-2024-07-18"
    agent_model = ChatOpenAI(
        model=agent_name,
        temperature=0.2,
        max_tokens=5000,
    )

    

 * Database connected: /media/edward/SSD-Data/My Folder/text-to-sql-ai-agent/olist.db
Table names: ['customers', 'geolocation', 'order_items', 'order_payments', 'order_reviews', 'orders', 'products', 'sellers', 'product_category_translation']
- customers.customer_id: VARCHAR
- customers.customer_unique_id: VARCHAR
- customers.customer_zip_code_prefix: BIGINT
- customers.customer_city: VARCHAR
- customers.customer_state: VARCHAR
- geolocation.geolocation_zip_code_prefix: BIGINT
- geolocation.geolocation_lat: DOUBLE
- geolocation.geolocation_lng: DOUBLE
- geolocation.geolocation_city: VARCHAR
- geolocation.geolocation_state: VARCHAR
- order_items.order_id: VARCHAR
- order_items.order_item_id: BIGINT
- order_items.product_id: VARCHAR
- order_items.seller_id: VARCHAR
- order_items.shipping_limit_date: VARCHAR
- order_items.price: DOUBLE
- order_items.freight_value: DOUBLE
- order_payments.order_id: VARCHAR
- order_payments.payment_sequential: BIGINT
- order_payments.payment_type: VARCHAR
-

In [64]:
# Ad-hoc check: fetch (table_name, column_name, data_type) for every row of
# information_schema.columns.
# NOTE(review): relies on a `conn` already defined in the session — confirm it is still open.
conn.execute("SELECT table_name, column_name, data_type FROM information_schema.columns").fetchall()

[('customers', 'customer_id', 'VARCHAR'),
 ('customers', 'customer_unique_id', 'VARCHAR'),
 ('customers', 'customer_zip_code_prefix', 'BIGINT'),
 ('customers', 'customer_city', 'VARCHAR'),
 ('customers', 'customer_state', 'VARCHAR'),
 ('geolocation', 'geolocation_zip_code_prefix', 'BIGINT'),
 ('geolocation', 'geolocation_lat', 'DOUBLE'),
 ('geolocation', 'geolocation_lng', 'DOUBLE'),
 ('geolocation', 'geolocation_city', 'VARCHAR'),
 ('geolocation', 'geolocation_state', 'VARCHAR'),
 ('order_items', 'order_id', 'VARCHAR'),
 ('order_items', 'order_item_id', 'BIGINT'),
 ('order_items', 'product_id', 'VARCHAR'),
 ('order_items', 'seller_id', 'VARCHAR'),
 ('order_items', 'shipping_limit_date', 'VARCHAR'),
 ('order_items', 'price', 'DOUBLE'),
 ('order_items', 'freight_value', 'DOUBLE'),
 ('order_payments', 'order_id', 'VARCHAR'),
 ('order_payments', 'payment_sequential', 'BIGINT'),
 ('order_payments', 'payment_type', 'VARCHAR'),
 ('order_payments', 'payment_installments', 'BIGINT'),
 ('order_