In [1]:
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load environment variables from .env file
load_dotenv()
# Get the variables
DB_USER = os.getenv('DB_USER')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_NAME = os.getenv('DB_NAME')
DB_SCHEMA = os.getenv('DB_SCHEMA')

# Construct the database URL
DATABASE_URL = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
print(DATABASE_URL)


postgresql://recitty_reader:Gl0B4lR3aDeR@178.63.67.151:5432/recitty


In [2]:
engine = create_engine(DATABASE_URL)
engine.dialect.name

'postgresql'

In [6]:
from agent.state import State
from langchain_core.messages import HumanMessage, AIMessage
from agent.graph import graph  # your compiled graph

from agent.configuration import DatabaseHandler
from sqlalchemy import MetaData, Table, select
from sqlalchemy.orm import sessionmaker



Session = sessionmaker(bind=engine)
metadata = MetaData()
metadata.reflect(bind=engine)
building = Table(
    "building",
    metadata,
    autoload_with=engine,
    schema="smart_buildings"
)

def load_filter_options():
    session = Session()

    # Get distinct categories (assuming a 'category' column exists)
    category_query = select(building.c.type).distinct()
    categories = [row[0] for row in session.execute(category_query).fetchall()]

    # Get buildings grouped by category
    building_query = select(building.c.name, building.c.type)
    buildings_by_category = {}
    for name, category in session.execute(building_query):
        buildings_by_category.setdefault(category, []).append(name)
    
    return categories, buildings_by_category


categories, buildings_by_category = load_filter_options()
print(categories)



['Administración', 'Educación', 'Comercio', 'Punto Limpio', 'Casal/Centro Cívico', 'Cultura y Ocio', 'Restauración', 'Salud y Servicios Sociales', 'Bienestar Social', 'Mercado', 'Parque', 'Industrial', 'Centros Deportivos', 'Parking', 'Policia', 'Cementerio', 'Protección Civil']


In [3]:
from sqlalchemy import inspect
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine

from langchain.chat_models import init_chat_model
from langchain_core.documents import Document
from langchain_core.language_models import BaseChatModel
from agent.configuration import Configuration
from agent.state import RelevantInfoResponse
from agent.state import State
from typing import cast, Literal
model = init_chat_model('gpt-4o-mini', model_provider='openai',temperature=0.7)
query ="Cual es el edificio con mayor consumo en Educación dentro del último mes?"
configuration = Configuration()

database_schema = configuration.database_schema



  

  from .autonotebook import tqdm as notebook_tqdm


In [38]:
from time import time
from langchain_core.runnables import RunnableConfig
from agent.state import Router
from agent.configuration import Configuration
from langchain_core.messages import SystemMessage
from agent.utils import load_chat_model
async def detect_intent(state: State, *, config: RunnableConfig) -> dict[str, Router]:

    start_time = time()
    """Analyze the user's query and determine the appropriate routing.

    This function uses a language model to classify the user's query and decide how to route it
    within the conversation flow.

    Args:
        state (State): The current state of the agent, including conversation history.
        config (RunnableConfig): Configuration with the model used for query analysis.

    Returns:
        dict[str, Router]: A dictionary containing the 'router' key with the classification result (classification type and logic).
    """
    configuration = Configuration.from_runnable_config(config)

    time_1 = time()
    
    model = load_chat_model(configuration.query_model)

    time_2 = time()
    
    messages = [SystemMessage(content=configuration.router_system_prompt)] + state.recent_messages

    time_3 = time()
    
    response = cast(
        Router, await model.with_structured_output(Router).ainvoke(messages)
    )
    time_4 = time()

    print("Time taken for each step:")
    print(f"Step 1: {time_1 - start_time:.4f} seconds")
    print(f"Step 2: {time_2 - time_1:.4f} seconds")
    print(f"Step 3: {time_3 - time_2:.4f} seconds")
    print(f"Step 4: {time_4 - time_3:.4f} seconds")
   
    return response

async def route_query(state: State) -> Literal["extract_relevant_info", "ask_for_more_info", "respond_to_general_query"]:

    """Determine the next step based on the query classification.

    Args:
        state (State): The current state of the agent, including the router's classification.

    Returns:
        Literal["extract_relevant_info", "ask_for_more_info", "respond_to_general_query"]: The next step to take.

    Raises:
        ValueError: If an unknown router type is encountered.
    """
    ROUTE_MAP = {
    "database": "extract_relevant_info",
    "more-info": "ask_for_more_info",
    "general": "respond_to_general_query"
    }
    try:
        return ROUTE_MAP[state.router["type"]]
    except KeyError:
        raise ValueError(f"Unknown router type {state.router['type']}")

In [39]:
time_0 = time()
state = State(messages=[])
query="¿Cuál es el edificio con mayor consumo en Educación dentro del último mes?"
state.messages = [query]
response = await detect_intent(state, config=RunnableConfig())
time_4 = time()
state.router = {"type":response.type,"logic": response.logic}
router_map = await route_query(state)
time_5 = time()
print(f"Step 5: {time_5 - time_4:.4f} seconds")
print(f"total Time: {time_5 - time_0:.4f} seconds")


Time taken for each step:
Step 1: 0.0096 seconds
Step 2: 0.8770 seconds
Step 3: 0.0000 seconds
Step 4: 1.2182 seconds
Step 5: 0.0001 seconds
total Time: 2.1053 seconds


In [35]:
time_0 = time()
state = State(messages=[])
query="¿Cuál es el edificio con mayor consumo en Educación dentro del último mes?"
state.messages = [query]
response = detect_intent(state, config=RunnableConfig())
time_4 = time()
state.router = {"type":response.type,"logic": response.logic}
router_map = route_query(state)
time_5 = time()
print(f"Step 5: {time_5 - time_4:.4f} seconds")
print(f"total Time: {time_5 - time_0:.4f} seconds")


Time taken for each step:
Step 1: 0.0103 seconds
Step 2: 0.8605 seconds
Step 3: 0.0000 seconds
Step 4: 1.4023 seconds
Step 5: 0.0001 seconds
total Time: 2.2736 seconds


In [4]:
subset= {'building': ['id', 'name', 'type'],
  'building_energy_consumption_metrics': ['building_id',
   'building_name',
   'energy_consumption_kw_last_month',
   'energy_consumption_kw_previous_month',
   'energy_consumption_percentage_diff_previous_month',
   'energy_consumption_kw_min_last_month',
   'energy_consumption_kw_max_last_month']}

In [46]:
import yaml
from typing import Any, Dict, List
def load_schema_from_yaml(file_path) -> None:
    """Load the entire schema definition from a YAML file."""
    try:
        with open(file_path, 'r') as f:
            schema_data = yaml.safe_load(f)
        return build_schema_context(schema_data)
    except Exception as e:
        raise ValueError(f"Failed to load schema from YAML: {str(e)}")

def build_schema_context(schema_data) -> None:
    """ Build the schema context string from loaded schema data."""
    if not schema_data:
        raise ValueError("No schema data loaded")
        
    output_str = f"Schema: {schema_data['schema']}\n\n"

    for table in schema_data.get('tables', []):
        output_str += f"Table: {table['name']}\n"
        output_str += f"   Description: {table['description']}\n"

        for column in table.get('columns', []):
            output_str += f"     • Column: {column['name']}\n"
            output_str += f"       - Type: {column['type']}\n"
            output_str += f"       - Description: {column['description']}\n"

        output_str += "\n"


    
    return output_str

schema_file_path = "src/agent/schema_context.yaml"  # Path to your YAML file
schema_context = load_schema_from_yaml(schema_file_path)
print(schema_context)


Schema: smart_buildings

Table: building
   Description: Stores information about physical buildings, useful to query by name or other building attributes
     • Column: id
       - Type: integer
       - Description: Primary key identifier
     • Column: name
       - Type: varchar(255)
       - Description: Building name
     • Column: type
       - Type: text
       - Description: Type of building (e.g., EducaciÃ³n, AdministraciÃ³n, etc.)

Table: energy_consumption
   Description: Records building energy consumption by hour over time
     • Column: id
       - Type: serial
       - Description: Auto-incrementing primary key
     • Column: building_id
       - Type: integer
       - Description: Foreign key to buildings table
     • Column: date
       - Type: timestamp
       - Description: Timestamp of the reading
     • Column: consumption_kwh
       - Type: float8
       - Description: Energy consumption in kilowatt-hours

Table: building_energy_consumption_metrics
   Description

In [5]:
output_lines = []

for table in configuration.db_handler.schema_data.get('tables', []):
    table_name = table.get('name')
    if table_name in subset:
        output_lines.append(f"\nTable: {table_name}")
        output_lines.append("Columns:")

        # Map column names to definitions
        column_defs = {col['name']: col for col in table.get('columns', [])}
        requested_columns = subset[table_name]

        for col in requested_columns:
            col_def = column_defs.get(col)
            if col_def:
                output_lines.append(f"  - {col_def['name']}: {col_def['type']}")

print("\n".join(output_lines))


Table: building
Columns:
  - id: integer
  - name: varchar(255)
  - type: text

Table: building_energy_consumption_metrics
Columns:
  - building_id: int4
  - building_name: varchar(255)
  - energy_consumption_kw_last_month: float8
  - energy_consumption_kw_previous_month: float8
  - energy_consumption_percentage_diff_previous_month: float8
  - energy_consumption_kw_min_last_month: float8
  - energy_consumption_kw_max_last_month: float8


In [25]:
result

{'buildings': {'columns': {'id': {'name': 'id', 'type': 'integer'},
   'name': {'name': 'name', 'type': 'varchar(255)'}}},
 'building_energy_consumption_metrics': {'columns': {'building_id': {'name': 'building_id',
    'type': 'int4'},
   'building_name': {'name': 'building_name', 'type': 'varchar(255)'},
   'energy_consumption_kw_last_week': {'name': 'energy_consumption_kw_last_week',
    'type': 'float8'},
   'energy_consumption_kw_previous_week': {'name': 'energy_consumption_kw_previous_week',
    'type': 'float8'},
   'energy_consumption_percentage_diff_previous_week': {'name': 'energy_consumption_percentage_diff_previous_week',
    'type': 'float8'}}}}

In [12]:
query

'Cual es el edificio con mayor consumo en Educación dentro del último mes?'

In [2]:
prompt = configuration.relevant_info_system_prompt.format(
        schema_description=database_schema
    )

messages = [
        {"role": "system", "content": prompt}
    ] + [query]

model_response = cast(RelevantInfoResponse,await model.with_structured_output(RelevantInfoResponse).ainvoke(messages))


In [3]:
model_response

{'relevant_tables': ['building', 'building_energy_consumption_metrics'],
 'relevant_columns': {'building': ['id', 'name', 'type'],
  'building_energy_consumption_metrics': ['building_id',
   'building_name',
   'energy_consumption_kw_last_month',
   'energy_consumption_kw_previous_month',
   'energy_consumption_percentage_diff_previous_month',
   'energy_consumption_kw_min_last_month',
   'energy_consumption_kw_max_last_month']}}

In [None]:
{'building': ['id', 'name', 'address'],
 'building_energy_consumption_metrics': ['building_id','building_name','energy_consumption_kw_last_week','energy_consumption_kw_previous_week','energy_consumption_percentage_diff_previous_week']}

{'building': {'id': 'id', 'name': 'name', 'address': 'address'},
 'building_energy_consumption_metrics': {'building_id': 'building_id',
  'building_name': 'building_name',
  'energy_consumption_kw_last_week': 'energy_consumption_kw_last_week',
  'energy_consumption_kw_previous_week': 'energy_consumption_kw_previous_week',
  'energy_consumption_percentage_diff_previous_week': 'energy_consumption_percentage_diff_previous_week'}}

In [None]:
    schema_context = [f"schema_name: {db_handler.schema_name}"]
    for table, columns in state.relevant_columns.items():
        schema_context.append(f"Table: {table}")
        schema_context.append("Columns:")
        for col in columns:
            schema_context.append(f"  - {col}")
        schema_context.append("")

In [26]:
db_handler = configuration.db_handler
# Prepare schema context for LLM
schema_context = [f"schema_name: {db_handler.schema_name}"]
schema_context += [db_handler.get_table_schema(table) for table in model_response["relevant_tables"]]
 

In [28]:
print("".join(schema_context))

schema_name: smart_buildingstable_name: building
columns: 
  - name: id
    type: INTEGER
  - name: name
    type: VARCHAR
  - name: type
    type: VARCHAR
  - name: building_year
    type: INTEGER
  - name: build_surface
    type: INTEGER
  - name: active_morning_proportion
    type: DOUBLE_PRECISION
  - name: active_afternoon_proportion
    type: DOUBLE_PRECISION
  - name: active_night_proportion
    type: DOUBLE_PRECISION
  - name: active_laborweek_proportion
    type: DOUBLE_PRECISION
  - name: active_weekend_proportion
    type: DOUBLE_PRECISION
  - name: location_id
    type: INTEGER
  - name: cadastral_reference
    type: VARCHAR
  - name: cups
    type: VARCHAR
  - name: floors_above_ground
    type: INTEGER
  - name: floors_below_ground
    type: INTEGER
table_name: building_energy_consumption_metrics
columns: 
  - name: building_id
    type: INTEGER
  - name: building_name
    type: VARCHAR
  - name: energy_consumption_kw_last_week
    type: DOUBLE_PRECISION
  - name: energy_

In [12]:
    
import yaml
from pathlib import Path
from typing import Optional, Dict, Any

def load_schema_from_yaml(file_path: Path) -> None:
    """Load the entire schema definition from a YAML file."""
    try:
        with open(file_path, 'r') as f:
            schema_data = yaml.safe_load(f)
        schema_context = build_schema_context(schema_data)
        return schema_context
    except Exception as e:
        raise ValueError(f"Failed to load schema from YAML: {str(e)}")

def build_schema_context(schema_data) -> None:
    """ Build the schema context string from loaded schema data."""

    context_lines = []
    context_lines.append(f"Schema: {schema_data['schema']}")
    context_lines.append(f"Description: {schema_data.get('description', 'No description')}")

    for table in schema_data.get('tables', []):
        context_lines.append(f"\nTable: {table['name']}")
        context_lines.append(f"Description: {table.get('description', 'No description')}")
        
        for column in table.get('columns', []):
            context_lines.append(f"  Column: {column['name']} - {column.get('description', 'No description')}")
    schema_context = "\n".join(context_lines)
    return schema_context
# Example usage
schema_file = Path("src/agent/schema_context.yaml")
schema_context = load_schema_from_yaml(schema_file)

In [14]:
print(schema_context)


Schema: smart_building
Description: Contains energy consumption data for smart buildings

Table: buildings
Description: Stores information about physical buildings, useful to query by name or other building attributes
  Column: id - Primary key identifier
  Column: name - Building name
  Column: address - Physical address of the building

Table: energy_consumption
Description: Records building energy consumption by hour over time
  Column: id - Auto-incrementing primary key
  Column: building_id - Foreign key to buildings table
  Column: date - Timestamp of the reading
  Column: consumption_kwh - Energy consumption in kilowatt-hours

Table: building_energy_consumption_metrics
Description: Provides aggregated energy consumption metrics per building with weekly/monthly comparisons. Useful to get weekly/monthly energy consumption metrics for a building.
  Column: building_id - Foreign key to buildings table
  Column: building_name - Name of the building
  Column: energy_consumption_kw_las

In [20]:
from sqlalchemy import text
from sqlalchemy.orm import Session

def fetch_unique_column_values(session: Session, table_name: str, columns: list[str]) -> dict[str, list[str]]:
    values_by_column = {}
    for col in columns:
        query = text(f"SELECT DISTINCT {col} FROM {table_name} WHERE {col} IS NOT NULL")
        result = session.execute(query).fetchall()
        values = [str(row[0]) for row in result]
        values_by_column[col] = values
    return values_by_column

# Example usage
with engine.connect() as connection:
     with Session(connection) as session:
         
         table_name = 'smart_buildings.building'
         columns = ['name', 'type']
         unique_values = fetch_unique_column_values(session, table_name, columns)



In [7]:
import pickle
def save_vectorstore(vectorstore: dict, save_path: str):
    with open(save_path, "wb") as f:
        pickle.dump(vectorstore, f)

save_vectorstore(unique_values, "src/vectorstore.pkl")

In [11]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
# Load the model
model = SentenceTransformer('all-MiniLM-L6-v2')

def build_vectorstore(values_by_column: dict[str, list[str]], model: SentenceTransformer) -> dict:
    vectorstore = {}
    for col, values in values_by_column.items():
        embeddings = model.encode(values)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(np.array(embeddings))
        vectorstore[col] = {
            "index": index,
            "values": values
        }
    return vectorstore

# Build the vectorstore
vectorstore = build_vectorstore(unique_values, model)
# Save the vectorstore to a file
save_vectorstore(vectorstore, "src/vectorstore.pkl")

In [13]:
vectorstore["name"]["values"]

['Auditori - Conservatori',
 'Casa de Casablanca',
 'Associació De Pensionistes I Jubilats',
 'Ceip Federico García Lorca',
 'Pista de Botxes Benviure',
 'Escuela Antoni Tàpies',
 'Cp Especial Secanet',
 'Escola/CEIP Serrallo',
 'Cp La Hispanidad',
 'Proteccion Civil',
 'Associació de Veïns Colomí Parc',
 'Local Municipal De La Ermita',
 'Edifici Antic Hospital',
 'Escola Pau Casals',
 'Casal De Barri La Unión',
 'Escuela Antoni Gaudí',
 'Annex Ateneu Pablo Picasso',
 'Biblioteca Municipal',
 'Oficina Recursos Humanos',
 'Consultorio Medico Cala',
 'Cap Montbaig',
 'Camp Municipal De Futbol',
 'Deixalleria - Centre Logístic Municipal',
 'Habitatge Carrer Nou 3, 1º 2ª',
 'Viladecans Informació (ovi)',
 'Casal Municipal De Ponent',
 'Complejo Deportivo Ángel Nieto',
 'Mercado Municipal Y Urbanismo',
 'La Gralla',
 'Sede Administrativa',
 'Punt De Trobada Jove',
 'Estadio de Fútbol Joan Baptista Milà',
 'Lago Parque de la Muntanyeta',
 'Recinto de Servicios Municipales',
 'Escola Mestral 

In [6]:
import faiss
import numpy as np
import pickle
from sentence_transformers import SentenceTransformer
# Load the model
model = SentenceTransformer('all-MiniLM-L6-v2')

def load_vectorstore(save_path: str) -> dict:
    with open(save_path, "rb") as f:
        vectorstore = pickle.load(f)

    return vectorstore

# Load the vectorstore from a file
vectorstore = load_vectorstore("src/vectorstore.pkl")
# Example usage
query = "¿Cómo han sido el consumo mínimo del edificio Atrium Esports en comparación con los picos máximos?"
query_embedding = model.encode([query])[0]
query_embedding = np.array(query_embedding).reshape(1, -1)
index = vectorstore["name"]["index"]
D, indices = index.search(query_embedding, k=5)  # Get top 5 nearest neighbors
values = vectorstore["name"]["values"]
results = [values[i] for i in indices[0]]  # Get the corresponding values
print("Top 5 results:", results)


Top 5 results: ['Atrium Viladecans Esports', 'Atrium Arts Escèniques Teatre', 'Campo de Fútbol Marianao', 'Guardería La Marta', 'Camp Municipal De Futbol Torre-roja']


In [4]:
from sqlalchemy.dialects import registry
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy import inspect
inspector = inspect(engine)
try:
    print(inspector.get_table_names(schema='public'))
    print(inspector.get_view_names(schema='public'))
except SQLAlchemyError as e:
    print(f"Error al obtener nombres de tablas: {e}")


['buildings', 'energy_consumption', 'building_energy_consumption_metrics']
[]


In [8]:
inspector = inspect(engine)
try:
    # Get the schema of the table
    schema_name= DB_SCHEMA
 
    columns = inspector.get_columns('building_energy_consumption_metrics', schema=schema_name)
    schema = "table_name: " + 'location' + "\n"
    schema += "columns: \n"
    for column in columns:
        schema += f"  - name: {column['name']}\n"
        schema += f"    type: {column['type'].__class__.__name__}\n"
    
    print(schema)
except SQLAlchemyError as e:
    print(f"Error al obtener el esquema de la tabla location: {e}")
    

table_name: location
columns: 
  - name: building_id
    type: INTEGER
  - name: building_name
    type: VARCHAR
  - name: energy_consumption_kw_last_week
    type: DOUBLE_PRECISION
  - name: energy_consumption_kw_previous_week
    type: DOUBLE_PRECISION
  - name: energy_consumption_percentage_diff_previous_week
    type: DOUBLE_PRECISION
  - name: energy_consumption_kw_last_month
    type: DOUBLE_PRECISION
  - name: energy_consumption_kw_previous_month
    type: DOUBLE_PRECISION
  - name: energy_consumption_percentage_diff_previous_month
    type: DOUBLE_PRECISION



In [11]:
DB_SCHEMA

'smart_buildings'

In [24]:
from sqlalchemy import inspect, text
# Define the query
schema = DB_SCHEMA
query = "SELECT * FROM smart_buildings.location LIMIT 5"
with engine.begin() as connection:  # begin() ensures one transaction context
    if schema:
        connection.execute(text(f"SET search_path TO {schema}"))
    result = connection.execute(text(query))
    query_result = result.fetchall()

query_result
            

[(1, 'Vilaseca'),
 (3, 'Tarragona'),
 (4, 'Santboi'),
 (5, 'Boadilla'),
 (6, 'Viladecans')]

In [2]:
from sqlalchemy import create_engine,text,inspect

# Replace 'your_database.db' with the path to your SQLite file
db_path = "energy_consumption.db"
engine = create_engine(f"sqlite:///{db_path}")

# Test the connection
with engine.connect() as connection:
    result = connection.execute(text("SELECT sqlite_version();"))
    print("SQLite Version:", result.scalar())

SQLite Version: 3.45.3


In [3]:
def get_table_names(db_path):
    """Returns a list of table names in the SQLite database."""
    engine = create_engine(f"sqlite:///{db_path}")
    inspector = inspect(engine)
    return inspector.get_table_names()

def get_table_schema(db_path, table_names):
    """Returns a dictionary containing schema details for given table names."""
    engine = create_engine(f"sqlite:///{db_path}")
    inspector = inspect(engine)
    
    schema = {}
    for table in table_names:
        columns = inspector.get_columns(table)
        schema[table] = [
            {"name": col["name"], "type": str(col["type"]), "nullable": col["nullable"]}
            for col in columns
        ]
    
    return schema

table_names = get_table_names(db_path)
print("Table Names:", table_names)

table_schema = get_table_schema(db_path, table_names)
print("Table Schema:")
for table, columns in table_schema.items():
    print(f"\n{table}")
    for col in columns:
        print(f"{col['name']}: {col['type']} (nullable: {col['nullable']})")

Table Names: ['buildings', 'energy_metrics', 'weather_data']
Table Schema:

buildings
building_id: INTEGER (nullable: True)
name: TEXT (nullable: False)
building_type: TEXT (nullable: False)
construction_year: INTEGER (nullable: True)
total_area: REAL (nullable: True)
cluster_id: INTEGER (nullable: True)
location: TEXT (nullable: True)
address: TEXT (nullable: True)
floors: INTEGER (nullable: True)
occupancy_rate: REAL (nullable: True)
heating_type: TEXT (nullable: True)
cooling_type: TEXT (nullable: True)
last_renovation_year: INTEGER (nullable: True)
energy_certificate: TEXT (nullable: True)

energy_metrics
metric_id: INTEGER (nullable: True)
building_id: INTEGER (nullable: True)
date: DATE (nullable: True)
weekly_consumption_kwh: REAL (nullable: True)
monthly_consumption_kwh: REAL (nullable: True)
last_week_consumption_kwh: REAL (nullable: True)
last_month_consumption_kwh: REAL (nullable: True)
consumption_vs_cluster_pct: REAL (nullable: True)
consumption_vs_type_pct: REAL (nullable

In [6]:
print(table_schema)

{'buildings': [{'name': 'building_id', 'type': 'INTEGER', 'nullable': True}, {'name': 'name', 'type': 'TEXT', 'nullable': False}, {'name': 'building_type', 'type': 'TEXT', 'nullable': False}, {'name': 'construction_year', 'type': 'INTEGER', 'nullable': True}, {'name': 'total_area', 'type': 'REAL', 'nullable': True}, {'name': 'cluster_id', 'type': 'INTEGER', 'nullable': True}, {'name': 'location', 'type': 'TEXT', 'nullable': True}, {'name': 'address', 'type': 'TEXT', 'nullable': True}, {'name': 'floors', 'type': 'INTEGER', 'nullable': True}, {'name': 'occupancy_rate', 'type': 'REAL', 'nullable': True}, {'name': 'heating_type', 'type': 'TEXT', 'nullable': True}, {'name': 'cooling_type', 'type': 'TEXT', 'nullable': True}, {'name': 'last_renovation_year', 'type': 'INTEGER', 'nullable': True}, {'name': 'energy_certificate', 'type': 'TEXT', 'nullable': True}], 'energy_metrics': [{'name': 'metric_id', 'type': 'INTEGER', 'nullable': True}, {'name': 'building_id', 'type': 'INTEGER', 'nullable':

In [7]:
query = "SELECT name, building_type, construction_year, total_area, occupancy_rate FROM buildings LIMIT 5;"
with engine.connect() as connection:
    result = connection.execute(text(query))
    rows = result.fetchall()
    for row in rows:
        print(row)


('Edificio Linda Fields', 'Industrial', 1964, 987.71, 0.45)
('Edificio Walker Vista', 'Hospital', 1954, 1081.05, 0.63)
('Edificio Hicks Greens', 'Hospital', 1985, 16283.89, 0.82)
('Edificio Ryan Parks', 'Residencial', 1993, 2493.1, 0.53)
('Edificio Reid Park', 'Hospital', 2018, 2934.11, 0.66)


In [17]:
import yaml
from pathlib import Path

# Load from file
def load_schema_definition(file_path):
    with open(file_path, 'r') as f:
        return yaml.safe_load(f)

# Example usage
schema_file = Path(r'C:\Users\scerda\Documents\langgraph-postgres-energy-data\src\agent\schema_context.yaml')
schema_data = load_schema_definition(schema_file)

def load_schema_context(schema_data):
    """Load schema details from the provided data structure."""
    schema_description = []
    # Access the schema information
    schema_description.append(f"Schema: {schema_data['schema']}")

    # Access table information
    for table in schema_data['tables']:
    
        schema_description.append(f"\nTable: {table['name']}")

        schema_description.append(f"Description: {table['description']}")
        for column in table['columns']:
    
            schema_description.append(f"  Column: {column['name']} - {column['description']}")
    
    return "\n".join(schema_description)

def load_table_description(schema_data, table_name):
    """Load table description from the provided data structure."""
    for table in schema_data['tables']:
        if table['name'] == table_name:
            return {"name": table["name"],
                    "description": table["description"],
                    "columns": table["columns"]}
    return None

load_table_description(schema_data, 'buildings')

{'name': 'buildings',
 'description': 'Stores information about physical buildings, useful to query by name or other building attributes',
 'columns': [{'name': 'id',
   'type': 'integer',
   'description': 'Primary key identifier'},
  {'name': 'name', 'type': 'varchar(255)', 'description': 'Building name'},
  {'name': 'address',
   'type': 'text',
   'description': 'Physical address of the building'}]}