In [1]:
from IPython.display import Markdown, display

In [2]:
from sqlalchemy import (
    create_engine,
    MetaData,
    Table,
    Column,
    String,
    Integer,
    select,
)
from sqlalchemy import text

In [3]:
# Database connection parameters
db_params = {
    'dbname': 'db_daily_weather',
    'user': 'sadewawicak',
    'password': 'postgres',
    'host': 'localhost',
    'port': '5432'
}

# Connect to the PostgreSQL database
# Create the connection string
connection_string = f"postgresql+psycopg2://{db_params['user']}:{db_params['password']}@{db_params['host']}:{db_params['port']}/{db_params['dbname']}?options=-csearch_path=public"

# Create the SQLAlchemy engine
engine = create_engine(connection_string)
metadata_obj = MetaData()

In [4]:
from llama_index.core import SQLDatabase, ServiceContext
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
Settings.llm = Ollama(model="mistral", request_timeout=120.0)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

In [7]:
# from ollama import ServiceContext, LLM

llm = Ollama(model="mistral", request_timeout=60.0)

# try to connect and response
# response = llm.complete("What is the capital of Indonesia?")
# response

In [8]:
sql_database = SQLDatabase(engine, include_tables=["weather_indonesia"])

In [9]:
stmt = select(
    Column('time'),
    Column('city'),
    Column('sunrise'),
    Column('sunset')
).select_from(text('weather_indonesia'))
stmt

<sqlalchemy.sql.selectable.Select object at 0x307e1f8c0>

In [10]:
with engine.connect() as connection:
    results = connection.execute(stmt).fetchall()
    print(results)

[('2024-01-01', 'Yogyarakrta', '2024-01-01T05:24', '2024-01-01T17:59'), ('2024-01-02', 'Yogyarakrta', '2024-01-02T05:25', '2024-01-02T17:59'), ('2024-01-03', 'Yogyarakrta', '2024-01-03T05:26', '2024-01-03T18:00'), ('2024-01-04', 'Yogyarakrta', '2024-01-04T05:26', '2024-01-04T18:00'), ('2024-01-05', 'Yogyarakrta', '2024-01-05T05:27', '2024-01-05T18:00'), ('2024-01-06', 'Yogyarakrta', '2024-01-06T05:27', '2024-01-06T18:01'), ('2024-01-07', 'Yogyarakrta', '2024-01-07T05:28', '2024-01-07T18:01'), ('2024-01-08', 'Yogyarakrta', '2024-01-08T05:28', '2024-01-08T18:02'), ('2024-01-09', 'Yogyarakrta', '2024-01-09T05:29', '2024-01-09T18:02'), ('2024-01-10', 'Yogyarakrta', '2024-01-10T05:29', '2024-01-10T18:02'), ('2024-01-11', 'Yogyarakrta', '2024-01-11T05:30', '2024-01-11T18:02'), ('2024-01-12', 'Yogyarakrta', '2024-01-12T05:30', '2024-01-12T18:03'), ('2024-01-13', 'Yogyarakrta', '2024-01-13T05:31', '2024-01-13T18:03'), ('2024-01-14', 'Yogyarakrta', '2024-01-14T05:31', '2024-01-14T18:03'), ('202

In [11]:
from llama_index.core.indices.struct_store.sql_query import (
    SQLTableRetrieverQueryEngine,
)
from llama_index.core.objects import (
    SQLTableNodeMapping,
    ObjectIndex,
    SQLTableSchema,
)
from llama_index.core.query_engine import NLSQLTableQueryEngine
from llama_index.core import VectorStoreIndex

query_engine = NLSQLTableQueryEngine(
    sql_database=sql_database,
    tables=["weather_indonesia"],
)

In [12]:
query_str = "Which city in the table?"
response = query_engine.query(query_str)
response

Response(response=' Based on the given query and SQL response, it seems there was an error in executing the SQL statement provided. I cannot directly synthesize a response from the query results because no results were returned due to the error. However, I can tell you that the SQL statement "SELECT DISTINCT city FROM weather\\_indonesia;" is intended to retrieve unique cities from the `weather_indonesia` table if it exists and has a column named \'city\'. If you have access to a working database with this table, please ensure its syntax and structure are correct before running this query.', source_nodes=[NodeWithScore(node=TextNode(id_='7e6d0657-0df0-4b69-8eb4-39d251d0635e', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text="Error: Statement 'SELECT DISTINCT city FROM weather\\\\_indonesia;' is invalid SQL.", mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', meta

In [45]:
query_str = "Return the top 5 temperature_2m_max with the highest value?"
response = query_engine.query(query_str)
response

Response(response=' Based on the query results, the top 5 cities with the highest temperature_2m_max values are all "Yogyarakrta" with the following temperatures: [33.8, 33.5, 33.4, 33.2, 33.2].', source_nodes=[NodeWithScore(node=TextNode(id_='589b2b7b-a1a9-4043-a3c8-d2c49f5d1f52', embedding=None, metadata={'sql_query': 'SELECT city, temperature_2m_max FROM weather_indonesia ORDER BY temperature_2m_max DESC LIMIT 5;', 'result': [('Yogyarakrta', 33.8), ('Yogyarakrta', 33.5), ('Yogyarakrta', 33.4), ('Yogyarakrta', 33.2), ('Yogyarakrta', 33.2)], 'col_keys': ['city', 'temperature_2m_max']}, excluded_embed_metadata_keys=['sql_query', 'result', 'col_keys'], excluded_llm_metadata_keys=['sql_query', 'result', 'col_keys'], relationships={}, text="[('Yogyarakrta', 33.8), ('Yogyarakrta', 33.5), ('Yogyarakrta', 33.4), ('Yogyarakrta', 33.2), ('Yogyarakrta', 33.2)]", mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: 

In [13]:
# Query Time Retrieval for tables for text-to-sql
from llama_index.core.indices.struct_store.sql_query import (
    SQLTableRetrieverQueryEngine,
)
from llama_index.core.objects import (
    SQLTableNodeMapping,
    ObjectIndex,
    SQLTableSchema,
)
from llama_index.core import VectorStoreIndex


In [14]:
weather_stats_text = (
    """
    You are an AI trained to analyze weather data for Indonesia. The data is stored in a table called "weather_indonesia" with the following columns and descriptions:

    time: The date of weather data (format: text, ISO8601).
    weather_code: The most severe weather condition on a given day (WMO code, float).
    temperature_2m_max: Maximum daily air temperature at 2 meters above ground (°C, float).
    temperature_2m_min: Minimum daily air temperature at 2 meters above ground (°C, float).
    sunrise: Sun rise times (ISO8601 text).
    sunset: Sun set times (ISO8601 text).
    daylight_duration: Number of seconds of daylight per day (float).
    sunshine_duration: The number of seconds of sunshine per day (float). Sunshine duration is always less than daylight duration.
    uv_index_max: Maximum UV index during the day (float).
    uv_index_clear_sky_max: Maximum UV index under clear sky conditions (float).
    wind_speed_10m_max: Maximum wind speed at 10 meters above ground (float, km/h).
    wind_direction_10m_dominant: Dominant wind direction at 10 meters above ground (float, degrees).
    city: Name of the city (text).

    Important Note: If you are unsure of the correct answer or the generated SQL query is incorrect, respond with: "I can't answer that."
    """
)


sql_database_ = SQLDatabase(engine)
table_node_mapping = SQLTableNodeMapping(sql_database_)
table_schema_objs = [
    SQLTableSchema(table_name="weather_indonesia", context_str=weather_stats_text)  # modify the tuple to a string
]  # add a SQLTableSchema for each table

obj_index = ObjectIndex.from_objects(
    table_schema_objs,
    table_node_mapping,
    VectorStoreIndex,
)
query_engine = SQLTableRetrieverQueryEngine(
    sql_database_, obj_index.as_retriever(similarity_top_k=1)
)

In [18]:
response = query_engine.query("What is the highest city in the data?")
response.response.split()
# display(Markdown(f"{response}"))

' The query result would depend on the availability of a table containing city elevation data. Based on the given query and the provided table description, it appears that such information is not available in the \'weather_indonesia\' table. Therefore, I cannot provide an answer to the question "What is the highest city in the data?" with certainty.'