Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
973a433
First attempt at genericizing data source
jcheng5 Apr 4, 2025
8de0ac7
Unify prompts by adding chevron Python dependency
jcheng5 Apr 4, 2025
53c7df3
Make prompt aware of what engine is being used
jcheng5 Apr 18, 2025
a2122f2
Replace SQLite support with SQLAlchemy support
jcheng5 Apr 18, 2025
a218fb9
Don't fail when given table name's case differs from SQLAlchemy Inspe…
jcheng5 Apr 23, 2025
dc0814e
Forgot import
jcheng5 May 1, 2025
9d95d1d
Have server() return proper class with typed methods, instead of dict
jcheng5 Jun 2, 2025
aeb87dd
Auto-create sqlite database for example
jcheng5 Jun 2, 2025
c38b567
Have init() take data frame or sqlalchemy engine directly
jcheng5 Jun 2, 2025
e7972e8
Merge remote-tracking branch 'origin/main' into generic-datasource-im…
jcheng5 Jun 3, 2025
57922b3
Use GPT-4.1 by default, not GPT-4, yuck
jcheng5 Jun 3, 2025
84d30ad
Merge remote-tracking branch 'origin/generic-datasource' into generic…
jcheng5 Jun 3, 2025
a08764b
Update README
jcheng5 Jun 3, 2025
374bdfb
this should significantly speed up schema generation
npelikan Jun 6, 2025
e294b1b
another speedup
npelikan Jun 6, 2025
b179ea6
ruff formatting
npelikan Jun 6, 2025
2cbe199
updating so formatting checks pass
npelikan Jun 6, 2025
8f59aa7
adding a generic r datasource
npelikan Jun 7, 2025
2ececf5
critical change: should return a lazy table rather than executing by …
npelikan Jun 7, 2025
f4ca445
edits to test suite and devtools::check() passing
npelikan Jun 7, 2025
c9b03da
Merge pull request #1 from posit-dev/main
npelikan Jun 7, 2025
48503f0
example update
npelikan Jun 7, 2025
4809615
error message for a footgun
npelikan Jun 9, 2025
a1ae3b6
Merge branch 'main' into r-generic-datasource
npelikan Jun 12, 2025
24ef182
Merge pull request #4 from npelikan/r-generic-datasource
npelikan Jun 12, 2025
3b289c7
update to use s3 classes to simplify the code
npelikan Jun 19, 2025
7052d6e
Merge pull request #5 from npelikan/r-generic-datasource
npelikan Jun 19, 2025
146777a
README update
npelikan Jun 19, 2025
9911965
added injection of SQL dialect into prompt. Also cleaned up test naming
npelikan Jun 19, 2025
8d05d7f
more simplification
npelikan Jun 19, 2025
b18b570
Merge branch 'main' into main
npelikan Jun 25, 2025
41c9e1e
merge fix
npelikan Jun 25, 2025
e347110
small dep edit
npelikan Jun 26, 2025
753c5af
Code review
jcheng5 Jun 26, 2025
1ee065b
more tests, and code review edits
npelikan Jun 26, 2025
5492b0f
testing changes
npelikan Jun 27, 2025
1ff4fe5
more test passing
npelikan Jun 27, 2025
eb9104c
cleaning up gitignores
npelikan Jun 27, 2025
09231fa
updating python datasource to prevent collisions
npelikan Jun 27, 2025
9e53ca3
Merge remote-tracking branch 'posit-dev/main'
npelikan Jul 1, 2025
150e550
fix for github actions
npelikan Jul 1, 2025
c589444
adding tests to python github action (as we have some tests now!)
npelikan Jul 1, 2025
98b2f29
edits for gha
npelikan Jul 1, 2025
3fd17e4
makefile edit
npelikan Jul 1, 2025
e6731be
air format
npelikan Jul 8, 2025
d45820f
code cleanup, better tests, and dropping `glue` dependency
npelikan Jul 9, 2025
3f55974
Fix error in qc.df() when no query is active
jcheng5 Jul 16, 2025
395e116
Adding dplyr::sql() identifier to get_lazy_query() to fix failing tests.
npelikan Jul 17, 2025
d86888d
adding more tests to cover the empty execute_data query use case and …
npelikan Jul 17, 2025
765250e
description edit to pass routine test
npelikan Jul 17, 2025
6432fa1
edit to remove `tbl` output per discussion on #28
npelikan Jul 28, 2025
de0a31e
better data source nested identifier handling
npelikan Jul 29, 2025
b6eeb4a
fixing a missing quote identifier
npelikan Jul 29, 2025
32a65fc
doc cleanup
npelikan Jul 29, 2025
1325ed1
a bit more helpful error message
npelikan Jul 29, 2025
0d01d82
even more helpful erroring
npelikan Jul 29, 2025
982c58f
Merge remote-tracking branch 'posit-dev/main'
npelikan Aug 1, 2025
08daee8
Merge remote-tracking branch 'posit-dev/main'
npelikan Aug 27, 2025
bd04d25
Merge remote-tracking branch 'posit-dev/main'
npelikan Sep 3, 2025
bc2ce5a
fix to df_to_html
npelikan Sep 3, 2025
82f2e40
formatting
npelikan Sep 3, 2025
fc53bb0
more formatting
npelikan Sep 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions pkg-py/src/querychat/querychat.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,20 @@ def df_to_html(df: IntoFrame, maxrows: int = 5) -> str:
HTML string representation of the table

"""
# Convert to Narwhals DataFrame if it's not already one
if isinstance(df, (nw.LazyFrame, nw.DataFrame)):
df_short = df.lazy().head(maxrows).collect()
nrow_full = df.lazy().select(nw.len()).collect().item()
nw_df = df
else:
raise TypeError("df must be a Narwhals DataFrame or LazyFrame")
# Try to convert using nw.from_native (supports pandas and other formats)
try:
nw_df = nw.from_native(df)
except Exception as e:
raise TypeError(
"df must be a Narwhals DataFrame, LazyFrame, or compatible DataFrame (e.g., pandas)",
) from e

df_short = nw_df.lazy().head(maxrows).collect()
nrow_full = nw_df.lazy().select(nw.len()).collect().item()

# Generate HTML table
table_html = df_short.to_pandas().to_html(
Expand Down
116 changes: 116 additions & 0 deletions pkg-py/tests/test_query_function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import sqlite3
import tempfile
from pathlib import Path

import pandas as pd
import pytest
from sqlalchemy import create_engine
from src.querychat.datasource import DataFrameSource, SQLAlchemySource
from src.querychat.querychat import df_to_html


@pytest.fixture
def sample_dataframe():
    """Provide a small pandas DataFrame of employee records for the tests.

    The frame has five rows and the columns ``id``, ``name``, ``age`` and
    ``salary``, in that order.
    """
    column_names = ["id", "name", "age", "salary"]
    employee_rows = [
        (1, "Alice", 25, 50000),
        (2, "Bob", 30, 60000),
        (3, "Charlie", 35, 70000),
        (4, "Diana", 28, 55000),
        (5, "Eve", 32, 65000),
    ]
    return pd.DataFrame(employee_rows, columns=column_names)


@pytest.fixture
def test_db_engine_with_data():
    """Yield a SQLAlchemy engine bound to a temporary SQLite database.

    The database file holds a single ``employees`` table (``id``, ``name``,
    ``age``, ``salary``) populated with five rows.

    Yields:
        The engine created with ``create_engine`` for the temporary file.

    Teardown disposes the engine and deletes the database file. Cleanup also
    runs when populating the database fails, so no temp file is left behind.
    """
    # delete=False keeps the file on disk after close() so that sqlite3 and
    # SQLAlchemy can reopen it by path.
    temp_db = tempfile.NamedTemporaryFile(delete=False, suffix=".db")  # noqa: SIM115
    temp_db.close()

    engine = None
    try:
        conn = sqlite3.connect(temp_db.name)
        try:
            cursor = conn.cursor()

            cursor.execute("""
                CREATE TABLE employees (
                    id INTEGER PRIMARY KEY,
                    name TEXT,
                    age INTEGER,
                    salary REAL
                )
            """)

            test_data = [
                (1, "Alice", 25, 50000),
                (2, "Bob", 30, 60000),
                (3, "Charlie", 35, 70000),
                (4, "Diana", 28, 55000),
                (5, "Eve", 32, 65000),
            ]

            cursor.executemany(
                "INSERT INTO employees (id, name, age, salary) VALUES (?, ?, ?, ?)",
                test_data,
            )

            conn.commit()
        finally:
            # Always release the setup connection, even if an insert failed.
            conn.close()

        engine = create_engine(f"sqlite:///{temp_db.name}")
        yield engine
    finally:
        # Release pooled connections before removing the file; an open handle
        # prevents deletion on some platforms (notably Windows).
        if engine is not None:
            engine.dispose()
        Path(temp_db.name).unlink(missing_ok=True)


def test_df_to_html_with_dataframe_source_result(sample_dataframe):
    """Check df_to_html() accepts what DataFrameSource.execute_query() returns."""
    query = "SELECT * FROM employees WHERE age > 25"
    query_result = DataFrameSource(sample_dataframe, "employees").execute_query(query)

    # The query result is passed to df_to_html() as-is, with no conversion.
    rendered = df_to_html(query_result)

    assert isinstance(rendered, str)
    # The table markup plus every employee older than 25 must be present.
    for expected_fragment in ("<table", "Bob", "Charlie", "Diana", "Eve"):
        assert expected_fragment in rendered


def test_df_to_html_with_sqlalchemy_source_result(test_db_engine_with_data):
    """Check df_to_html() accepts what SQLAlchemySource.execute_query() returns."""
    employees = SQLAlchemySource(test_db_engine_with_data, "employees")
    older_than_25 = employees.execute_query("SELECT * FROM employees WHERE age > 25")

    # The query result is passed to df_to_html() as-is, with no conversion.
    markup = df_to_html(older_than_25)

    assert isinstance(markup, str)
    assert "<table" in markup
    # Every employee matched by the WHERE clause should be rendered.
    expected_names = ["Bob", "Charlie", "Diana", "Eve"]
    assert all(name in markup for name in expected_names)


def test_df_to_html_with_truncation(sample_dataframe):
    """Check df_to_html() reports truncation when maxrows is below the row count."""
    all_rows = DataFrameSource(sample_dataframe, "employees").execute_query(
        "SELECT * FROM employees",
    )

    # Five rows are available; capping at three must trigger the notice.
    rendered = df_to_html(all_rows, maxrows=3)

    assert "Showing only the first 3 rows out of 5" in rendered
    assert "<table" in rendered
Loading