In [2]:
# Run imports
import ibis
from ibis import _
import easier as ezr
import pandas as pd


In [3]:
# Open an ibis connection to Postgres and show which tables already exist.
# On a database this notebook has never been run against, the printed list
# is expected to be empty.
pg_url = ezr.pg_creds_from_env()
conn = ibis.postgres.connect(url=pg_url)
existing_tables = conn.list_tables()
print(existing_tables)

['one', 'two']


In [3]:
# Create tables I will use for testing.
# The DDL below drops both tables if they exist and then recreates them, so
# this cell can be re-run to get back to a known starting state.
pg = ezr.PG(**ezr.pg_creds_from_env(kind='dict'))
pg.query("""
-- Deleting the tables if they already exist
DROP TABLE IF EXISTS one CASCADE;
DROP TABLE IF EXISTS two CASCADE;

-- Creating the 'two' table
CREATE TABLE two (
    id SERIAL PRIMARY KEY,
    time TIMESTAMP WITH TIME ZONE,
    junk INTEGER
);

-- Creating the 'one' table
CREATE TABLE one (
    id SERIAL PRIMARY KEY,
    time TIMESTAMP WITH TIME ZONE,
    two_id INTEGER, 
    junk INTEGER
);

""")
pg.run()


# Populate tables with two dataframes.
# NOTE(review): a fresh ibis connection is opened here, presumably so it does
# not carry metadata for the tables that were just dropped -- confirm.
conn = ibis.postgres.connect(url=ezr.pg_creds_from_env())

# `one.two_id` holds the `id` of the matching row in `two` (10 and 20).
# NOTE(review): the timestamps are timezone-naive while the columns are
# TIMESTAMP WITH TIME ZONE; presumably the session time zone decides how they
# are stored -- confirm.
df_one = pd.DataFrame([
    {'id': 1, 'two_id': 10, 'time': pd.Timestamp('2023-12-01 00:01:38.966677'), 'junk': 1},
    {'id': 2, 'two_id': 20, 'time': pd.Timestamp('2023-12-01 00:02:50.506458'), 'junk': 1},
])
df_two = pd.DataFrame([
    {'id': 10,  'time': pd.Timestamp('12/10/2023'), 'junk': 1},
    {'id': 20,  'time': pd.Timestamp('12/20/2023'), 'junk': 1},
])

# Append the rows to the freshly created tables.
conn.insert('one', df_one)
conn.insert('two', df_two)

In [4]:
# Construct an ibis query.
# Left side: rows of `one` with id 1 or 2, reduced to three columns, with
# `time` exposed under the name `one_time`.
one = (
    conn.tables.one
    .filter(_.id.isin((1, 2, )))
    .select(['id', 'two_id', 'time'])
    .rename({'one_time': 'time'})
)

# Right side: only the `id` column of `two`.
two = conn.tables.two.select(['id',])

# Left join on one.two_id == two.id.
three = one.left_join(two, (one.two_id == two.id))

# Show the SQL that ibis generates for this expression, padded by blank lines.
print()
ibis.show_sql(three)
print()

# Run the expression for real.  The result looks as expected.
three.execute()



WITH t0 AS (
  SELECT
    t4.id AS id,
    t4.time AS time,
    t4.two_id AS two_id,
    t4.junk AS junk
  FROM one AS t4
  WHERE
    t4.id IN (1, 2)
), t1 AS (
  SELECT
    t0.id AS id,
    t0.two_id AS two_id,
    t0.time AT TIME ZONE 'UTC' AS time
  FROM t0
), t3 AS (
  SELECT
    t1.id AS id,
    t1.two_id AS two_id,
    t1.time AT TIME ZONE 'UTC' AS one_time
  FROM t1
), t2 AS (
  SELECT
    t4.id AS id
  FROM two AS t4
)
SELECT
  CAST(t3.id AS INT) AS id,
  t3.two_id,
  t3.one_time AT TIME ZONE 'UTC' AS one_time,
  CAST(t2.id AS INT) AS id_right
FROM t3
LEFT OUTER JOIN t2
  ON t3.two_id = t2.id



Unnamed: 0,id,two_id,one_time,id_right
0,1,10,2023-12-01 00:01:38.966677+00:00,10
1,2,20,2023-12-01 00:02:50.506458+00:00,20


## Unexpected results here.
I do an exact copy/paste of the sql above and create a table from it.
The returned dataframe does not have timezone-aware timestamps for the `one_time` field. It seems to me like it should.

In [5]:
# SQL pasted verbatim from the `ibis.show_sql` output of the previous cell.
pasted_sql = """
WITH t0 AS (
  SELECT
    t4.id AS id,
    t4.time AS time,
    t4.two_id AS two_id,
    t4.junk AS junk
  FROM one AS t4
  WHERE
    t4.id IN (1, 2)
), t1 AS (
  SELECT
    t0.id AS id,
    t0.two_id AS two_id,
    t0.time AT TIME ZONE 'UTC' AS time
  FROM t0
), t3 AS (
  SELECT
    t1.id AS id,
    t1.two_id AS two_id,
    t1.time AT TIME ZONE 'UTC' AS one_time
  FROM t1
), t2 AS (
  SELECT
    t4.id AS id
  FROM two AS t4
)
SELECT
  CAST(t3.id AS INT) AS id,
  t3.two_id,
  t3.one_time AT TIME ZONE 'UTC' AS one_time,
  CAST(t2.id AS INT) AS id_right
FROM t3
LEFT OUTER JOIN t2
  ON t3.two_id = t2.id
    """

# No `schema=` argument is passed here on purpose: ibis is left to work out
# the result column types on its own.
table = conn.sql(pasted_sql)
table.execute()

Unnamed: 0,id,two_id,one_time,id_right
0,1,10,2023-12-01 00:01:38.966677,10
1,2,20,2023-12-01 00:02:50.506458,20


## Explicitly supplying an output schema works

In [9]:
# Declare the result columns up front, marking `one_time` as a UTC timestamp.
output_schema = ibis.schema(
    {
        'id': 'int64',
        'two_id': 'int64',
        'one_time': "timestamp('UTC')",
        'id_right': 'int64',
    }
)

# Same generated SQL as before; only the explicit schema is new.
pasted_sql = """
WITH t0 AS (
  SELECT
    t4.id AS id,
    t4.time AS time,
    t4.two_id AS two_id,
    t4.junk AS junk
  FROM one AS t4
  WHERE
    t4.id IN (1, 2)
), t1 AS (
  SELECT
    t0.id AS id,
    t0.two_id AS two_id,
    t0.time AT TIME ZONE 'UTC' AS time
  FROM t0
), t3 AS (
  SELECT
    t1.id AS id,
    t1.two_id AS two_id,
    t1.time AT TIME ZONE 'UTC' AS one_time
  FROM t1
), t2 AS (
  SELECT
    t4.id AS id
  FROM two AS t4
)
SELECT
  CAST(t3.id AS INT) AS id,
  t3.two_id,
  t3.one_time AT TIME ZONE 'UTC' AS one_time,
  CAST(t2.id AS INT) AS id_right
FROM t3
LEFT OUTER JOIN t2
  ON t3.two_id = t2.id
  """

table = conn.sql(pasted_sql, schema=output_schema)
table.execute()

Unnamed: 0,id,two_id,one_time,id_right
0,1,10,2023-12-01 00:01:38.966677+00:00,10
1,2,20,2023-12-01 00:02:50.506458+00:00,20
