In [1]:
import tyr

# Required to get relative path of test datasets
import os

from pprint import pprint

In [2]:
# Rebuild the two schema objects from their saved .pkl snapshots.
# Both paths are resolved against the current working directory, so the result
# depends on where the kernel was started.
# NOTE(review): the loader presumably unpickles these files — only point it at
# files from a trusted source.
source_pkl = os.path.abspath(
    os.path.join(os.getcwd(), "..", "tests/saved_schema/source.pkl")
)
source = tyr.lineage.schema.core.load_schema_from_pkl(source_pkl)

staging_pkl = os.path.abspath(
    os.path.join(os.getcwd(), "..", "tests/saved_schema/staging.pkl")
)
staging = tyr.lineage.schema.core.load_schema_from_pkl(staging_pkl)

In [3]:
# Open the connection that the remaining cells run their SQL through.
# The database file path is resolved against the current working directory.
db_path = os.path.abspath(os.path.join(os.getcwd(), "..", "tests/test.duckdb"))

conn = tyr.database.connections.Connection(
    name="test",
    syntax="duckdb",
    database=db_path,
    read_only=False,  # later cells create tables through this connection
)

In [4]:
# Each schema carries its own settings SQL, which has to be executed before that
# schema's tables can be built (per the printed SQL, it installs and loads the
# 'spatial' extension). Print each statement for reference, then run it.
print(source.settings.sql)
conn.execute(source.settings.sql);

print("\n")

print(staging.settings.sql)
# The trailing semicolon stops the notebook from echoing the returned Response
# object; its repr embeds a memory address that differs on every run and only
# adds noise to the saved output.
conn.execute(staging.settings.sql);

INSTALL 'spatial';

LOAD 'spatial'


INSTALL 'spatial';

LOAD 'spatial'


<tyr.database.connections.Response at 0x7fc267c94610>

In [5]:
# Build the tables one schema at a time, upstream schema first:
# source, then staging.
rule = "________________________________________________________________________"

# Heading and rule are emitted on separate lines via sep="\n"
print("SOURCE", rule, sep="\n")
tyr.database.core.create_tables(source, conn=conn)

print("\n")
print("STAGING", rule, sep="\n")
tyr.database.core.create_tables(staging, conn=conn)

SOURCE
________________________________________________________________________
Retrieving build order...
Iterating through build order...
car_location
car_telemetry
circuits
meetings
race_control
session_status
track_status
sessions
weather
results
['car_location', 'car_telemetry', 'circuits', 'meetings', 'race_control', 'session_status', 'track_status', 'sessions', 'weather', 'results']
DROP TABLE IF EXISTS source.car_location;

CREATE TABLE source.car_location AS
SELECT DISTINCT *
FROM read_csv_auto('/home/miles/tyr/tests/datasets/car_location_session_*.tsv', delim='\t', header=True, union_by_name=True)
DROP TABLE IF EXISTS source.car_telemetry;

CREATE TABLE source.car_telemetry AS
SELECT DISTINCT *
FROM read_csv_auto('/home/miles/tyr/tests/datasets/car_telemetry_session_*.tsv', delim='\t', header=True, union_by_name=True)
DROP TABLE IF EXISTS source.circuits;

CREATE TABLE source.circuits AS
SELECT DISTINCT *
FROM st_read('/home/miles/tyr/tests/datasets/f1_circuits.geojson')
DROP 

In [6]:
# Sanity check: show what the connection now reports in its table catalogue,
# followed by the column listing.

print("TABLES:")
table_listing = conn.tables()
display(table_listing)

print("\n")
print("COLUMNS:")
column_listing = conn.columns()
display(column_listing)

TABLES:


Unnamed: 0,table_catalog,table_schema,table_name,table_type,self_referencing_column_name,reference_generation,user_defined_type_catalog,user_defined_type_schema,user_defined_type_name,is_insertable_into,is_typed,commit_action,TABLE_COMMENT
0,test,singapore,car_location,BASE TABLE,,,,,,YES,NO,,
1,test,singapore,car_telemetry,BASE TABLE,,,,,,YES,NO,,
2,test,singapore,circuits,BASE TABLE,,,,,,YES,NO,,
3,test,singapore,meetings,BASE TABLE,,,,,,YES,NO,,
4,test,singapore,race_control,BASE TABLE,,,,,,YES,NO,,
5,test,singapore,results,BASE TABLE,,,,,,YES,NO,,
6,test,singapore,sessions,BASE TABLE,,,,,,YES,NO,,
7,test,singapore,session_status,BASE TABLE,,,,,,YES,NO,,
8,test,singapore,track_status,BASE TABLE,,,,,,YES,NO,,
9,test,singapore,weather,BASE TABLE,,,,,,YES,NO,,




COLUMNS:


Unnamed: 0,table_catalog,table_schema,table_name,column_name,ordinal_position,column_default,is_nullable,data_type,character_maximum_length,character_octet_length,...,identity_generation,identity_start,identity_increment,identity_maximum,identity_minimum,identity_cycle,is_generated,generation_expression,is_updatable,COLUMN_COMMENT
0,test,singapore,car_location,driver_number,1,,YES,INTEGER,,,...,,,,,,,,,,
1,test,singapore,car_location,event_ts,2,,YES,TIMESTAMP,,,...,,,,,,,,,,
2,test,singapore,car_location,status,3,,YES,VARCHAR,,,...,,,,,,,,,,
3,test,singapore,car_location,x,4,,YES,FLOAT,,,...,,,,,,,,,,
4,test,singapore,car_location,y,5,,YES,FLOAT,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,test,staging,weather,pressure,4,,YES,"DECIMAL(15,0)",,,...,,,,,,,,,,
266,test,staging,weather,rainfall,5,,YES,"DECIMAL(15,0)",,,...,,,,,,,,,,
267,test,staging,weather,track_temp,6,,YES,"DECIMAL(15,1)",,,...,,,,,,,,,,
268,test,staging,weather,wind_direction,7,,YES,"DECIMAL(15,0)",,,...,,,,,,,,,,


In [7]:
# With the tables built, run the SQL attached to staging's `results` table
# through the connection and display what comes back.
results_sql = staging.tables.results.sql
results_response = conn.execute(results_sql)
# .df() presumably converts the response into a DataFrame for rich display
results_response.df()

Unnamed: 0,session_key,driver_number,broadcast_name,abbreviation,driver_id,team_name,team_colour,team_id,first_name,last_name,...,position,classified_position,grid_position,qualifying_1,qualifying_2,qualifying_3,classified_time,session_status,points,laps_completed
0,9165,31,E OCON,OCO,ocon,Alpine,2293D1,alpine,Esteban,Ocon,...,18.0,,8.0,NaT,NaT,NaT,NaT,Retired,0,42
1,9165,14,F ALONSO,ALO,alonso,Aston Martin,358C75,aston_martin,Fernando,Alonso,...,15.0,15.0,7.0,NaT,NaT,NaT,0 days 00:01:27.603000,Finished,0,62
2,9165,1,M VERSTAPPEN,VER,max_verstappen,Red Bull Racing,3671C6,red_bull,Max,Verstappen,...,5.0,5.0,11.0,NaT,NaT,NaT,0 days 00:00:21.441000,Finished,10,62
3,9165,2,L SARGEANT,SAR,sargeant,Williams,37BEDD,williams,Logan,Sargeant,...,14.0,14.0,18.0,NaT,NaT,NaT,0 days 00:01:26.889000,Finished,0,62
4,9165,22,Y TSUNODA,TSU,tsunoda,AlphaTauri,5E8FAA,alphatauri,Yuki,Tsunoda,...,19.0,,15.0,NaT,NaT,NaT,NaT,Retired,0,0
5,9165,16,C LECLERC,LEC,leclerc,Ferrari,F91536,ferrari,Charles,Leclerc,...,4.0,4.0,3.0,NaT,NaT,NaT,0 days 00:00:21.177000,Finished,12,62
6,9165,4,L NORRIS,NOR,norris,McLaren,F58020,mclaren,Lando,Norris,...,2.0,2.0,4.0,NaT,NaT,NaT,0 days 00:00:00.812000,Finished,18,62
7,9165,24,G ZHOU,ZHO,zhou,Alfa Romeo,C92D4B,alfa,Guanyu,Zhou,...,12.0,12.0,19.0,NaT,NaT,NaT,0 days 00:01:23.649000,Finished,0,62
8,9165,55,C SAINZ,SAI,sainz,Ferrari,F91536,ferrari,Carlos,Sainz,...,1.0,1.0,1.0,NaT,NaT,NaT,0 days 01:46:37.418000,Finished,25,62
9,9165,44,L HAMILTON,HAM,hamilton,Mercedes,6CD3BF,mercedes,Lewis,Hamilton,...,3.0,3.0,5.0,NaT,NaT,NaT,0 days 00:00:01.269000,Finished,16,62


In [8]:
# Close the connection once the walkthrough is finished.
# NOTE(review): presumably `conn` cannot be used after this point — re-run the
# connection cell before executing any further queries.
conn.close()