# Includes

In [None]:
# SQL
import sqlalchemy as sa
from sqlalchemy.engine import URL
from sqlalchemy import create_engine
import duckdb

# file system
import os

# data manipulation
import pandas as pd

# timing
from time import time

# utility
import yaml

# Variables

In [None]:
# Connection settings; every one of these is filled in from the dbt profile
# below. They start as None so the names exist even if loading fails part-way.
server = None
port = None
warehouse = None
shard0 = None
shard1 = None
items = None
username = None
password = None

# Which profile / output target inside profiles.yml to read.
target = 'dev'
profile = 'sitecore_warehouse'

# Build "<home>/.dbt/" with os.path.join instead of a hard-coded "~\\.dbt\\":
# the backslash form is only expanded by expanduser() on Windows. The empty
# last component keeps the trailing separator that start_path had before.
start_path = os.path.join(os.path.expanduser("~"), ".dbt", "")
with open(f"{start_path}profiles.yml", "r", encoding="utf-8") as f:
    # safe_load only builds plain Python objects from the YAML document.
    config = yaml.safe_load(f)

# All settings live under the same profile/target node, so look it up once.
# A missing profile, target or key raises KeyError here.
_target_config = config[profile]["outputs"][target]
server = _target_config["server"]
port = _target_config["port"]
warehouse = _target_config["database"]
shard0 = _target_config["database_shard0"]
shard1 = _target_config["database_shard1"]
items = _target_config["database_items"]
username = _target_config["user"]
password = _target_config["password"]

# Connection

In [None]:
# Raw ODBC connection string for the warehouse database, built from the
# settings read out of the dbt profile.
# NOTE(review): values are interpolated unescaped, so a password containing
# ';' or '}' would presumably break the string - confirm against real creds.
connection_string = f'''Driver={{SQL Server}};
                        Server={server};
                        Port={port};
                        Database={warehouse};
                        uid={username};
                        pwd={password};'''

# Hand the whole ODBC string to pyodbc through the "odbc_connect" query key.
connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})

# fast_executemany is an option of the mssql+pyodbc dialect and must be given
# to create_engine(); URL.create() has no such parameter and raises TypeError
# when it is passed there.
connection_engine = create_engine(connection_url, fast_executemany=True)

# Functions

In [None]:
def import_data(table_list):
    """Append each table's parquet file to the SQL table of the same name.

    For every name in ``table_list`` the file ``parquet/<name>.parquet`` is
    read with the fastparquet engine and its rows are appended, through the
    module-level ``connection_engine``, to the table called ``<name>``.
    Rows are written in batches of 1000 and the DataFrame index is not stored.

    Args:
        table_list: Iterable of table names; each must have a matching
            parquet file under the relative ``parquet/`` directory.
    """
    for table_name in table_list:
        frame = pd.read_parquet(f'parquet/{table_name}.parquet', engine='fastparquet')
        frame.to_sql(
            name=table_name,
            con=connection_engine,
            if_exists='append',
            chunksize=1000,
            index=False,
        )

In [None]:
# cursor = connection_engine.cursor()
# from pandas import read_parquet
# df = read_parquet('<file-path>', engine='fastparquet')
# df.fillna(value='', inplace=True)
# for index, row in df.iterrows():
#      sql = "INSERT INTO " +  + "([" + cols + "]) VALUES (" + "?," * (len(row) - 1) + "?)"
#      cursor.execute(sql, tuple(row))
#      connection_engine.commit()

# Import from Parquet

In [None]:
# Tables to load; import_data reads parquet/<name>.parquet for each entry
# and appends its rows to the SQL table of the same name.
table_list = ['cdp_guests', 'cdp_sessions', 'cdp_events']

import_data(table_list)