# Includes

In [None]:
# SQL
import pyodbc 
import sqlalchemy as sa
from sqlalchemy.engine import URL
from sqlalchemy import create_engine

# file system
import os

# timing
import time

# utility
import re
import yaml

# dbt
import sys 
import dbt
from dbt.cli.main import dbtRunner, dbtRunnerResult

# Variables

In [None]:
# dbt target and profile whose connection settings are reused by this notebook.
target = 'dev'
profile = 'sitecore_warehouse'

# Directory that holds the dbt profiles file. Built with os.path.join so the
# separator matches the host OS; the trailing separator is kept so the value
# can still be used as a plain string prefix.
start_path = os.path.join(os.path.expanduser("~"), ".dbt", "")
with open(os.path.join(start_path, "profiles.yml"), "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Settings of the selected profile/target; a missing profile, target or key
# raises KeyError here.
target_output = config[profile]["outputs"][target]
server = target_output["server"]
port = target_output["port"]
warehouse = target_output["database"]
shard0 = target_output["database_shard0"]
shard1 = target_output["database_shard1"]
items = target_output["database_items"]
username = target_output["user"]
password = target_output["password"]

# Database Connection

In [None]:
# Raw ODBC connection string assembled from the profile settings loaded above.
# It is handed to SQLAlchemy unchanged through the "odbc_connect" query key.
# NOTE(review): SQL Server ODBC drivers usually expect the port as
# "Server=host,port" - confirm that the separate Port attribute is honoured.
connection_string = f'''Driver={{SQL Server}};
                        Server={server};
                        Port={port};
                        Database={warehouse};
                        uid={username};
                        pwd={password};'''

# Wrap the ODBC string in a SQLAlchemy URL and create the shared engine.
connection_url = URL.create(
    drivername="mssql+pyodbc",
    query={"odbc_connect": connection_string},
)
connection_engine = sa.create_engine(connection_url)

# Functions

In [None]:
def run_script(script):
    """Execute one SQL script from the ``on-prem`` directory, batch by batch.

    The file content is split on the literal ``GO--`` marker. Every non-empty
    batch has its ``${warehouse}``, ``${items}``, ``${shard0}`` and
    ``${shard1}`` placeholders replaced with the module-level database names,
    and is then executed and committed individually. The elapsed time is
    printed in minutes once all batches have run.

    Args:
        script: File name of the script, relative to ``on-prem/``.

    Raises:
        FileNotFoundError: If the script file does not exist (raised by
            ``open`` before any database connection is made).
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # a system clock adjustment during a long-running script.
    script_start = time.perf_counter()

    # Read the script first so a missing file fails before a connection is
    # taken from the engine.
    with open(f'on-prem/{script}', "r", encoding='utf-8') as file:
        batches = file.read().split("GO--")

    # Placeholder -> database name; looked up at call time and applied in
    # this order.
    placeholders = {
        '${warehouse}': warehouse,
        '${items}': items,
        '${shard0}': shard0,
        '${shard1}': shard1,
    }

    with connection_engine.connect() as connection:
        for batch in batches:
            stmt = batch.strip()
            if not stmt:
                continue
            for token, value in placeholders.items():
                stmt = stmt.replace(token, value)
            # NOTE(review): sa.text() treats ":name" as a bind parameter -
            # confirm the scripts contain no such sequences.
            connection.execute(sa.text(stmt))
            # Commit per batch, so each batch is persisted before the next
            # one starts.
            connection.commit()

    elapsed_minutes = (time.perf_counter() - script_start) / 60
    print(f"Script {script} took {elapsed_minutes:.1f} minutes to complete.")

In [13]:
def run_dbt(args):
    """Invoke dbt programmatically and return the outcome.

    Args:
        args: dbt CLI arguments, passed unchanged to ``dbtRunner.invoke``
            (presumably a list such as ``["run", "--select", "model"]``).

    Returns:
        The ``dbtRunnerResult`` produced by the invocation. Per the dbt
        programmatic-invocation docs it exposes ``success``, ``exception``
        and ``result``, so callers can detect a failed run; callers that
        ignore the return value behave as before.
    """
    # Named "runner" so the imported dbt module is not shadowed.
    runner = dbtRunner()

    # Run the command and hand the result back instead of discarding it.
    res: dbtRunnerResult = runner.invoke(args)
    return res


# Extract and Load

In [None]:
# SQL scripts to execute, in this order.
script_arr = [
    # EXTRACTION 1 - XDB EXTRACT
    "ContactFacets-Classification.sql",
    "ContactFacets-ContactBehaviorProfile.sql",
    "ContactFacets-Emails.sql",
    "ContactFacets-EngagementMeasures.sql",
    "ContactFacets-Personal.sql",
    "ContactFacets-TestCombinations.sql",
    "InteractionFacets-IP.sql",
    "InteractionFacets-ProfileScores.sql",
    "InteractionFacets-UserAgentInfo.sql",
    "InteractionFacets-WebVisit.sql",
    "Interactions-Events.sql",
    # EXTRACTION 2 - VALUE UPDATES AND PERSONA/RULE JSON EXTRACT
    "Interactions-Events-ValueUpdates.sql",
    "Interactions-Events-EligibleRules.sql",
    "Interactions-Events-ExposedRules.sql",
    "Interactions-Events-Delta.sql",
    # EXTRACTION 3 - CONTENT LOOKUP
    "Items-Campaigns.sql",
    "Items-Goals.sql",
    "Items-Pages.sql",
    "Items-Profiles.sql",
]

In [None]:
# Wall-clock start of the whole extract-and-load pass.
start_time = time.time()

# Run every script in list order; an exception from run_script stops the loop.
for script in script_arr:
    run_script(script)

end_time = time.time()
total_minutes = (end_time - start_time) / 60
print(f"All scripts took {total_minutes:.1f} minutes to complete.")