# Import Packages

In [0]:
import sys

sys.path.append("/Workspace/Shared/lib/")
import os
import logging
import time
import requests
from datetime import datetime
from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException
from params import get_env, get_catalog, get_schema, get_table, get_url, get_volume

# Logging

In [0]:
# Root logger setup: INFO and above, written to stdout.
# force=True removes handlers already attached to the root logger, so
# re-running this cell replaces the configuration instead of being ignored.
logging.basicConfig(
    force=True,
    stream=sys.stdout,
    format="%(asctime)s %(name)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# Params

In [0]:
# Resolve the run parameters through the helpers imported from `params`.
# The helpers are called in the same order as before: env, catalog, schema,
# table, url, volume.
env, catalog_suffix = get_env()
catalog, schema, table = get_catalog(), get_schema(), get_table()
url, volume = get_url(), get_volume()

# Echo every resolved parameter, one "name = value" line each.
print(
    f"env = {env}",
    f"catalog_suffix = {catalog_suffix}",
    f"catalog = {catalog}",
    f"schema = {schema}",
    f"table = {table}",
    f"url = {url}",
    f"volume = {volume}",
    sep="\n",
)

In [0]:
# Target location of the tables this notebook writes. These two assignments
# replace the values obtained from get_catalog() / get_schema() above.
# The catalog is built with the same suffix convention the queries use for
# the `raw` catalog, rather than being pinned to the dev catalog, so a run in
# another environment no longer writes into `featlib_dev`.
# NOTE(review): assumes get_env() yields "_dev" as catalog_suffix in dev, which
# keeps the dev behaviour unchanged - confirm against the params module.
catalog = f"featlib{catalog_suffix}"
schema = "components"

# Split Adjustment Factor

In [0]:
# (Re)build the per-symbol split adjustment table `split_adj`.
#
# Each output row describes one date range of a symbol, running from adj_date
# to end_adj_date inclusive (end_adj_date is NULL for the most recent range):
#   adj_factor     - for_factor / to_factor of the split that opens the range;
#                    NULL for the synthetic 2000-01-01 row and whenever
#                    try_divide cannot compute the ratio (e.g. to_factor = 0).
#   cum_adj_factor - product of adj_factor over all later ranges of the symbol
#                    (1 for the most recent range).
#
# NOTE(review): the column names listed for raw.stocks.split are the ones the
# original positional `SELECT *` had to line up with - confirm they match the
# table definition.
spark.sql(
    f"""
    CREATE OR REPLACE TABLE {catalog}.{schema}.split_adj AS
        -- Every symbol that has at least one recorded split
        WITH base_table AS (
            SELECT DISTINCT act_symbol FROM raw{catalog_suffix}.stocks.split
        ),
        -- Recorded splits plus one synthetic NULL-factor row per symbol dated
        -- 2000-01-01, which opens the first date range of that symbol
        new_split_table AS (
            SELECT
                act_symbol,
                DATE '2000-01-01' AS ex_date,
                NULL AS to_factor,
                NULL AS for_factor
            FROM base_table
            UNION ALL
            -- UNION ALL matches columns by position, so list them explicitly
            -- instead of relying on the physical column order of the table
            SELECT
                act_symbol,
                ex_date,
                to_factor,
                for_factor
            FROM raw{catalog_suffix}.stocks.split
        ),
        -- Per-event adjustment factor (NULL when it cannot be computed)
        split_factors AS (
            SELECT
                act_symbol,
                ex_date AS adj_date,
                try_divide(double(for_factor), double(to_factor)) AS adj_factor
            FROM new_split_table
        ),
        -- Close each range one day before the next event of the same symbol and
        -- number the events from the most recent one backwards
        ordered_factors AS (
            SELECT
                act_symbol,
                adj_date,
                CAST (LEAD(adj_date) OVER (PARTITION BY act_symbol ORDER BY adj_date ASC) - INTERVAL '1 DAY' AS DATE) AS end_adj_date,
                adj_factor,
                ROW_NUMBER() OVER (PARTITION BY act_symbol ORDER BY adj_date DESC) AS reverse_row_num
            FROM split_factors
        ),
        -- Cumulative factor = product of the factors of all later events
        cumulative_factors AS (
            SELECT
                act_symbol,
                adj_date,
                end_adj_date,
                adj_factor,
                CASE
                    -- Most recent range: nothing later to adjust for
                    WHEN end_adj_date IS NULL
                        THEN 1
                    -- Exactly one later event: use its factor directly
                    WHEN lead(end_adj_date) OVER (PARTITION BY act_symbol ORDER BY adj_date ASC) IS NULL
                        THEN lead(adj_factor) OVER (PARTITION BY act_symbol ORDER BY adj_date ASC)
                    -- Several later events: EXP(SUM(LN(x))) is a running product
                    -- over the rows with a smaller reverse_row_num (later dates)
                    ELSE EXP(SUM(LN(adj_factor)) OVER (PARTITION BY act_symbol ORDER BY reverse_row_num ASC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING))
                END AS cum_adj_factor
            FROM ordered_factors
        )
        -- Final output
        SELECT
            act_symbol,
            adj_date,
            end_adj_date,
            adj_factor,
            cum_adj_factor
        FROM cumulative_factors
        ORDER BY adj_date ASC;
    """
)

# Pricing Inc. Splits

In [0]:
# Split-adjusted OHLCV view.
# Every raw bar is joined to the split_adj range of its symbol that contains
# the bar date. Prices are multiplied by cum_adj_factor and volume is divided
# by it; bars with no matching range fall back to a factor of 1.
# The factors are read from the same catalog/schema location the split_adj
# table is written to, so this view cannot pick up a table from a different
# catalog than the one that was just rebuilt.
# NOTE(review): the `-- 17457` marker in the SQL is kept from the original;
# presumably a previously observed count - confirm or remove.
spark.sql(
    f"""
    -- 17457
    CREATE OR REPLACE TEMPORARY VIEW tmp_pricing01_inc_splits AS
        SELECT
            a.date,
            a.act_symbol,
            a.open * COALESCE(f.cum_adj_factor, 1) AS open,
            a.high * COALESCE(f.cum_adj_factor, 1) AS high,
            a.low * COALESCE(f.cum_adj_factor, 1) AS low,
            a.close * COALESCE(f.cum_adj_factor, 1) AS close,
            a.volume / COALESCE(f.cum_adj_factor, 1) AS volume
        FROM raw{catalog_suffix}.stocks.ohlcv a
        LEFT JOIN {catalog}.{schema}.split_adj f
            ON a.act_symbol = f.act_symbol
            AND a.date >= f.adj_date
            -- an open-ended range (NULL end date) covers every later date
            AND (a.date <= f.end_adj_date OR f.end_adj_date IS NULL)
    """
)

In [0]:
%sql
-- Number of distinct symbols present in the split-adjusted pricing view
SELECT count(*)
FROM (
    SELECT act_symbol
    FROM tmp_pricing01_inc_splits
    GROUP BY act_symbol
) AS symbols

# Remove Symbols With History Gaps
