In [0]:
# No need to reinstall if the wheel was already installed by a previous notebook on the same cluster
%pip install /dbfs/FileStore/libs/common_stock_classes-0.1.0-py3-none-any.whl --force-reinstall

In [0]:

from common_stock_classes  import  HelperMethods, SCDType2Handler
from pyspark.sql.functions import col, explode, monotonically_increasing_id, lit, cast , concat, expr, date_format, sha2, concat_ws, sequence
from pyspark.sql.types import DateType, TimestampType   # sha2,concat_ws,trim, lit
from delta.tables import DeltaTable
from datetime import  datetime, date, timedelta


In [0]:


# Storage locations (ADLS Gen2 `abfss://` URIs) used throughout this notebook.
silver_path = "abfss://silver@andrewstockmarket.dfs.core.windows.net/delta-tables/main"  # Silver Delta table; read below as the source data
# Delta table that receives the Dim Symbol merge and is re-read for the fact lookup.
dim_symbol_path = "abfss://gold@andrewstockmarket.dfs.core.windows.net/delta-tables/dim-symbol"
# Delta table that receives the daily-summary fact merge.
fact_daily_path = "abfss://gold@andrewstockmarket.dfs.core.windows.net/delta-tables/fact-daily-summary"
# Delta table holding the date dimension; extended in the last cell when Silver has newer dates.
dimDate_folder = "abfss://gold@andrewstockmarket.dfs.core.windows.net/delta-tables/dim-date"



In [0]:
# Load the Silver Delta table, restricted to rows whose `__lastmodified`
# falls within the last 10 days, and mark the result for caching because it is
# reused by several later cells.
df_silver_c = (
    spark.read.format("delta")
    .load(silver_path)
    .filter("__lastmodified > current_date() - interval 10 days")
    .cache()
)

# count() is an action, so it triggers evaluation of the cached frame
# (and shows the row count as the cell output).
df_silver_c.count()


## Dim Symbol

In [0]:


# Configuration handed to SCDType2Handler for the symbol dimension.
parameters = dict(
    businessColumns="Symbol,ExchangeName,Currency",
    typeIColumns="",
    tableType="Dim",
)

scd2Handler = SCDType2Handler(parameters)
scd2Handler.refresh_timestamp()
add_audit_columns = scd2Handler.add_audit_columns

# Distinct symbol attributes from Silver, joined to the symbol lookup table on
# `Symbol` (default join type), then passed through the handler's audit-column
# transform.
df_dimSymbol = (
    df_silver_c
    .select("Symbol", "ExchangeName", "Currency", "Type", "ExchangeTimeZone")
    .distinct()
    .alias("d")
    .join(spark.table("stocks.silver.symbol_lookup").alias("l"), on="Symbol")
    .transform(add_audit_columns)
)

# Merge the prepared rows into the Dim Symbol Delta table.
deltaTable = DeltaTable.forPath(spark, dim_symbol_path)
scd2Handler.delta_merge_typeII(deltaTable, df_dimSymbol)



## Fact Trading

In [0]:


# Re-read the Dim Symbol Delta table by path (after the merge above) and mark
# it for caching; cache() returns the same DataFrame, so it can be chained.
df_dim_symbol_c = spark.table(f"delta.`{dim_symbol_path}` ").cache()

# count() is an action: it triggers evaluation and displays the row count.
df_dim_symbol_c.count()


In [0]:


# Keep only the dimension rows where __CurrentFlag is True.
df_dim_symbol = df_dim_symbol_c.filter("__CurrentFlag = True")

# Columns hashed together to look up the matching Dim Symbol row.
businessColumns = ["Symbol", "ExchangeName", "Currency"]

# Build the fact rows:
#   1. take Silver rows where __CurrentFlag is True,
#   2. replace `Date` with an integer `DateID` in yyyyMMdd form,
#   3. compute a SHA-256 hash over the "|"-joined business columns,
#   4. left-join to the dimension on that hash (compared with the dimension's
#      `__BusinessKeyHash`) and keep the surrogate key plus the measures.
df_fact = (
    df_silver_c
    .filter("__CurrentFlag = True")
    .withColumn("DateID", date_format(col("Date"), "yyyyMMdd").cast("int"))
    .drop("Date")
    .withColumn("DimSymbolBusinessHash", sha2(concat_ws("|", *businessColumns), 256))
    .alias("f")
    .join(
        df_dim_symbol.alias("d"),
        on=expr("f.DimSymbolBusinessHash = d.__BusinessKeyHash"),
        how="left",
    )
    .selectExpr(
        "d.SymbolSID",
        "f.Volume",
        "f.High",
        "f.Low",
        "f.Open",
        "f.Close",
        "f.DateID",
    )
)

# Configuration handed to SCDType2Handler for the daily-summary fact table.
parameters = dict(
    businessColumns="SymbolSID,DateID",
    typeIColumns="Volume,High,Low,Close,Open",
    tableType="Fact",
)

scd2Handler = SCDType2Handler(parameters)
scd2Handler.refresh_timestamp()
add_audit_columns = scd2Handler.add_audit_columns

df_fact = df_fact.transform(add_audit_columns)

# Merge the prepared rows into the fact Delta table.
deltaTable = DeltaTable.forPath(spark, fact_daily_path)
scd2Handler.delta_merge_typeII(deltaTable, df_fact)




## Populate DimDate table if needed

In [0]:
from datetime import datetime, timedelta

# Extend the DimDate table so that it covers every date present in the Silver
# window, up to 31 December of the latest Silver year.
hm = HelperMethods(spark=spark)
df_dimdate = spark.read.format("delta").load(dimDate_folder)

# max() over an empty table is NULL, which head()[0] returns as None.
max_date_dimDate = df_dimdate.selectExpr(" cast(max(date) as date) as max_date").head()[0]

# Compute both Silver date bounds in a single aggregation pass.
silver_date_bounds = df_silver_c.selectExpr(
    "cast(min(date) as date) as min_date",
    "cast(max(date) as date) as max_date",
).head()
min_date_silver = silver_date_bounds[0]
max_date_silver = silver_date_bounds[1]

if max_date_silver is None:
    # The Silver window (last 10 days) contained no dated rows, so there is
    # nothing to extend DimDate towards. Previously this case raised
    # AttributeError on `max_date_silver.year`.
    print("No dated rows in the Silver window; DimDate left unchanged.")
else:
    end_date = date(max_date_silver.year, 12, 31)

    if not max_date_dimDate:
        # DimDate has no dates yet: populate it up to end_date
        # (the start date is left to the helper's default).
        hm.update_DimDate_fromRange(dimDate_path=dimDate_folder, end_date=end_date)

    elif max_date_dimDate < max_date_silver:
        # DimDate stops before the newest Silver date: append the missing
        # range, starting the day after the current DimDate maximum.
        start_date = max_date_dimDate + timedelta(days=1)

        hm.update_DimDate_fromRange(dimDate_path=dimDate_folder, start_date=start_date, end_date=end_date)
    
    
