In [0]:
%pip install /dbfs/FileStore/libs/common_stock_classes-0.1.0-py3-none-any.whl --force-reinstall


In [0]:
from common_stock_classes  import SCDType2Handler, GetStockData
import json
from pyspark.sql.functions import col, explode
from pyspark.sql.types import DateType, TimestampType 
from delta.tables import DeltaTable

In [0]:
# Fetch the Twelve Data API key from the "kv-stock-market" secret scope so it never appears in the notebook.
api_key = dbutils.secrets.get(scope="kv-stock-market", key="twelvedata-apikey")
# Client used by the ingestion cell to pull historical stock data.
getstocks = GetStockData(api_key)

bronze_path = "abfss://bronze@andrewstockmarket.dfs.core.windows.net/"  # Bronze landing zone; this notebook writes it as Parquet partitioned by symbol
silver_path = "abfss://silver@andrewstockmarket.dfs.core.windows.net/delta-tables/main"  # Location of the Silver Delta table that the merge step opens with DeltaTable.forPath

# Debugging aid (disabled): dump the raw API payload as indented JSON.
# stock_data = getstocks.get_historical_stock_data()
# print( json.dumps(stock_data, indent=4)) 


### Get Data from API

In [0]:


# Calendar-year windows requested from the API; each window is ingested and
# merged into Silver before the next one starts.
year_ranges = [
    {"start_date": "2019-01-01", "end_date": "2019-12-31"},
    {"start_date": "2020-01-01", "end_date": "2020-12-31"},
    {"start_date": "2021-01-01", "end_date": "2021-12-31"},
    {"start_date": "2022-01-01", "end_date": "2022-12-31"},
    {"start_date": "2023-01-01", "end_date": "2023-12-31"},
    {"start_date": "2024-01-01", "end_date": "2024-12-31"},
    {"start_date": "2025-01-01", "end_date": "2025-12-31"},
]

# Symbols are requested in two batches of five.
# NOTE(review): presumably batched to respect an API per-call limit - confirm.
# Loop-invariant, so defined once outside the year loop.
symbols = [["VOO","TSLA", "TM", "F","AAPL"], ["MSFT", "NVDA", "JPM", "GS", "MS"] ]

for year_range in year_ranges:

    ### Ingest API -> Bronze (one write per symbol batch)
    for symbol_list in symbols:
        json_data = getstocks.get_historical_stock_data(symbols=symbol_list, **year_range)

        # Keep only successful responses and surface the symbol as a top-level
        # column so it can be used as the partition key.
        df = (
            spark.createDataFrame(json_data)
            .withColumn("symbol", col("meta").getItem("symbol"))
            .filter(col("status") == "ok")
            .drop("status")
        )

        # Materialise once, write, then always release the cached blocks so
        # cached frames do not pile up across the 14 loop iterations.
        df.cache()
        try:
            df.count()

            # Dynamic partition overwrite replaces only the symbol partitions
            # present in this batch. With the default (static) mode every
            # overwrite wipes the whole bronze path, so the second batch would
            # delete the first batch's symbols before they are merged to Silver.
            (
                df.write
                .format("parquet")
                .mode("overwrite")
                .option("partitionOverwriteMode", "dynamic")
                .partitionBy("symbol")
                .save(bronze_path)
            )
        finally:
            df.unpersist()

    ### Transform Bronze -> Silver
    # The partition column duplicates meta.symbol, so drop it before flattening.
    df_bronze = spark.read.format("parquet").load(bronze_path).drop("symbol")
    # One output row per element of the `values` array.
    df_bronze = df_bronze.withColumn("values", explode("values"))
    df_bronze = df_bronze.select(
        col("meta").getItem("symbol").alias("Symbol"),
        col("meta").getItem("exchange").alias("ExchangeName"),
        col("meta").getItem("currency").alias("Currency"),
        col("meta").getItem("type").alias("Type"),
        col("meta").getItem("exchange_timezone").alias("ExchangeTimeZone"),
        col("values").getItem("volume").alias("Volume"),
        col("values").getItem("high").alias("High"),
        col("values").getItem("low").alias("Low"),
        col("values").getItem("close").alias("Close"),
        col("values").getItem("open").alias("Open"),
        col("values").getItem("datetime").alias("Date"))
    df_bronze = df_bronze.dropDuplicates()

    # Configuration handed to the project's SCDType2Handler.
    # NOTE(review): key semantics live in common_stock_classes - confirm there.
    parameters = {
            "businessColumns" : "Symbol,ExchangeName,Currency,Date",
            "typeIColumns" : "",
            "tableType" : "Stage"
            }
    scd2Handler = SCDType2Handler(parameters)
    scd2Handler.refresh_timestamp()
    add_audit_columns = scd2Handler.add_audit_columns
    df_bronze = df_bronze.transform(add_audit_columns)

    # Merge this year's flattened rows into the existing Silver Delta table.
    deltaTable = DeltaTable.forPath(spark, silver_path)
    scd2Handler.delta_merge_typeII(deltaTable, df_bronze)





In [0]:
# %sql
# select * from stocks.silver.silver_table limit  10 

In [0]:
# Preview `df_bronze` as left by the most recently run cell that assigns it
# (the ingestion loop when the notebook is run top to bottom).
display(df_bronze)

### Bronze -> Silver

In [0]:

### Transform Bronze -> Silver

# (source column, nested key, output column) in the exact order the Silver
# projection emits them.
_silver_projection = [
    ("meta", "symbol", "Symbol"),
    ("meta", "exchange", "ExchangeName"),
    ("meta", "currency", "Currency"),
    ("meta", "type", "Type"),
    ("meta", "exchange_timezone", "ExchangeTimeZone"),
    ("values", "volume", "Volume"),
    ("values", "high", "High"),
    ("values", "low", "Low"),
    ("values", "close", "Close"),
    ("values", "open", "Open"),
    ("values", "datetime", "Date"),
]

# Read the bronze snapshot, discard the partition column, emit one row per
# element of `values`, flatten the nested fields and de-duplicate.
df_bronze = (
    spark.read.format("parquet")
    .load(bronze_path)
    .drop("symbol")
    .withColumn("values", explode("values"))
    .select(
        *[
            col(source).getItem(key).alias(output)
            for source, key, output in _silver_projection
        ]
    )
    .dropDuplicates()
)

# Settings passed to the project's SCDType2Handler.
# NOTE(review): key semantics are defined in common_stock_classes - confirm there.
parameters = {
    "businessColumns": "Symbol,ExchangeName,Currency,Date",
    "typeIColumns": "",
    "tableType": "Stage",
}

scd2Handler = SCDType2Handler(parameters)
scd2Handler.refresh_timestamp()
add_audit_columns = scd2Handler.add_audit_columns

# Apply the handler's audit-column transform to the flattened frame.
df_bronze = df_bronze.transform(add_audit_columns)

# Open the existing Silver Delta table and merge the prepared rows into it.
deltaTable = DeltaTable.forPath(spark, silver_path)
scd2Handler.delta_merge_typeII(deltaTable, df_bronze)



In [0]:


# df_history = spark.sql(f" select count(*) from  delta.`{silver_path}`  ")
# df_history.show()
