# Install Packages

In [0]:
!pip install ta

# Import Packages

In [0]:
import sys
sys.path.append("/Workspace/Shared/lib/")
import os
import logging
import time
import requests
from datetime import datetime
from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException
from params import get_env, get_catalog, get_schema, get_table
from ta import add_all_ta_features
from ta.utils import dropna

# Logging

In [0]:
# Send INFO-and-above records from the root logger to stdout.
# force=True replaces any handlers that were already attached to the root logger.
_LOG_FORMAT = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
logging.basicConfig(stream=sys.stdout, format=_LOG_FORMAT, level=logging.INFO, force=True)

# Params

In [0]:
# Resolve env, catalog suffix, catalog, schema and table through the helpers
# imported from `params`.
env, catalog_suffix = get_env()
catalog, schema, table = get_catalog(), get_schema(), get_table()

# Echo each resolved parameter as "<name> = <value>" so the run is self-describing.
for _param_name, _param_value in (
    ("env", env),
    ("catalog_suffix", catalog_suffix),
    ("catalog", catalog),
    ("schema", schema),
    ("table", table),
):
    print(f"{_param_name} = {_param_value}")

# Single Example

In [0]:
# Load every pricing row for NVDA, ordered by date, and bring it to pandas so the
# `ta` library (which operates on pandas DataFrames) can process it.
df_nvda = spark.sql(f"select * FROM featlib{catalog_suffix}.components.pricing where act_symbol = 'NVDA' order by date")
df_nvda_pd = df_nvda.toPandas()

# Add all ta features WITHOUT filling NaN values (fillna=False), then drop the raw
# OHLCV input columns so only the id columns and the generated features remain.
# NOTE(review): add_all_ta_features appears to add its columns to the frame it is
# given, so df_nvda_pd may no longer be the raw pricing frame after this call — confirm.
df_nvda_pd_feat = add_all_ta_features(df_nvda_pd, open="open", high="high", low="low", close="close", volume="volume", fillna=False)\
    .drop(['open', 'high', 'low', 'close', 'volume'], axis=1)

# Render with the notebook's display() function, as the other cells do:
# a pandas DataFrame has no .display() method, so the method form raises AttributeError.
display(df_nvda_pd_feat)

In [0]:
# Reshape the wide pandas feature frame to long format: "date" and "act_symbol"
# stay as identifiers, each remaining column becomes a row with the column name
# in "id" and its cell value in "values".
_melt_id_columns = ["date", "act_symbol"]
df_nvda_pd_feat_melted = df_nvda_pd_feat.melt(
    id_vars=_melt_id_columns,
    var_name="id",
    value_name="values",
)
display(df_nvda_pd_feat_melted)

In [0]:
# Convert the wide pandas feature frame into a Spark DataFrame and render it.
df_nvda_pd_feat_spark = spark.createDataFrame(data=df_nvda_pd_feat)
display(df_nvda_pd_feat_spark)

In [0]:
# NOTE(review): `expr` is not used in this cell; the import is kept in case later cells rely on it.
from pyspark.sql.functions import expr

# Columns to unpivot: everything except the identifier columns 'date' and 'act_symbol'.
columns_to_unpivot = [col for col in df_nvda_pd_feat_spark.columns if col not in ["date", "act_symbol"]]

# Build the argument list for Spark SQL's stack() generator:
#   stack(n, 'col1_name', col1_value, 'col2_name', col2_value, ...)
# Each pair yields one output row holding the column's name and its value.
# The value reference is backtick-quoted so that a column whose name contains a dot,
# a space, a leading digit, or a reserved word is still read as a single column
# instead of breaking (or being misparsed by) the SQL expression.
stack_expression = ", ".join(f"'{col}', `{col}`" for col in columns_to_unpivot)
num_columns = len(columns_to_unpivot)

# Unpivot: keep the identifier columns and emit one (id, value) row per feature column.
df_nvda_pd_feat_spark_pivoted = df_nvda_pd_feat_spark.selectExpr(
    "date",
    "act_symbol",
    f"stack({num_columns}, {stack_expression}) as (id, value)"
)

display(df_nvda_pd_feat_spark_pivoted)

# Create Features