In [0]:
from databricks.feature_store import FeatureStoreClient
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType, TimestampType
from datetime import datetime

# Grab the active Spark session (or start one), then create the Feature Store
# client that is used below to register the feature tables.
spark = SparkSession.builder.getOrCreate()
fs = FeatureStoreClient()

# Table 1: Text features
# One row per message. `id` is declared as the table's primary key in
# fs.create_table below, so every row must carry a distinct id.
# NOTE(review): ids here are per-message. If `id` is meant to identify a user
# rather than a message, the key should instead be composite (e.g. id + datetime)
# — confirm against downstream lookups.
text_schema = StructType([
    StructField("id", IntegerType(), False),
    StructField("user_id", StringType(), False),
    StructField("datetime", TimestampType(), False),
    StructField("text", StringType(), False)
])
text_data = [
    (1, "userA", datetime(2026, 2, 22, 10, 0, 0), "I feel anxious and overwhelmed."),
    (2, "userB", datetime(2026, 2, 22, 11, 0, 0), "I'm doing well today."),
    (3, "userC", datetime(2026, 2, 22, 12, 0, 0), "Sometimes I feel sad."),
    # Tier 0: Stable / Positive
    (4, "Eric", datetime(2026, 2, 22, 10, 0, 0), "I had a really productive morning at Amazon today. Feeling steady and balanced."),
    (5, "Eric", datetime(2026, 2, 22, 11, 0, 0), "The weather in Atlanta is beautiful. Just finished a short walk and I feel refreshed."),

    # Tier 1: Moderate Distress (Anxiety/Stress/Depression)
    (6, "Eric", datetime(2026, 2, 22, 12, 0, 0), "The project deadline is looming and I can feel my heart racing. It's hard to focus on the code."),
    (7, "Eric", datetime(2026, 2, 22, 13, 0, 0), "I feel quite lonely today. Even with people around, I just feel disconnected and a bit empty."),
    (8, "Eric", datetime(2026, 2, 22, 14, 0, 0), "My chest feels tight and I can't seem to stop worrying about things I can't control."),

    # Tier 2: Critical / High Risk (Safety Override Triggers)
    (9, "Eric", datetime(2026, 2, 22, 15, 0, 0), "Everything feels far too heavy to carry. I'm starting to wonder if people would just be better off without me."),
    (10, "Eric", datetime(2026, 2, 22, 16, 0, 0), "I feel completely hopeless. There's no way out of this pain and I just want it to end."),

    # Contextual / Nuanced (Testing "Masked" Stress)
    (11, "Eric", datetime(2026, 2, 22, 17, 0, 0), "I'm smiling and doing my work, but internally I'm just dragging myself through every minute."),
    (12, "Eric", datetime(2026, 2, 22, 18, 0, 0), "Tried to do some box breathing like Aura suggested, but my mind is still spiraling about the master's thesis.")
]

# Fail fast if the sample rows ever violate the primary-key contract; a duplicate
# id would otherwise only surface (or silently corrupt lookups) inside the store.
message_ids = [row[0] for row in text_data]
assert len(message_ids) == len(set(message_ids)), "duplicate primary key values in text_data"

text_df = spark.createDataFrame(text_data, text_schema)
fs.create_table(
    name="message_features",
    primary_keys=["id"],
    df=text_df,
    description="Text features with id, user_id, datetime, and text."
)

# Table 2: Biomarker features (with datetime)
# Column order below defines the order of the float columns in the table schema.
biomarker_columns = [
    "DHRb.cvc",
    "NHRd.0204.sde", "NHR.0204.cv", "NHR.0406.sd", "NHR.0406.cv", "NHR.0002.sd", "NHR.0002.cv",
    "ISf.stg.wdh", "IS.hri.wd",
    "ACj.st.60mk", "AC.st.15m", "AC.st.30m", "AC.st.60m.wd", "AC.st.15m.wd",
    "AC.st.30m.wd", "AC.hr.60m.wd", "AC.hr.30m.wd",
    "ICVl.st.wd", "ICV.hr", "ICV.hr.wd",
    "peaks.st", "peaks.st.wd",
    "acrom.st", "F.st.wd", "beta.hr", "acro.hr", "F.hr", "beta.hr.wd", "acro.hr.wd", "F.hr.wd",
    "sleep.offset", "sleep.midpoint", "sleep.offset.wd", "sleep.offset.wd.sd", "sleep.midpoint.wd",
    "sleep.midpoint.wd.sd",
]

# Schema = three non-nullable key/metadata fields followed by one non-nullable
# float field per biomarker column.
biomarker_key_fields = [
    StructField("id", IntegerType(), False),
    StructField("user_id", StringType(), False),
    StructField("datetime", TimestampType(), False),
]
biomarker_value_fields = [
    StructField(column_name, FloatType(), False) for column_name in biomarker_columns
]
biomarker_schema = StructType(biomarker_key_fields + biomarker_value_fields)

# Each sample row repeats a single constant value across every biomarker column.
biomarker_data = [
    (row_id, user, datetime(2026, 2, 22, hour, 0, 0), *([fill_value] * len(biomarker_columns)))
    for row_id, user, hour, fill_value in (
        (1, "userA", 10, 0.5),
        (2, "userB", 11, 0.6),
        (3, "userC", 12, 0.7),
    )
]

biomarker_df = spark.createDataFrame(biomarker_data, biomarker_schema)
fs.create_table(
    name="biomarker_features",
    primary_keys=["id"],
    df=biomarker_df,
    description="Biomarker features with id, user_id, datetime, and wearable columns."
)

print("Feature tables 'message_features' and 'biomarker_features' (with datetime) created and registered.")

Feature tables 'message_features' and 'biomarker_features' (with datetime) created and registered.


In [0]:
# Load the existing `synthetic_wearable_phq_full` table into a Spark DataFrame.
SYNTHETIC_TABLE = "workspace.database.synthetic_wearable_phq_full"
synthetic_df = spark.table(SYNTHETIC_TABLE)

# Preview the first five rows, then list the column names and types.
synthetic_df.show(n=5)
synthetic_df.printSchema()

+------------------+-------------------+-------------------+-------------------+--------------------+-------------------+-------------------+------------------+-------------------+------------------+------------------+--------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+--------------------+-------------------+-------------------+------------------+------------------+--------------------+-------------------+------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+-------------------+------------------+-------------------+--------------------+
|         phq_score|           DHRb.cvc|      NHRd.0204.sde|        NHR.0204.cv|         NHR.0406.sd|        NHR.0406.cv|        NHR.0002.sd|       NHR.0002.cv|        ISf.stg.wdh|         IS.hri.wd|       ACj.st.60mk|           AC.st.15m|          AC.st.30m|  