In [0]:
from pyspark.sql.functions import col, current_timestamp, rand

from datetime import datetime, timedelta

# Notebook parameters: each value is read from the widget of the same name,
# in the order env, database, schema, target_date.
env, database, schema, target_date = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("env", "database", "schema", "target_date")
)

# Load the sensor configuration from the `sensors` table; the catalog part of
# the identifier is "<env>_<database>" and each part is backtick-quoted.
sensors_table = f"`{env}_{database}`.`{schema}`.`sensors`"
sensor_df = spark.table(sensors_table)

# Generate 15-minute reading timestamps for the 365 days starting at
# target_date (parsed as YYYY-MM-DD, midnight). The range is half-open:
# start_time is included, end_time is not, so the last timestamp is
# end_time - 15 minutes.
start_time = datetime.strptime(target_date, "%Y-%m-%d")
end_time = start_time + timedelta(days=365)
interval_step = timedelta(minutes=15)
# Derive the number of slots from the window instead of hard-coding it, so the
# count cannot drift from end_time (365 days * 96 slots per day = 35040).
interval_count = (end_time - start_time) // interval_step
intervals = [start_time + interval_step * i for i in range(interval_count)]
# Single-column DataFrame; each timestamp is wrapped in a 1-tuple to form a row.
intervals_df = spark.createDataFrame([(ts,) for ts in intervals], ["reading_time"])

# Pair every sensor row with every reading_time (Cartesian product), then add
# the generated columns:
#   reading          - valid_min plus a rand()-scaled fraction of the
#                      (valid_max - valid_min) span
#   reading_string   - copy of the reading column
#   system_timestamp - current_timestamp() of the Spark query
# NOTE(review): reading_string is not cast to a string type here; confirm
# whether downstream consumers expect an explicit cast.
samples_df = (
    sensor_df.crossJoin(intervals_df)
    .withColumn(
        "reading",
        col("valid_min") + (col("valid_max") - col("valid_min")) * rand(),
    )
    .withColumn("reading_string", col("reading"))
    .withColumn("system_timestamp", current_timestamp())
)

# Columns written to the landing zone, in output order.
output_columns = [
    "home_id",
    "device_id",
    "sensor_id",
    "reading",
    "reading_string",
    "unit",
    "reading_time",
    "system_timestamp",
]
final_df = samples_df.select(*[col(column_name) for column_name in output_columns])

# Append the generated readings as CSV files under the sensor_data landing-zone
# path (this writes files to the path, not rows into a table).
landing_zone_path = "/Volumes/disk/data/home/landing_zone/sensor_data/"
(
    final_df.write
    .format("csv")
    .mode("append")
    .save(landing_zone_path)
)