In [0]:
import boto3
from botocore.exceptions import NoCredentialsError
import json
from pyspark.sql.types import *
from pyspark.sql.functions import col

# Location of the raw JSON object that this notebook ingests.
bucket = 'databricks-practice-sk'
key = 'raw_data/contoh.json'

# Step 1: Read the AWS key pair from the Databricks secret scope so that
# no credential is hard-coded in the notebook.
_secret_scope = "aws-secrets"
aws_access_key = dbutils.secrets.get(scope=_secret_scope, key="aws-access-key")
aws_secret_key = dbutils.secrets.get(scope=_secret_scope, key="aws-secret-key")

# Step 2: Build the S3 client with the explicit key pair fetched above.
s3 = boto3.client('s3', aws_access_key_id=aws_access_key, aws_secret_access_key=aws_secret_key)

# ClientError / BotoCoreError live next to NoCredentialsError; imported here
# so this step is self-contained.
from botocore.exceptions import BotoCoreError, ClientError

# Step 3: Download the JSON object from S3 and parse it.
# On any failure the problem is reported and data_dict is set to None so the
# processing step below is skipped instead of running on missing data.
try:
    obj = s3.get_object(Bucket=bucket, Key=key)
    json_data = obj['Body'].read().decode('utf-8')
    data_dict = json.loads(json_data)
except NoCredentialsError:
    # Must come before BotoCoreError: NoCredentialsError is a subclass of it.
    print("AWS credentials not found!")
    data_dict = None
except (ClientError, BotoCoreError) as err:
    # Because the client is created with explicit keys, bad or revoked keys,
    # a missing object and denied access are reported by the service as
    # ClientError; connection/streaming problems arrive as BotoCoreError.
    print(f"Failed to download s3://{bucket}/{key}: {err}")
    data_dict = None
except (UnicodeDecodeError, json.JSONDecodeError) as err:
    # The object was fetched but its body is not UTF-8 encoded JSON.
    print(f"s3://{bucket}/{key} is not valid UTF-8 JSON: {err}")
    data_dict = None

if data_dict is None:
    print("No data to process.")
else:

    # Step 4: Define the schema for the downloaded JSON document.
    # Every leaf is a nullable string; the helpers below only remove the
    # repetition of StructField(..., True) boilerplate.

    def _text(field_name):
        """Return a nullable string field."""
        return StructField(field_name, StringType(), True)

    def _record(field_name, members):
        """Return a nullable struct field made of the given member fields."""
        return StructField(field_name, StructType(members), True)

    def _record_list(field_name, members):
        """Return a nullable array field whose elements are structs of the given members."""
        return StructField(field_name, ArrayType(StructType(members)), True)

    def _signoff(field_name):
        """Return a nullable struct field with string members "id" and "datetime"."""
        return _record(field_name, [_text("id"), _text("datetime")])

    schema = StructType([
        _text("time"),

        _record("systems", [
            _text("equipment_id"),
            _record("component", [
                _record("columns", [_text("column_id")]),
            ]),
            _text("im_id"),  # formerly im_equipment_id
        ]),

        _record("users", [
            _signoff("my_submit"),
            _signoff("my_review"),
            _signoff("my_approval"),
            _record("my_status", [_text("status")]),
        ]),

        _record("methods", [
            _text(name)
            for name in (
                "sop",
                "id",
                "meth_id",
                "temp_id",
                "report_template_version",
                "seq_id",
                "seq_version",
            )
        ]),

        _record_list("runs", [
            _text("in_num"),
            _text("in_name"),
            _text("pk"),
        ]),

        _record_list("sst", [
            _text("fk"),
            _record("sst_res", [  # formerly sst_result
                _text("number"),
                _text("in_num"),
                _text("in_name"),
                _text("sst_name"),
                _text("peak"),
                _text("eval_result"),
                _text("result"),
            ]),
        ]),

        _record_list("results", [
            _text("fk"),
            _record("result", [
                _text("samp_id"),
                _text("as_id"),
                _text("Type"),
                _text("comp"),
                _text("unit"),
                _text("det_id"),
                _text("result"),
                _text("number_of_averaged_samples"),
            ]),
        ]),
    ])

    # Step 5: Create a one-row Spark DataFrame from the parsed document.
    bronze_df = spark.createDataFrame([data_dict], schema=schema)

    # Step 6: Print the schema and the row for verification.
    print("Schema:")
    bronze_df.printSchema()
    print("Data:")
    bronze_df.show(truncate=False)

    # Step 7: Replace the contents of the bronze Delta table with this load.
    delta_writer = bronze_df.write.format("delta").mode("overwrite")
    delta_writer.saveAsTable("workspace.bronze_schema.bronze_table")