In [0]:
# -------------------------------
# Step 1: Declare S3 locations and load AWS credentials from Databricks secrets
# -------------------------------
# Source JSON document and destination Delta location (same bucket).
input_file_path = "s3a://databricks-practice-sk/raw_data/contoh.json"
output_path = "s3a://databricks-practice-sk/processed_data/output.delta"

# Both credentials are read from the same secret scope, so the key material
# never appears as a literal in this notebook.
_secret_scope = "aws-secrets"
aws_access_key = dbutils.secrets.get(scope=_secret_scope, key="aws-access-key")
aws_secret_key = dbutils.secrets.get(scope=_secret_scope, key="aws-secret-key")

# -------------------------------
# Step 2: Configure Spark for AWS S3
# -------------------------------
# Apply the S3A settings to the current Spark session in a fixed order:
# access key, secret key, then the endpoint host.
for _conf_key, _conf_value in (
    ("fs.s3a.access.key", aws_access_key),
    ("fs.s3a.secret.key", aws_secret_key),
    ("fs.s3a.endpoint", "s3.amazonaws.com"),
):
    spark.conf.set(_conf_key, _conf_value)

# -------------------------------
# Step 3: Read JSON file from S3
# -------------------------------
# The "multiline" option is enabled so that a JSON record is allowed to span
# several lines of the input file instead of one record per line.
json_reader = spark.read.option("multiline", "true")
bronze_df = json_reader.json(input_file_path)

# Inspect what was loaded: print the schema, then render the rows with the
# notebook's display() helper.
bronze_df.printSchema()
display(bronze_df)

# -------------------------------
# Step 4: Process the data (placeholder: no transformation yet)
# -------------------------------
# This step is currently a pass-through: silver_df is simply a second name for
# the same DataFrame object as bronze_df (no copy, no column selection).
# Replace this assignment with real transformations (e.g. selecting the key
# columns) once processing logic is added.
silver_df = bronze_df

# -------------------------------
# Step 5: Write back to S3 as Delta table
# -------------------------------
# "overwrite" mode replaces whatever data already exists at output_path.
(
    silver_df.write
    .format("delta")
    .mode("overwrite")
    .save(output_path)
)

# Only reached when save() returned without raising.
print(f"Data written successfully to {output_path}")