In [None]:
from pyspark.sql import SparkSession

def create_spark_session() -> SparkSession:
    """Build (or reuse) the Hive-enabled SparkSession used by this notebook.

    The session is named "Dual Storage Integration Delta" and is configured
    with the Delta Lake SQL extension and catalog. Both the ``s3a`` and the
    ``s3minio`` filesystem schemes are mapped to Hadoop's ``S3AFileSystem``.

    Returns:
        The session returned by ``getOrCreate()``.
    """
    settings = {
        # Delta Lake integration.
        "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
        "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
        # Filesystem implementations for the two URI schemes.
        "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
        "spark.hadoop.fs.s3minio.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
    }

    builder = SparkSession.builder.appName("Dual Storage Integration Delta")
    builder = builder.enableHiveSupport()
    for key, value in settings.items():
        builder = builder.config(key, value)

    return builder.getOrCreate()

spark = create_spark_session()

# S3A connection settings for the AWS landing bucket (us-east-2), supplied as
# per-read options on the Parquet reader below.
# NOTE(review): both keys are empty strings here — presumably redacted or
# resolved by another credential provider; confirm before relying on this.
s3_options = {
    "fs.s3a.access.key": "",
    "fs.s3a.secret.key": "",
    "fs.s3a.endpoint": "s3.us-east-2.amazonaws.com",
    "fs.s3a.region": "us-east-2",
    "fs.s3a.path.style.access": "false"
}

# S3A connection settings for the MinIO endpoint (plain HTTP, path-style
# access), supplied as per-write options on the Delta writer below.
# NOTE(review): these credentials are hard-coded in source; consider loading
# them from the environment or a secret store instead of committing them.
minio_delta_options = {
    "fs.s3a.access.key": "admin",
    "fs.s3a.secret.key": "senhasegura",
    "fs.s3a.endpoint": "http://minio:9000",
    "fs.s3a.path.style.access": "true",
    "fs.s3a.connection.ssl.enabled": "false"
}

try:
    # Read the Parquet files from the landing bucket.
    df = (
        spark
        .read
        .format("parquet")
        .options(**s3_options)
        .load("s3a://dev-lab-02-us-east-2-landing/soccer/league/")
    )

    # Preview the loaded rows.
    df.show()

    # Write the same data as a Delta table to the bronze bucket, replacing any
    # existing contents at that path.
    (
        df
        .write
        .format("delta")
        .options(**minio_delta_options)
        .mode("overwrite")
        .save("s3a://bucket-bronze-zone/soccer/league/")
    )

    print("Tabela Delta gravada com sucesso no MinIO!")
finally:
    # Always release the session, even when the read or the write fails;
    # previously stop() was only reached on the success path.
    spark.stop()

+--------------------+---------------------+--------------+----------------------+-----+--------------------+----------+
|     _airbyte_raw_id|_airbyte_extracted_at| _airbyte_meta|_airbyte_generation_id|   id|                name|country_id|
+--------------------+---------------------+--------------+----------------------+-----+--------------------+----------+
|9202d4ef-8836-419...| 2025-05-08 03:17:...|{35826647, []}|                     3|    1|Belgium Jupiler L...|         1|
|e3a53785-5171-442...| 2025-05-08 03:17:...|{35826647, []}|                     3| 1729|England Premier L...|      1729|
|5cd88425-2ebf-40f...| 2025-05-08 03:17:...|{35826647, []}|                     3| 4769|      France Ligue 1|      4769|
|c23131b2-e6ab-4a8...| 2025-05-08 03:17:...|{35826647, []}|                     3| 7809|Germany 1. Bundes...|      7809|
|01dd48c8-f68b-490...| 2025-05-08 03:17:...|{35826647, []}|                     3|10257|       Italy Serie A|     10257|
|0b766c8f-d617-4d4...| 2025-05-0