In [None]:
import os
from pyspark.sql import SparkSession

# Export the Spark version through the process environment.
# NOTE(review): presumably read by PyDeequ to select a matching Deequ build — confirm.
os.environ["SPARK_VERSION"] = "3.1"

# Locations of the JARs handed to Spark through "spark.jars"
iceberg_jar_path = "/opt/glue/jars/iceberg-spark3-runtime-0.12.0.jar"
hadoop_aws_jar_path = "/opt/glue/jars/hadoop-aws-3.2.0.jar"
aws_sdk_jar_path = "/opt/glue/jars/aws-java-sdk-bundle-1.11.375.jar"
pydeequ_jar_path = "/opt/glue/jars/deequ-glue-1.0-SNAPSHOT-jar-with-dependencies.jar"

# Session settings, kept in the order they are applied to the builder.
_session_settings = {
    # Comma-separated list of the four JARs above
    "spark.jars": ",".join(
        (iceberg_jar_path, hadoop_aws_jar_path, aws_sdk_jar_path, pydeequ_jar_path)
    ),
    # Iceberg SQL extensions
    "spark.sql.extensions": "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
    # Built-in session catalog routed through Iceberg's SparkSessionCatalog (hive type)
    "spark.sql.catalog.spark_catalog": "org.apache.iceberg.spark.SparkSessionCatalog",
    "spark.sql.catalog.spark_catalog.type": "hive",
    # Hadoop-type Iceberg catalog for the master layer
    # NOTE(review): both warehouses use the s3:// scheme while the filesystem
    # settings further down target s3a — confirm the runtime resolves s3://.
    "spark.sql.catalog.master_catalog": "org.apache.iceberg.spark.SparkCatalog",
    "spark.sql.catalog.master_catalog.type": "hadoop",
    "spark.sql.catalog.master_catalog.warehouse": "s3://ecommerce-data-lake-us-east-1-dev/04_master/",
    # Hadoop-type Iceberg catalog for the curated layer
    "spark.sql.catalog.curated_catalog": "org.apache.iceberg.spark.SparkCatalog",
    "spark.sql.catalog.curated_catalog.type": "hadoop",
    "spark.sql.catalog.curated_catalog.warehouse": "s3://ecommerce-data-lake-us-east-1-dev/06_curated/",
    # S3A filesystem implementation and AWS credentials provider chain
    "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
    "spark.hadoop.fs.s3a.aws.credentials.provider": "com.amazonaws.auth.DefaultAWSCredentialsProviderChain",
}

# Apply every setting to the builder, then create (or reuse) the session.
_builder = SparkSession.builder
for _option, _setting in _session_settings.items():
    _builder = _builder.config(_option, _setting)
spark = _builder.getOrCreate()

print("Spark session initialized successfully")
