In [0]:

from pyspark.sql import SparkSession

# Load AWS credentials and the source S3 path from the helper module.
import sys

# sys.path entries must be directories (or zip archives), not individual .py
# files, so register the Utilities folder that contains aws_keys.py.
# NOTE: verify this path against your own Workspace.
sys.path.append('/Workspace/Users/your_user/Utilities')

from aws_keys import AWS_ACCESS_KEY, AWS_SECRET_KEY, s3_path

# Create (or reuse) the Spark session used by the Bronze pipeline.
spark = (
    SparkSession.builder
    .appName("VC_Lakehouse_Pipeline_Bronze")
    .getOrCreate()
)

# Configure the S3A connector on the session's Hadoop configuration:
# static access/secret keys, the eu-north-1 endpoint, and the simple
# key-based credentials provider.
hadoop_conf = spark._jsc.hadoopConfiguration()
hadoop_conf.set("fs.s3a.access.key", AWS_ACCESS_KEY)
hadoop_conf.set("fs.s3a.secret.key", AWS_SECRET_KEY)
hadoop_conf.set("fs.s3a.endpoint", "s3.eu-north-1.amazonaws.com")
hadoop_conf.set(
    "fs.s3a.aws.credentials.provider",
    "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider",
)


# Load the raw CSV from S3 into the Bronze DataFrame. The first row is
# treated as the header and column types are inferred by Spark.
df_bronze = (
    spark.read
    .format("csv")
    .options(header="true", inferSchema="true")
    .load(s3_path)
)

print("Bronze data loaded from S3")
# Preview the first five rows as a quick sanity check.
df_bronze.show(5)

# Target Delta table location (Databricks storage) and the table name it is
# registered under. Both are defined once so that the write, the table
# registration, and the log message cannot drift apart.
bronze_table_path = "/mnt/bronze/investments_VC"
bronze_table_name = "bronze_investments_VC"

# Write the Bronze DataFrame as Delta files, replacing whatever is already
# stored at the target path.
(
    df_bronze.write
    .format("delta")
    .mode("overwrite")
    .save(bronze_table_path)
)

# Register the table over the files just written; this is a no-op when a
# table with that name already exists.
spark.sql(
    f"CREATE TABLE IF NOT EXISTS {bronze_table_name} "
    f"USING DELTA LOCATION '{bronze_table_path}'"
)

print(f"Bronze Delta table saved: {bronze_table_name}")