# Spark with MinIO Example

This notebook demonstrates how to use Spark with MinIO object storage.

In [None]:
import os

from pyspark.sql import SparkSession
from pyspark.sql.functions import *

# Create Spark session with MinIO configuration.
# Connection settings are read from environment variables so real credentials
# never have to be stored in the notebook; when a variable is unset, the
# local-development value this example has always used is applied instead.
minio_endpoint = os.environ.get("MINIO_ENDPOINT", "http://minio:9000")
minio_access_key = os.environ.get("MINIO_ACCESS_KEY", "minioadmin")
minio_secret_key = os.environ.get("MINIO_SECRET_KEY", "minioadmin")

# Derive the S3A SSL flag from the endpoint scheme so the two settings cannot
# contradict each other ("false" for the default http:// endpoint).
minio_ssl_enabled = str(minio_endpoint.lower().startswith("https://")).lower()

spark = (
    SparkSession.builder
    .appName("MinIO Example")
    .config("spark.hadoop.fs.s3a.endpoint", minio_endpoint)
    .config("spark.hadoop.fs.s3a.access.key", minio_access_key)
    .config("spark.hadoop.fs.s3a.secret.key", minio_secret_key)
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .config("spark.hadoop.fs.s3a.connection.ssl.enabled", minio_ssl_enabled)
    .getOrCreate()
)

print("Spark session created!")
print(f"Spark version: {spark.version}")

In [None]:
# Build a small in-memory DataFrame: column names first, then one tuple per row
columns = ["id", "name", "age"]
data = [
    (1, "Alice", 25),
    (2, "Bob", 30),
    (3, "Charlie", 35),
]

df = spark.createDataFrame(data, schema=columns)
df.show()

# Write the rows to the bucket as Parquet, replacing any previous output
df.write.parquet("s3a://data-lake/sample-data/", mode="overwrite")
print("Data saved to MinIO!")

In [None]:
# Load the Parquet dataset back from the bucket and display its rows
parquet_reader = spark.read
df_read = parquet_reader.parquet("s3a://data-lake/sample-data/")
df_read.show()
print("Data read from MinIO!")