In [2]:
from pyspark.sql import SparkSession

# Obtain the Spark session for this connectivity check; getOrCreate() reuses
# an already-running session if one exists instead of starting a second one.
# NOTE(review): no S3A/MinIO endpoint or credentials are set here, so they
# presumably come from spark-defaults.conf or the environment -- confirm.
spark = (
    SparkSession.builder
    .appName("MinIO Connection Test")
    .getOrCreate()
)

print("Spark Session created successfully!")

# -------------------------------------------------------------------------

# Column names for the sample rows below, in positional order.
columns = ["date", "flare_type", "intensity"]

# Three hand-written sample rows: (date string, flare class label, intensity).
data = [
    ("2024-01-01", "M-Class", 0.55),
    ("2024-01-02", "C-Class", 0.12),
    ("2024-01-03", "X-Class", 0.99),
]

# Build a small in-memory DataFrame; column types are inferred by Spark from
# the Python values because only column names are supplied as the schema.
df = spark.createDataFrame(data, schema=columns)
print("\nLocal DataFrame Created!")
df.show()

# -------------------------------------------------------------------------

# Round-trip check: write the sample DataFrame to object storage as Parquet,
# then read it back and confirm the row count matches what was written.
s3_path = "s3a://raw-data/test-connection/"

print(f"Attempting to write to {s3_path} ...")
try:
    df.write.mode("overwrite").parquet(s3_path)
except Exception as e:
    # Broad catch is deliberate: this is a top-level smoke test and any
    # failure (config, network, credentials) should be reported, not raised.
    write_ok = False
    print(f"Write Failed! Error: {e}")
else:
    write_ok = True
    print("Write Successful! Data is in MinIO.")

# -------------------------------------------------------------------------

if write_ok:
    print(f"\nAttempting to read back from {s3_path} ...")
    try:
        # Spark reads lazily, so show() and count() are kept inside the try:
        # they are the calls that actually fetch data from storage.
        df_read = spark.read.parquet(s3_path)
        print("Read Successful! Here is the data from MinIO:")
        df_read.show()
        count = df_read.count()
        print(f"Total rows read: {count}")

        # Compare against the DataFrame that was written rather than a
        # hard-coded literal, so the check stays valid if the sample changes.
        expected_rows = df.count()
        if count == expected_rows:
            print("GREAT SUCCESS! Your Data Engineering Pipeline is READY.")
        else:
            print("Warning: Row count doesn't match.")
            print(f"Expected {expected_rows} rows, read {count}.")
    except Exception as e:
        print(f"Read Failed! Error: {e}")
else:
    # Without this guard, files left at s3_path by an earlier run could be
    # read back and reported as a success even though this write failed.
    print("\nSkipping read-back check because the write did not succeed.")


Spark Session created successfully!

Local DataFrame Created!
+----------+----------+---------+
|      date|flare_type|intensity|
+----------+----------+---------+
|2024-01-01|   M-Class|     0.55|
|2024-01-02|   C-Class|     0.12|
|2024-01-03|   X-Class|     0.99|
+----------+----------+---------+

Attempting to write to s3a://raw-data/test-connection/ ...
Write Successful! Data is in MinIO.

Attempting to read back from s3a://raw-data/test-connection/ ...
Read Successful! Here is the data from MinIO:
+----------+----------+---------+
|      date|flare_type|intensity|
+----------+----------+---------+
|2024-01-01|   M-Class|     0.55|
|2024-01-02|   C-Class|     0.12|
|2024-01-03|   X-Class|     0.99|
+----------+----------+---------+

Total rows read: 3
GREAT SUCCESS! Your Data Engineering Pipeline is READY.
