In [1]:
import os
from pyspark.sql import SparkSession

# --------------------------
# ✅ Environment Setup
# --------------------------
# Tell Hadoop where its Windows install lives and expose its bin directory
# on PATH.
hadoop_home = "C:\\hadoop"
hadoop_bin = hadoop_home + "\\bin"
os.environ["HADOOP_HOME"] = hadoop_home

# Append only when the entry is missing so that re-running this cell does not
# keep growing PATH; .get() also covers a process started without PATH set.
current_path = os.environ.get("PATH", "")
if hadoop_bin not in current_path.split(os.pathsep):
    os.environ["PATH"] = (
        f"{current_path}{os.pathsep}{hadoop_bin}" if current_path else hadoop_bin
    )

# Make sure Spark uses the right Python: driver and workers are pointed at the
# same interpreter.
python_exe = r"C:\Users\subba\anaconda3\envs\bigdataenv\python.exe"
os.environ["PYSPARK_PYTHON"] = python_exe
os.environ["PYSPARK_DRIVER_PYTHON"] = python_exe

# --------------------------
# ✅ Start Spark Session with Native IO disabled
# --------------------------
# All builder options are applied in one parenthesised chain; getOrCreate()
# hands back the session that the rest of the script uses as `spark`.
# NOTE(review): hadoop.security.group.mapping normally takes a class name —
# confirm that the value "false" is intentional.
spark = (
    SparkSession.builder
    .appName("WriteCSVExample")
    .config("spark.driver.memory", "2g")
    .config("spark.python.worker.reuse", "false")
    .config("spark.hadoop.hadoop.security.group.mapping", "false")
    .config("spark.hadoop.io.nativeio.enabled", "false")
    .getOrCreate()
)

# --------------------------
# ✅ Create Sample DataFrame
# --------------------------
# Column names first, then the rows: one (id, name, age) tuple per person.
columns = ["id", "name", "age"]
data = [(1, "Alice", 23), (2, "Bob", 30), (3, "Charlie", 28), (4, "David", 35)]

df = spark.createDataFrame(data, schema=columns)

# Display the in-memory frame before it is written out.
print("✅ Original DataFrame:")
df.show()

# --------------------------
# ✅ Write DataFrame to CSV
# --------------------------
# Relative output location; the result is a folder, as the message below says.
output_path = "lab2_output/task4_result"

# Configure the writer first (replace existing output, emit a header row),
# then trigger the actual write.
csv_writer = df.write.mode("overwrite").option("header", "true")
csv_writer.csv(output_path)

print(f"✅ DataFrame written successfully to folder: {output_path}")

# --------------------------
# ✅ Read Back CSV to confirm
# --------------------------
# Treat the first line as the header and let Spark infer column types.
# Row order after the round trip is not guaranteed to match the original.
csv_reader = spark.read.options(header=True, inferSchema=True)
df2 = csv_reader.csv(output_path)

print("✅ DataFrame read back from CSV:")
df2.show()

# --------------------------
# ✅ Stop Spark
# --------------------------
# Shut the session down now that the write/read round trip has been displayed.
# NOTE(review): this line is skipped if an earlier step raises — consider
# try/finally if the script is run outside a notebook.
spark.stop()


✅ Original DataFrame:
+---+-------+---+
| id|   name|age|
+---+-------+---+
|  1|  Alice| 23|
|  2|    Bob| 30|
|  3|Charlie| 28|
|  4|  David| 35|
+---+-------+---+

✅ DataFrame written successfully to folder: lab2_output/task4_result
✅ DataFrame read back from CSV:
+---+-------+---+
| id|   name|age|
+---+-------+---+
|  3|Charlie| 28|
|  1|  Alice| 23|
|  4|  David| 35|
|  2|    Bob| 30|
+---+-------+---+

