In [6]:
import os

from pyspark.sql import Row
from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException

# Initialize a SparkSession (getOrCreate returns the existing session when
# one is already active, so re-running this cell is safe).
spark = (
    SparkSession.builder
    .appName("DataFrameWriterSaveModesExample")
    .getOrCreate()
)

# Sample data: the rows written by the overwrite-mode cell
data = [
    Row(name="Alice", age=25, country="USA"),
    Row(name="Bob", age=30, country="UK"),
]

# Additional data for append mode (also reused by the ignore and
# errorIfExists cells)
additional_data = [
    Row(name="Carlos", age=35, country="Spain"),
    Row(name="Daisy", age=40, country="Australia"),
]

# Create DataFrames; the column names are taken from the Row field names
df = spark.createDataFrame(data)
additional_df = spark.createDataFrame(additional_data)

# Define output path.
# The default is the original user-specific directory; set the
# SAVE_MODES_OUTPUT_PATH environment variable to run the notebook elsewhere
# without editing this cell.
# NOTE(review): the overwrite-mode write replaces whatever already lives at
# this path, so it should point at a directory dedicated to this demo.
output_path = os.environ.get("SAVE_MODES_OUTPUT_PATH", "/home/itv010047/tmp")

In [7]:
# Function to list files in a directory
def list_files_in_directory(path):
    """Return the names of the entries in ``path``, sorted alphabetically.

    ``os.listdir`` yields entries in arbitrary order, so the names are sorted
    to keep the printed listings stable from one run to the next.

    Args:
        path: Directory to inspect, resolved by ``os.listdir`` (i.e. on the
            filesystem of the machine running this Python process).

    Returns:
        A sorted list of entry names (not full paths); empty when the
        directory has no entries.

    Raises:
        OSError: Propagated from ``os.listdir``, e.g. when ``path`` does not
            exist or is not a directory.
    """
    return sorted(os.listdir(path))

# Show initial DataFrame
# show() prints the rows as a text table, giving a baseline view of the data
# before any of the write cells run.
print("Initial DataFrame:")
df.show()

Initial DataFrame:
+-----+---+-------+
| name|age|country|
+-----+---+-------+
|Alice| 25|    USA|
|  Bob| 30|     UK|
+-----+---+-------+



In [None]:
# Overwrite mode: write the base DataFrame as CSV with a header row.
# NOTE(review): list_files_in_directory uses os.listdir, i.e. the local
# filesystem of this Python process; this assumes output_path resolves to
# local storage rather than e.g. HDFS -- confirm for the target cluster.
df.write.mode("overwrite").option("header", True).csv(output_path)
print(
    "Files after overwrite mode:",
    list_files_in_directory(output_path),
)

# Preview the rows that the following cells write
print("Additional DataFrame:")
additional_df.show()

In [None]:
# Append mode: write the additional rows as CSV (with header) to the same
# path, then list the directory contents again.
additional_df.write.mode("append").option("header", True).csv(output_path)
print(
    "Files after append mode:",
    list_files_in_directory(output_path),
)

In [None]:
# Ignore mode: request the same write with mode "ignore", then list the
# directory so the result can be compared with the previous listing.
additional_df.write.mode("ignore").option("header", True).csv(output_path)
print(
    "Files after ignore mode:",
    list_files_in_directory(output_path),
)

In [None]:
# Write to CSV format using errorIfExists mode
# The failure is the point of this cell, so it is caught and printed instead
# of aborting the notebook. Only AnalysisException (expected to be what Spark
# raises when the target path already exists) is caught, so unrelated
# problems such as a stopped session or an undefined name still surface as
# real errors instead of being reported as the expected outcome.
try:
    additional_df.write.csv(output_path, mode="errorIfExists", header=True)
except AnalysisException as e:
    print("An error occurred in errorIfExists mode:", e)

In [None]:
# Stop the SparkSession
# Run this last: presumably `df` and `additional_df` can no longer be used
# once the session that created them has been stopped.
spark.stop()