In [0]:
# Load the Delta table into a Spark DataFrame and show it.
catalog_name = "sam"
schema_name = "oecd"
ia_table_name = "IA_ita2020_matrix_f3a4c0c9"

# Three-level table identifier: <catalog>.<schema>.<table>
qualified_table_name = ".".join([catalog_name, schema_name, ia_table_name])

df_matrix = spark.table(qualified_table_name)
display(df_matrix)

Sector,Impact
0,-1.5881140895228818
1,-1.0135630128423876
2,-1.2966814601026833
3,-4.858178242064764
4,1.9933717800950304
5,2.1889784487949187
6,0.2928543286912758
7,-0.1857329330846195
8,3.5328297959107893
9,-0.2229547544511997


In [0]:
# Print the DataFrame's textual representation (column names and types),
# not its rows.
print(repr(df_matrix))

DataFrame[Sector: bigint, Impact: double]


In [0]:
import os
# Export df_matrix as ONE CSV file.
#
# Spark always writes a *directory* of part files, even when the DataFrame has
# a single partition. To end up with a single, well-named CSV file the data is
# therefore:
#   1. repartitioned to one partition and written to a temporary directory,
#   2. located as the lone part file inside that directory,
#   3. moved to its final path, and
#   4. the temporary directory is removed (always, even if a step fails).

# Define the path variables
catalog_name = "sam"
schema_name = "oecd"
volume_name = "ia_outputs"  # this is a managed volume in the unity catalog
table_name = "ita2020_matrix_f3a4c0c9.csv"

# Final path in the catalog where the single CSV file should be stored
final_path = f"/Volumes/{catalog_name}/{schema_name}/{volume_name}/csv/{table_name}"

# Temporary directory that receives Spark's part files.
# NOTE(review): the /dbfs/ prefix is normally used with local-file APIs; when
# passed to Spark and dbutils this string is presumably resolved as a location
# in the DBFS root rather than inside the volume. Confirm this is the intended
# scratch location.
temp_path = f"/dbfs/Volumes/{catalog_name}/{schema_name}/{volume_name}/csv/{table_name}"

# Repartition the DataFrame to have only one partition (=> one part file)
df_repartitioned = df_matrix.repartition(1)

try:
    # "overwrite" lets the cell be re-run even if an earlier, interrupted run
    # left the temporary directory behind (the default mode would raise).
    df_repartitioned.write.mode("overwrite").option("header", "true").csv(temp_path)

    # List the temporary directory and pick the CSV part file
    files = dbutils.fs.ls(temp_path)
    csv_file = next((file.path for file in files if file.path.endswith(".csv")), None)
    if csv_file is None:
        # Fail with an explicit message instead of an opaque IndexError
        raise FileNotFoundError(f"No CSV part file found in {temp_path}")

    # Move the part file to the final path
    dbutils.fs.mv(csv_file, final_path)
finally:
    # Clean up the temporary directory whether or not the export succeeded
    dbutils.fs.rm(temp_path, True)

print(f"DataFrame written to single CSV file at {final_path}")


DataFrame written to single CSV file at /Volumes/sam/oecd/ia_outputs/csv/ita2020_matrix_f3a4c0c9.csv


In [0]:
# Write df_matrix as CSV using Spark's default layout: `path` ends up as a
# directory containing one part file per partition, not a single CSV file.

# Build the output location.
# NOTE(review): this path is under /mnt/, which looks like a DBFS mount point
# rather than a Unity Catalog volume path (/Volumes/...) — confirm the
# intended destination.
catalog_name = "sam"
schema_name = "oecd"
volume_name = "outputs"
path = f"/mnt/{catalog_name}/{schema_name}/{volume_name}/IA_ita2020_matrix_f3a4c0c9.csv"

# Write the DataFrame to CSV format. "overwrite" replaces a previous export so
# the cell can be re-run; the default save mode raises if `path` already exists.
df_matrix.write.mode("overwrite").option("header", "true").csv(path)

print(f"DataFrame stored in CSV format at {path}")


DataFrame stored in CSV format at /mnt/sam/oecd/outputs/IA_ita2020_matrix_f3a4c0c9.csv


In [0]:
# Generic writer API variant: the result is still a directory of part files
# rather than a single CSV file.
# NOTE(review): the target is a relative path, not a /Volumes/... location —
# confirm where it resolves on this workspace.
output_location = "sam.oecd.outputs/IA_ita2020_matrix_f3a4c0c9.csv"

csv_writer = (
    df_matrix.write
    .format("csv")
    .mode("overwrite")
    .option("header", "true")
)
csv_writer.save(output_location)