In [0]:
# Declare the notebook's two text widgets; both start out empty.
dbutils.widgets.text("sql_query", "")
dbutils.widgets.text("database", "")

# Read the widget values, substituting the built-in default for any that is
# empty.
database = dbutils.widgets.get("database") or "ifrs17_cur"
sql_query = dbutils.widgets.get("sql_query") or "SELECT * FROM data_ifrs17 LIMIT 10"

# Switch the session to the selected database before the query is executed.
spark.sql(f"USE {database}")

In [0]:
# Build the DataFrame for the requested query; the following cells split it
# into CSV chunk files.
df = spark.sql(sql_query)

In [0]:
import hashlib
import time

# Build the output prefix for this run: take the current time in 0.1 ms ticks,
# hash its decimal string with MD5 and keep the first six hex characters as a
# short run identifier.
ticks = int(time.time() * 10000)
timestamp = str(ticks)
digest = hashlib.md5(timestamp.encode()).hexdigest()
short_hash = digest[:6]
base_path = f"dbfs:/tmp/api_responses/result_{short_hash}"

In [0]:
# Maximum number of records written to each CSV file.
chunk_size = 1000

# Temporary column holding each row's 0-based position; it is dropped again
# before a chunk is written, so it never appears in the CSV output.
row_idx_col = "__chunk_row_idx__"

# Number every row exactly once and cache the result, so that each chunk is a
# plain index-range filter over the same materialised rows.
#
# The earlier `df.limit(end).subtract(df.limit(start))` approach had three
# problems: `subtract` has EXCEPT DISTINCT semantics and therefore dropped
# duplicate rows; `limit` without an ordering is not guaranteed to return the
# same rows on every evaluation, so chunks could overlap or skip rows; and the
# query was re-executed for every chunk.
#
# NOTE(review): the un-partitioned window moves all rows through a single task,
# and `.cache()` may be unavailable on some compute types (e.g. serverless) --
# confirm this matches the clusters the notebook runs on.
indexed_df = df.selectExpr(
    "*",
    f"row_number() OVER (ORDER BY monotonically_increasing_id()) - 1 AS {row_idx_col}",
).cache()

try:
    # Counting also materialises the cache before the per-chunk writes start.
    total_rows = indexed_df.count()
    num_chunks = (total_rows + chunk_size - 1) // chunk_size  # ceiling division

    file_paths = []
    for i in range(num_chunks):
        start = i * chunk_size
        end = min(start + chunk_size, total_rows)  # the last chunk may be short
        chunk_df = indexed_df.filter(
            (indexed_df[row_idx_col] >= start) & (indexed_df[row_idx_col] < end)
        ).drop(row_idx_col)

        # Write the chunk to a CSV file
        chunk_path = f"{base_path}_part_{i+1}.csv"
        chunk_df.coalesce(1).write.mode("overwrite").option("header", "true").csv(chunk_path)

        # Spark writes a directory containing a part file; pick out the actual CSV.
        files = dbutils.fs.ls(chunk_path)
        csv_file = [file.path for file in files if file.path.endswith(".csv")][0]

        # Row indices are contiguous, so the chunk holds exactly `end - start`
        # rows; no extra count job is needed.
        file_paths.append((csv_file, end - start))
finally:
    # Release the cached data whether or not the writes succeeded.
    indexed_df.unpersist()

In [None]:
# Create a summary CSV file listing every chunk file and its record count.
summary_path = f"{base_path}_summary.csv"
summary_data = list(file_paths)

# Give the schema explicitly: with column names alone Spark has to infer the
# types from the data and raises when `file_paths` is empty (i.e. the query
# returned no rows).
summary_df = spark.createDataFrame(summary_data, "file_path string, record_count long")

# Write the summary as a single CSV part file with a header row.
summary_df.coalesce(1).write.mode("overwrite").option("header", "true").csv(summary_path)

# Spark writes a directory; locate the CSV part file inside it.
summary_file = [file.path for file in dbutils.fs.ls(summary_path) if file.path.endswith(".csv")][0]

# Exit the notebook and return the summary file path
dbutils.notebook.exit(summary_file)