In [0]:
# Collect the names of the entries directly under the target directory and
# print them as a plain Python list.
# NOTE(review): the file-generation cell in this notebook writes to
# "/databricks/driver/databrics_data/", not to this /Volumes path — confirm
# that this is the directory you intend to inspect.
volume_dir = '/Volumes/workspace/default/databrics_data/'
files = [entry.name for entry in dbutils.fs.ls(volume_dir)]
print(files)

In [0]:
%pip install fastavro


In [0]:
import os
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.orc as orc
import xml.etree.ElementTree as ET

# Output directory for the generated sample files (the location the author
# marked as safe to use on Databricks Community).
base_path = "/databricks/driver/databrics_data/"
os.makedirs(name=base_path, exist_ok=True)  # no error if it already exists

# Sample employee records, written one row per employee for readability.
_columns = ("id", "name", "department", "salary")
_rows = [
    (1, "Preeti", "IT", 70000),
    (2, "Karthik", "Finance", 65000),
    (3, "Aarav", "HR", 60000),
]

# Pivot the rows into a {column name: list of values} mapping, keeping the
# column order given in `_columns`.
data = {column: list(values) for column, values in zip(_columns, zip(*_rows))}

# One DataFrame column per key of `data`.
df = pd.DataFrame.from_dict(data)

# Tabular exports of `df`: one file per format, all written under base_path.

# Comma-separated values, without the pandas index column.
csv_file = os.path.join(base_path, "employees.csv")
df.to_csv(csv_file, index=False)

# Line-delimited JSON: one JSON object per row.
json_file = os.path.join(base_path, "employees.json")
df.to_json(json_file, orient="records", lines=True)

# Excel workbook, again without the index column.
xlsx_file = os.path.join(base_path, "employees.xlsx")
df.to_excel(xlsx_file, index=False)

# Parquet and ORC are both written from a single Arrow table built from `df`.
arrow_table = pa.Table.from_pandas(df)
pq.write_table(arrow_table, os.path.join(base_path, "employees.parquet"))
orc.write_table(arrow_table, os.path.join(base_path, "employees.orc"))

# XML export: an <employees> root with one <employee> child per DataFrame row.
# Every column becomes a child element named after the column, whose text is
# the cell value converted with str().
root = ET.Element("employees")
for _, record in df.iterrows():
    employee_node = ET.SubElement(root, "employee")
    for field_name, field_value in record.items():
        field_node = ET.SubElement(employee_node, field_name)
        field_node.text = str(field_value)

xml_tree = ET.ElementTree(root)
xml_tree.write(os.path.join(base_path, "employees.xml"))

# TXT: a one-line plain-text note. The encoding is pinned to UTF-8 so the
# bytes on disk do not depend on the locale default of the machine running
# the notebook.
notes_path = os.path.join(base_path, "notes.txt")
with open(notes_path, "w", encoding="utf-8") as f:
    f.write("This is a dummy plain text file for Databricks demo.\n")

# Final status line, printed only after every write above has completed.
print("✅ Dummy files created successfully at:", base_path)
