In [0]:
import os

# 1. Standard setup: read the target environment and derive every name/path
#    used by the rest of the notebook.
dbutils.widgets.text("env", "dev", "Environment")
env = dbutils.widgets.get("env").strip()

# The widget value is interpolated into SQL identifiers and into the Volume
# path further down, so refuse anything that is not a plain identifier
# (ASCII letters, digits, underscores) before it is used anywhere.
if not env or not all(ch == "_" or (ch.isascii() and ch.isalnum()) for ch in env):
    raise ValueError(
        f"Invalid environment name {env!r}: use only letters, digits and underscores."
    )

catalog_name = "jet_engine_predictive_maintenance"
schema_name = env  # the schema is named after the environment
volume_name = "raw_data"

# Destination path in Unity Catalog
volume_path = f"/Volumes/{catalog_name}/{schema_name}/{volume_name}"

# Source path in the Git repo: the "data" folder that sits next to the
# parent of the current working directory.
repo_data_path = os.path.join(os.path.dirname(os.getcwd()), "data")

print(f"Running for environment: '{env}'")
print(f"Source Repo Path: '{repo_data_path}'")
print(f"Target Volume Path: '{volume_path}'")

# 2. Make sure the catalog, schema and volume exist. The statements run in
#    order because each object lives inside the one created before it, and
#    IF NOT EXISTS keeps re-runs from failing on objects that are already there.
for ddl_statement in (
    f"CREATE CATALOG IF NOT EXISTS {catalog_name}",
    f"CREATE SCHEMA IF NOT EXISTS {catalog_name}.{schema_name}",
    f"CREATE VOLUME IF NOT EXISTS {catalog_name}.{schema_name}.{volume_name}",
):
    spark.sql(ddl_statement)

# 3. Copy every CSV from the repo's data folder into the Volume.
#    Each file is read with plain Python I/O and written with dbutils.fs.put().
print("\nSyncing files from Git repo to the Volume using Python I/O...")

# List the CSV files in the local repo folder; sorted() keeps the copy order
# (and therefore the log output) deterministic across runs.
csv_files_in_repo = sorted(f for f in os.listdir(repo_data_path) if f.endswith('.csv'))

if not csv_files_in_repo:
    print(f"No CSV files found in '{repo_data_path}'.")

for file_name in csv_files_in_repo:
    # Source path on the local disk
    local_file_path = os.path.join(repo_data_path, file_name)

    # Destination path in the UC Volume
    volume_file_path = os.path.join(volume_path, file_name)

    # Read the whole file as text. The encoding is pinned to UTF-8 instead of
    # the platform default, and newline="" turns off newline translation so
    # the original line endings reach the Volume unchanged.
    with open(local_file_path, "r", encoding="utf-8", newline="") as f:
        file_content = f.read()

    # Write the content to the Volume, replacing any previous copy.
    dbutils.fs.put(volume_file_path, file_content, overwrite=True)
    print(f"  - Copied '{file_name}' to Volume.")

print("✅ Sync complete.")


# 4. Load every CSV found in the Volume into a table named after the file.
print("\nStarting table ingestion from Volume...")
csv_files_in_volume = [f.name for f in dbutils.fs.ls(volume_path) if f.name.endswith('.csv')]

if not csv_files_in_volume:
    print("No CSV files found in the Volume.")
else:
    for csv_file in csv_files_in_volume:
        file_path = os.path.join(volume_path, csv_file)

        # Derive the table name from the file name without its extension.
        # Every character that is not an ASCII letter, digit or underscore
        # (hyphens, spaces, dots, ...) becomes "_", so the result is always
        # usable as the last part of the three-part table name below.
        file_stem = os.path.splitext(csv_file)[0]
        table_name = "".join(
            ch if ch == "_" or (ch.isascii() and ch.isalnum()) else "_"
            for ch in file_stem
        )
        full_table_path = f"{catalog_name}.{schema_name}.{table_name}"

        # First row is the header; column types are inferred from the data.
        df = (
            spark.read.format("csv")
            .option("header", "true")
            .option("inferSchema", "true")
            .load(file_path)
        )

        # Replace the table contents on every run.
        df.write.mode("overwrite").saveAsTable(full_table_path)
        print(f"✅ Successfully ingested '{csv_file}' to table '{table_name}'.")

print("\n--- Ingestion complete! ---")