In [4]:
# Workspace root; the raw inputs and the bronze-layer outputs both live beneath it.
BASE = "/home/jovyan/work"
# Directory holding the raw input files.
RAW = BASE + "/data/raw"
# Directory holding the bronze-layer Parquet output.
BRONZE = BASE + "/parquet/bronze"

In [6]:
from pyspark.sql.functions import current_timestamp, lit
import os

def load_to_bronze(name: str) -> None:
    """Ingest one raw CSV file into the bronze layer as Parquet.

    Reads ``{RAW}/{name}.csv`` with a header row and inferred column types,
    appends two lineage columns (``_ingest_ts`` and ``_source``), and
    overwrites the Parquet dataset at ``{BRONZE}/{name}``.

    Args:
        name: Dataset name. Used as the CSV file stem, the bronze output
            directory name, and the value stored in the ``_source`` column.

    Returns:
        None. When the CSV is missing a warning is printed and nothing is
        written; otherwise a confirmation line with the row count is printed.
    """
    source_path = f"{RAW}/{name}.csv"
    target_path = f"{BRONZE}/{name}"

    # Best-effort ingestion: a missing input is reported rather than raised,
    # so callers can keep processing other datasets.
    # NOTE(review): os.path.exists only sees the local filesystem — confirm
    # RAW is never pointed at a remote store (s3://, hdfs://, ...).
    if not os.path.exists(source_path):
        print(f"⚠️ File not found: {source_path}")
        return

    df = (
        spark.read.format("csv")
        .option("header", "true")
        .option("inferSchema", "true")
        .load(source_path)
        .withColumn("_ingest_ts", current_timestamp())
        .withColumn("_source", lit(name))
    )
    df.write.mode("overwrite").parquet(target_path)

    # Count the rows that actually landed in bronze. Calling df.count() here
    # would re-execute the uncached plan, i.e. read and parse the CSV a second
    # time, and would report the source count rather than the written count.
    written_rows = spark.read.parquet(target_path).count()
    print(f"✅ {name} → bronze written ({written_rows} rows)")


In [8]:
# 1) Ingest each raw dataset into bronze, one call per file
ingest_targets = ("users", "food_items", "intake_logs", "water_logs", "mood_logs")
for name in ingest_targets:
    load_to_bronze(name)


✅ users → bronze written (60 rows)
✅ food_items → bronze written (20 rows)
✅ intake_logs → bronze written (150 rows)
✅ water_logs → bronze written (120 rows)
✅ mood_logs → bronze written (80 rows)


In [10]:
# 2) Peek & schema check: read each bronze table back and print its row count,
#    schema, and first five rows.
datasets = ["users", "food_items", "intake_logs", "water_logs", "mood_logs"]
for t in datasets:
    try:
        df = spark.read.parquet(f"{BRONZE}/{t}")
        print(f"\n=== bronze/{t} ===")
        print(f"rows: {df.count()}")
        df.printSchema()
        df.show(n=5, truncate=False)
    except Exception as err:
        # Best-effort inspection: report the failure and continue with the next table.
        print(f"Skipped {t}: {err}")


=== bronze/users ===
rows: 60
root
 |-- user_id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- dob: date (nullable = true)
 |-- age: integer (nullable = true)
 |-- goals: string (nullable = true)
 |-- bmi: double (nullable = true)
 |-- _ingest_ts: timestamp (nullable = true)
 |-- _source: string (nullable = true)

+-------+---------+------+----------+---+-----+----+-------------------------+-------+
|user_id|name     |gender|dob       |age|goals|bmi |_ingest_ts               |_source|
+-------+---------+------+----------+---+-----+----+-------------------------+-------+
|1      |Krishna  |F     |2003-08-17|22 |NULL |24.6|2025-10-30 09:20:48.78916|users  |
|2      |Radharani|F     |2002-03-31|23 |NULL |20.0|2025-10-30 09:20:48.78916|users  |
|3      |Kishori  |F     |2000-10-01|25 |NULL |19.6|2025-10-30 09:20:48.78916|users  |
|4      |Jagannath|M     |1999-11-15|25 |NULL |21.3|2025-10-30 09:20:48.78916|users  |
|5      |Vittha