In [None]:
# Databricks notebook or Python script
# ------------------------------------
# Title: Explore TPCH Sample Database
# Description:
#   This script inspects the sample "tpch" database available in Databricks.
#   It lists available databases, enumerates all tables in "tpch",
#   prints their schemas, and displays sample records.
# ------------------------------------


In [None]:

# Import required packages
from pyspark.sql import SparkSession
import pandas as pd


In [None]:

# Reuse the Spark session that is already active, or start a new one
# when none exists yet.
session_builder = SparkSession.builder
spark = session_builder.getOrCreate()


In [None]:

# ---------------------------------------------------
# Step 1: Enumerate the databases visible to this session
# ---------------------------------------------------
print("=== AVAILABLE DATABASES ===")
databases = spark.catalog.listDatabases()

# One bullet line per database; nothing is emitted when the list is empty.
database_bullets = [f"- {entry.name}" for entry in databases]
if database_bullets:
    print("\n".join(database_bullets))
print()


In [None]:

# ---------------------------------------------------
# Step 2: Enumerate the tables registered under 'tpch'
# ---------------------------------------------------
print("=== TABLES IN 'tpch' DATABASE ===")
tables = spark.catalog.listTables("tpch")

# Collect (name, type) pairs first, then tabulate them with pandas so the
# listing prints as an aligned two-column table.
table_rows = [(entry.name, entry.tableType) for entry in tables]
tables_df = pd.DataFrame(table_rows, columns=["Table", "Type"])
print(tables_df)
print()


In [None]:

# ---------------------------------------------------
# Step 3: Look at the column layout of one table
# ---------------------------------------------------
# 'customer' serves as the worked example; the resulting DataFrame is kept
# in `customer_df` for the sampling step that follows.
CUSTOMER_TABLE = "tpch.customer"

print(f"=== SCHEMA: {CUSTOMER_TABLE} ===")
customer_df = spark.table(CUSTOMER_TABLE)
customer_df.printSchema()
print()


In [None]:

# ---------------------------------------------------
# Step 4: Preview a handful of rows
# ---------------------------------------------------
# `.show()` prints a plain-text table and therefore works in a plain Python
# script as well. Inside a Databricks notebook,
# `display(customer_df.limit(5))` is the richer alternative.
SAMPLE_ROW_COUNT = 5

print("=== SAMPLE DATA: tpch.customer ===")
customer_df.show(n=SAMPLE_ROW_COUNT)
print()


In [None]:

# ---------------------------------------------------
# Step 5: Inspect schemas of all tables (optional)
# ---------------------------------------------------
# Iterates over `tables`, the listing produced by
# spark.catalog.listTables("tpch") in Step 2.
#
# That listing also reports session-scoped temporary views, which are not
# stored in 'tpch'. Looking one up as tpch.<name> fails, so temporary views
# are resolved by their bare name and only real catalog entries get the
# database prefix.
print("=== ALL TABLE SCHEMAS IN 'tpch' ===")
for t in tables:
    print(f"--- {t.name} ---")
    table_ref = t.name if t.isTemporary else f"tpch.{t.name}"
    df = spark.table(table_ref)
    df.printSchema()
    print()
