In [0]:
%python
# Example of extracting schema information using Spark SQL
from pyspark.sql import SparkSession

# getOrCreate() returns the already-running session when there is one.
spark = SparkSession.builder.appName("ExportOlympicCatalog").getOrCreate()

# Set the correct catalog
spark.sql("USE CATALOG olympics")

# List all databases (kept in `databases` so later cells can reuse the result)
databases = spark.sql("SHOW DATABASES").collect()

# Iterate through each database and print the DESCRIBE output of every table
for db in databases:
    db_name = db['databaseName']
    # Backtick-quote the identifier (doubling embedded backticks) so names
    # containing special characters or reserved words still parse.
    quoted_db = "`" + db_name.replace("`", "``") + "`"
    tables = spark.sql(f"SHOW TABLES IN {quoted_db}").collect()

    for table in tables:
        # SHOW TABLES also returns session-scoped temporary views; they do not
        # live in this database, so DESCRIBE <db>.<view> would fail on them.
        if table['isTemporary']:
            continue

        table_name = table['tableName']
        quoted_table = "`" + table_name.replace("`", "``") + "`"
        schema = spark.sql(f"DESCRIBE {quoted_db}.{quoted_table}").collect()

        # Print the schema information, one DESCRIBE row per line
        print(f"Schema for {db_name}.{table_name}:")
        for column in schema:
            print(column)

In [0]:
%python
import csv
import io

# Export one CSV row per table column to DBFS.
# Relies on `spark` and `databases` defined by the schema-listing cell above.

# Use dbutils to handle DBFS file operations
dbutils.fs.mkdirs('/mnt')

# Build the CSV in memory; dbutils.fs.put takes the whole content as a string.
output = io.StringIO()
schema_writer = csv.writer(output)
schema_writer.writerow(['Database', 'Table', 'Column', 'DataType', 'Comment'])

for db in databases:
    db_name = db['databaseName']
    # Backtick-quote the identifier (doubling embedded backticks) so names
    # containing special characters or reserved words still parse.
    quoted_db = "`" + db_name.replace("`", "``") + "`"
    tables = spark.sql(f"SHOW TABLES IN {quoted_db}").collect()

    for table in tables:
        # SHOW TABLES also returns session-scoped temporary views; they do not
        # live in this database, so DESCRIBE <db>.<view> would fail on them.
        if table['isTemporary']:
            continue

        table_name = table['tableName']
        quoted_table = "`" + table_name.replace("`", "``") + "`"
        schema = spark.sql(f"DESCRIBE {quoted_db}.{quoted_table}").collect()

        for column in schema:
            col_name = column['col_name']
            # After the real columns, DESCRIBE may append a blank separator row
            # and '# ...' section headers (e.g. partition information) followed
            # by repeated column rows. Stop at the first such marker so only
            # actual columns are exported, each exactly once.
            if not col_name or col_name.startswith('#'):
                break
            schema_writer.writerow([db_name, table_name, col_name, column['data_type'], column['comment']])

csv_content = output.getvalue()
dbutils.fs.put('/mnt/exported_olympic_schemas.csv', csv_content, overwrite=True)


In [0]:
%python
# Verify the export: show what is currently stored under /mnt, where the
# schema CSV was written.
mnt_listing = dbutils.fs.ls('/mnt')
display(mnt_listing)

In [0]:
%python
# Copy the exported CSV from DBFS to the driver node's local file system.
local_path = '/tmp/exported_olympic_schemas.csv'
dbfs_path = '/mnt/exported_olympic_schemas.csv'

# The 'file:' scheme is what makes dbutils.fs target the local disk instead of
# DBFS. The destination is local_path, so the path displayed below is where
# the file actually lands. Note this is the driver's disk, not the desktop of
# whoever is viewing the notebook.
dbutils.fs.cp(dbfs_path, f"file:{local_path}")

# Display the local (driver-side) file path of the copy
local_path

In [0]:
%python
# Copy the exported CSV from DBFS to the driver node's local file system.
local_path = '/tmp/exported_olympic_schemas.csv'
dbfs_path = '/mnt/exported_olympic_schemas.csv'

# dbutils.fs resolves scheme-less paths against DBFS, so a bare '/tmp/...'
# destination would only create dbfs:/tmp/...; the 'file:' scheme is required
# to write to the local disk.
dbutils.fs.cp(dbfs_path, f"file:{local_path}")

# Display the local (driver-side) file path of the copy
local_path