Create Tables Without Defining Schemas

In [None]:
from delta.tables import *
from pyspark.sql.functions import current_date

# Daily load of one parquet file into a Delta table, partitioned by load date.
# Relies on notebook globals defined elsewhere: spark, schemaName, tableName, filePath.

# Delete any rows already loaded today so re-running the cell on the same day
# does not append duplicates.
# NOTE: DeltaTable.isDeltaTable() expects a storage *path*; given a
# "schema.table" name it returns False, so the cleanup below was silently
# skipped. Ask the catalog instead. The two-argument form (table, database)
# is used because PySpark 3.3 does not parse a qualified single name.
if spark.catalog.tableExists(tableName, schemaName):
    spark.sql(f"DELETE FROM {schemaName}.{tableName} \
    WHERE loading_date = current_date()")

# Read data
df = spark.read.parquet(f"Files/{schemaName}/{filePath}/{tableName}.parquet")

# Add a loading date column to the DataFrame; it doubles as the partition key
# and as the filter used by the cleanup step above.
df = df.withColumn("loading_date", current_date())

# Append to the Delta table. mergeSchema lets columns that are new in the
# incoming file be added to the table schema instead of failing the write.
df.write.format("delta") \
    .mode("append") \
    .partitionBy("loading_date") \
    .option("mergeSchema", "true") \
    .saveAsTable(f"{schemaName}.{tableName}")

Define Schemas with the DataFrame API

In [None]:
from pyspark.sql.types import *

# Explicit column layout for the customer data: name, Spark type, nullable.
# Built incrementally with StructType.add(); every column is nullable.
schema = (
    StructType()
    .add("CustomerID", IntegerType(), True)
    .add("NameStyle", BooleanType(), True)
    .add("Title", StringType(), True)
    .add("FirstName", StringType(), True)
)

# Load the raw JSON files, enforcing the schema above rather than inferring one
df = (
    spark.read
    .schema(schema)
    .json("/path/to/raw/data")
)

# Persist the result as a managed Delta table
(
    df.write
    .format("delta")
    .saveAsTable("adventureworks.customer")
)

SQL DDL Statements

In [None]:
%sql
-- Make sure the target schema exists before creating the table in it.
CREATE SCHEMA IF NOT EXISTS adventureworks;

-- Create the customer Delta table inside the adventureworks schema.
-- The table name is schema-qualified so it does not land in whatever schema
-- happens to be current, and IF NOT EXISTS keeps the cell re-runnable.
CREATE TABLE IF NOT EXISTS adventureworks.customer (
    CustomerID INT COMMENT 'Customer identifier',
    NameStyle BOOLEAN COMMENT 'Style of the name',
    Title STRING COMMENT 'Title of the customer',
    FirstName STRING COMMENT 'First name of the customer'
)
USING delta