In [2]:
# Create materialized_etl.orders_etl from dbo_1.orders (2019 and 2020 orders only),
# casting the line number / price / tax columns and adding audit columns.
# The CTAS only runs when SHOW TABLES does not already list the table.

table_exists = spark.sql("SHOW TABLES IN materialized_etl LIKE 'orders_etl'").count() > 0
if not table_exists:
    # The guard has already ruled out an existing table, so OR REPLACE only
    # matters if the table appears between the check and this statement.
    spark.sql("""
    CREATE OR REPLACE TABLE materialized_etl.orders_etl AS
    SELECT
        SalesOrderNumber,
        cast(SalesOrderLineNumber as varchar(20)) SalesOrderLineNumber,
        OrderDate,
        CustomerName,
        Email,
        Item,
        Quantity,
        cast(UnitPrice as decimal(18,2)) UnitPrice,
        cast(Tax as decimal(18,2)) Tax,
        'I' action_type,
        current_timestamp() row_insert_timestamp,
        current_timestamp() row_update_timestamp
    FROM dbo_1.orders
    where year(OrderDate) in (2020, 2019)
    """)
    print("Table 'materialized_etl.orders_etl' created successfully.")
else:
    # Report the table that was actually checked (orders_etl, not orders).
    print("Table 'materialized_etl.orders_etl' already exists.")

StatementMeta(, 766f0a55-b856-4b0f-98b8-9681d176c4df, 4, Finished, Available, Finished)

Table 'materialized_etl.orders_etl' created successfully.


In [1]:
# Drop materialized_etl.orders_etl; IF EXISTS keeps the cell re-runnable when
# the table has already been removed.
spark.sql("DROP TABLE IF EXISTS materialized_etl.orders_etl")

StatementMeta(, 766f0a55-b856-4b0f-98b8-9681d176c4df, 3, Finished, Available, Finished)

DataFrame[]

In [None]:
from pyspark.sql import *
# DDL for the materialized_etl.orders_etl_vw view: the 2020 rows of
# dbo_1.orders with the same casts and audit columns as the ETL table.
orders_view_ddl = """
CREATE OR REPLACE VIEW materialized_etl.orders_etl_vw AS
SELECT 
    SalesOrderNumber,
    cast(SalesOrderLineNumber as varchar(20)) AS SalesOrderLineNumber,
    OrderDate,
    CustomerName,
    Email,
    Item,
    Quantity,
    cast(UnitPrice as decimal(18,2)) AS UnitPrice,
    cast(Tax as decimal(18,2)) AS Tax,
    'I' AS action_type,
    current_timestamp() AS row_insert_timestamp,
    current_timestamp() AS row_update_timestamp
FROM dbo_1.orders
WHERE year(OrderDate) in (2020)
"""

# Create (or replace) the view
spark.sql(orders_view_ddl)

In [None]:
# Read the view through its schema-qualified name. The view is created as
# materialized_etl.orders_etl_vw, so the bare name only resolves when
# materialized_etl happens to be the session's current database.
view_data = spark.table("materialized_etl.orders_etl_vw")

# Render the view's rows
display(view_data)

In [2]:
# Create the materialized_t.orders Delta table unless SHOW TABLES already lists it

table_exists = spark.sql("SHOW TABLES IN materialized_t LIKE 'orders'").count() > 0

if table_exists:
    # Nothing to do: the table is already present
    print("Table 'materialized_t.orders' already exists.")
else:
    # Explicit column definitions for the target table
    spark.sql("""
    CREATE TABLE materialized_t.orders (
        SalesOrderNumber      VARCHAR(20) NOT NULL,
        SalesOrderLineNumber  VARCHAR(20) NOT NULL,
        OrderDate             DATE,
        CustomerName          STRING,
        Email                 STRING,
        Item                  VARCHAR(100),
        Quantity              INT,
        UnitPrice             DECIMAL(18, 2),
        Tax                   DECIMAL(18, 2),
        action_type           CHAR(1),
        row_insert_timestamp  TIMESTAMP,
        row_update_timestamp  TIMESTAMP
    )
    USING delta
    """)
    print("Table 'materialized_t.orders' created successfully.")

StatementMeta(, 2e69b725-db9e-407b-b71c-66f486440c33, 4, Finished, Available, Finished)

Table 'materialized_t.orders_etl' created successfully.


In [1]:
# Create the materialized_temp_t.orders Delta table when SHOW TABLES does not
# already list it in the materialized_temp_t schema.

table_exists = spark.sql("SHOW TABLES IN materialized_temp_t LIKE 'orders'").count() > 0

if not table_exists:
    # Create the table in the same schema the guard checked and the messages
    # report (materialized_temp_t), not in materialized_t.
    spark.sql("""
    CREATE TABLE materialized_temp_t.orders (
        SalesOrderNumber      VARCHAR(20) NOT NULL,
        SalesOrderLineNumber  VARCHAR(20) NOT NULL,
        OrderDate             TIMESTAMP,
        CustomerName          STRING,
        Email                 STRING,
        Item                  VARCHAR(100),
        Quantity              INT,
        UnitPrice             DECIMAL(18, 2),
        Tax                   DECIMAL(18, 2),
        action_type           CHAR(1),
        row_insert_timestamp  TIMESTAMP,
        row_update_timestamp  TIMESTAMP
    )
    USING delta
    """)
    print("Table 'materialized_temp_t.orders' created successfully.")
else:
    print("Table 'materialized_temp_t.orders' already exists.")

StatementMeta(, 65d2e38d-ed0f-4ddb-8561-b60cb4bc640d, 3, Finished, Available, Finished)

Table 'materialized_temp_t.orders' already exists.


In [3]:
%%sql
-- Look up the rows of materialized_etl.orders_etl for sales order SO45777.
-- Disabled update, kept for reference:
--   update materialized_etl.orders_etl
--   set CustomerName = 'Thomas Mathew',
--       email = 'tmathew@oakwoodsystem.com'
SELECT *
FROM materialized_etl.orders_etl
WHERE SalesOrderNumber = 'SO45777'


StatementMeta(, 016c5993-c8ef-4260-a4b3-a5fbd87b36a6, 4, Finished, Available, Finished)

<Spark SQL result set with 1 rows and 12 fields>

In [1]:
# Drop materialized_t.orders; IF EXISTS makes this a no-op when the table is absent.
spark.sql("DROP TABLE IF EXISTS materialized_t.orders")


StatementMeta(, 25eefa9d-0534-43e2-9ace-ad6b3d4b4dcb, 3, Finished, Available, Finished)

DataFrame[]

In [2]:
# Delete the rows of materialized_t.orders whose OrderDate falls in calendar year 2021.
# NOTE(review): the statement fails if materialized_t.orders does not exist — confirm
# the table has been (re)created before this cell runs.
spark.sql("DELETE FROM materialized_t.orders where year(OrderDate)=2021")

StatementMeta(, 02499a5c-4ba8-44d3-bb5a-6b1f68b60b93, 4, Finished, Available, Finished)

DataFrame[num_affected_rows: bigint]

In [5]:
# Create materialized_etl.products_etl from dbo_1.products (Mountain Bikes and
# Road Bikes only), casting ListPrice and adding audit columns. The CTAS only
# runs when SHOW TABLES does not already list the table.
table_exists = spark.sql("SHOW TABLES IN materialized_etl LIKE 'products_etl'").count() > 0
if not table_exists:
    # The table name must match the one the guard checks and the messages
    # report (products_etl); otherwise the guard never sees the table and a
    # second run fails on the CREATE.
    # NOTE(review): CustomerName is selected from dbo_1.products but is not a
    # column of materialized_t.products — confirm it belongs in this table.
    spark.sql("""
    CREATE TABLE materialized_etl.products_etl AS
    SELECT
        ProductID,
        ProductName,
        Category,
        CustomerName,
        cast(ListPrice as decimal(18,2)) ListPrice,
        'I' action_type,
        current_timestamp() row_insert_timestamp,
        current_timestamp() row_update_timestamp
    FROM dbo_1.products
    where Category in ('Mountain Bikes', 'Road Bikes')
    """)
    print("Table 'materialized_etl.products_etl' created successfully.")
else:
    print("Table 'materialized_etl.products_etl' already exists.")

StatementMeta(, db76c265-0083-4c72-a57c-8b178f5ae368, 7, Finished, Available, Finished)

Table 'materialized_etl.orders' already exists.


In [6]:
# Create the materialized_t.products Delta table unless SHOW TABLES already lists it
table_exists = spark.sql("SHOW TABLES IN materialized_t LIKE 'products'").count() > 0
if table_exists:
    # Nothing to do: the table is already present
    print("Table 'materialized_t.products' already exists.")
else:
    # Explicit column definitions for the target table
    spark.sql("""
    CREATE TABLE materialized_t.products
    (
        ProductID VARCHAR(20)       NOT NULL,
        ProductName VARCHAR(100)    NOT NULL,
        Category                    STRING ,
        ListPrice                   DECIMAL(18,2),
        action_type                 CHAR(1),
        row_insert_timestamp        TIMESTAMP,
        row_update_timestamp        TIMESTAMP
    ) 
    USING DELTA
    """)
    print("Table 'materialized_t.products' created successfully.")

StatementMeta(, db76c265-0083-4c72-a57c-8b178f5ae368, 8, Finished, Available, Finished)

Table 'materialized_t.products' already exists.


In [12]:
%%sql
-- Row count per (client, company_code, acct_doc_nbr) in materialized_etl.acct_doc_hdr_etl.
-- The CTAS that builds this table exposes belnr under the alias acct_doc_nbr,
-- so the key is grouped on that name.
select client, company_code, acct_doc_nbr, count(*) as row_count
from materialized_etl.acct_doc_hdr_etl
group by client, company_code, acct_doc_nbr
-- having count(*) > 1   (uncomment to list only duplicated keys)

StatementMeta(, 37607f34-8c1a-4e50-95b6-86303aecd199, 13, Finished, Available, Finished)

<Spark SQL result set with 30 rows and 4 fields>

In [13]:
%%sql
-- Build materialized_etl.acct_doc_hdr_etl from dbo_1.sap_bkpf: the three key
-- columns are cast and renamed, the remaining columns pass through unchanged,
-- and the audit columns are cast to their target types.
CREATE TABLE materialized_etl.acct_doc_hdr_etl AS
SELECT
    CAST(mandt AS VARCHAR(3))  AS client,
    CAST(bukrs AS VARCHAR(4))  AS company_code,
    CAST(belnr AS VARCHAR(10)) AS acct_doc_nbr,
    gjahr,
    blart,
    bldat,
    budat,
    monat,
    waers,
    xblnr,
    bktxt,
    stjah,
    stblg,
    xnetb,
    CAST(action_type AS CHAR(1))            AS action_type,
    CAST(row_insert_timestamp AS TIMESTAMP) AS row_insert_timestamp,
    CAST(row_update_timestamp AS TIMESTAMP) AS row_update_timestamp
FROM dbo_1.sap_bkpf


StatementMeta(, 37607f34-8c1a-4e50-95b6-86303aecd199, 14, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [14]:
%%sql
-- Target table for accounting document headers. The three key columns are
-- NOT NULL; the remaining columns are nullable.
-- USING DELTA is stated explicitly, like the other materialized_t tables in
-- this notebook, so the table format does not depend on the session default.
create table materialized_t.acct_doc_hdr
(
    client varchar(3) not null,
    company_code varchar(4) not null,
    acct_doc_nbr varchar(10) not null,
    gjahr string,
    blart string,
    bldat timestamp,
    budat timestamp,
    monat string,
    waers string,
    xblnr string,
    bktxt string,
    stjah string,
    stblg string,
    xnetb string,
    action_type char(1),
    row_insert_timestamp timestamp,
    row_update_timestamp timestamp
)
using delta


StatementMeta(, 37607f34-8c1a-4e50-95b6-86303aecd199, 15, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [1]:
%%sql
-- Show every row and column of materialized_t.acct_doc_hdr
SELECT *
FROM materialized_t.acct_doc_hdr

StatementMeta(, 0f1d0464-fb69-416b-847b-56b3476992d7, 2, Finished, Available, Finished)

<Spark SQL result set with 30 rows and 17 fields>