<div style="text-align: center; line-height: 0; padding-top: 9px;">
  <img src="https://blog.scholarnest.com/wp-content/uploads/2023/03/scholarnest-academy-scaled.jpg" alt="ScholarNest Academy" style="width: 1400px">
</div>

##### Clean up previous runs

In [0]:
%run ../utils/cleanup

##### Setup

In [0]:
# Root folder under which the source dataset for this demo is read.
base_dir = "/mnt/files"

# Create the target catalog and schema if they are missing so the cell can be re-run.
# These statements interpolate nothing, so they are plain strings rather than f-strings.
spark.sql("CREATE CATALOG IF NOT EXISTS dev")
spark.sql("CREATE DATABASE IF NOT EXISTS dev.demo_db")

# Explicit DDL schema handed to the JSON reader instead of relying on inference.
flight_schema_ddl = """FL_DATE DATE, OP_CARRIER STRING, OP_CARRIER_FL_NUM INT, ORIGIN STRING, 
          ORIGIN_CITY_NAME STRING, DEST STRING, DEST_CITY_NAME STRING, CRS_DEP_TIME INT, DEP_TIME INT, 
          WHEELS_ON INT, TAXI_IN INT, CRS_ARR_TIME INT, ARR_TIME INT, CANCELLED STRING, DISTANCE INT"""

# dateFormat tells the reader how to parse the DATE column (FL_DATE) from the JSON text.
flight_time_df = (
    spark.read.format("json")
    .schema(flight_schema_ddl)
    .option("dateFormat", "M/d/y")
    .load(f"{base_dir}/dataset_ch7/flight-time.json")
)

# Save the data as a Delta table; "overwrite" replaces the contents left by an earlier run.
flight_time_df.write.format("delta").mode("overwrite").saveAsTable("dev.demo_db.flight_time_tbl")

##### 1. Read a Delta table using Spark SQL

In [0]:
%sql
-- Return every row and column of the Delta table written by the setup cell.
SELECT *
FROM dev.demo_db.flight_time_tbl

##### 2. Read a Delta table using the DataFrame API

In [0]:
# Load the table by its three-part catalog name through the DataFrame reader and render it.
(
    spark.read
    .format("delta")
    .table("dev.demo_db.flight_time_tbl")
    .display()
)

#### 3. Read Delta format data from an external location
1. Create an external location
2. Read using the DataFrame API
3. Create an external table

##### 3.1. Create an external location

In [0]:
%sql
-- Register the storage URL as an external location, accessed with the named credential.
-- IF NOT EXISTS makes the statement a no-op when the location is already defined.
CREATE EXTERNAL LOCATION IF NOT EXISTS external_data
  URL 'abfss://dbfs-container@prashantsa.dfs.core.windows.net/external'
  WITH (CREDENTIAL `scholarnest-storage-credential`)

##### 3.2. Read using the DataFrame API

In [0]:
# Path-based read: load the Delta data straight from its storage path
# (no catalog table name involved) and render the result.
(
    spark.read
    .format("delta")
    .load("abfss://dbfs-container@prashantsa.dfs.core.windows.net/external/flight-time")
    .display()
)

##### 3.3. Create an external table

In [0]:
%sql
-- Define a catalog table whose Delta data lives at the explicit storage path below.
-- IF NOT EXISTS makes the statement a no-op when the table is already registered.
-- The path is a single-quoted string literal so it is never parsed as an identifier
-- when double-quoted identifiers are enabled.
CREATE TABLE IF NOT EXISTS dev.demo_db.flight_time_ext_tbl (
    FL_DATE DATE,
    OP_CARRIER STRING,
    OP_CARRIER_FL_NUM INT,
    ORIGIN STRING,
    ORIGIN_CITY_NAME STRING,
    DEST STRING,
    DEST_CITY_NAME STRING,
    CRS_DEP_TIME INT,
    DEP_TIME INT,
    WHEELS_ON INT,
    TAXI_IN INT,
    CRS_ARR_TIME INT,
    ARR_TIME INT,
    CANCELLED STRING,
    DISTANCE INT
) USING DELTA
LOCATION 'abfss://dbfs-container@prashantsa.dfs.core.windows.net/external/flight-time'

In [0]:
%sql
-- Query the external table to confirm it exposes the data stored at its location.
SELECT *
FROM dev.demo_db.flight_time_ext_tbl

&copy; 2021-2023 ScholarNest Technologies Pvt. Ltd. All rights reserved.<br/>
Apache, Apache Spark, Spark and the Spark logo are trademarks of the <a href="https://www.apache.org/">Apache Software Foundation</a>.<br/>
Databricks, Databricks Cloud and the Databricks logo are trademarks of the <a href="https://www.databricks.com/">Databricks Inc</a>.<br/>
<br/>
<a href="https://www.scholarnest.com/privacy/">Privacy Policy</a> | 
<a href="https://www.scholarnest.com/terms/">Terms of Use</a> | <a href="https://www.scholarnest.com/contact/">Contact Us</a>