In [0]:
%sql
-- Point the session at the catalog and schema used throughout this notebook.
USE CATALOG ecommerce;
USE SCHEMA v01;

-- Confirm the switch took effect. The check is the final statement on purpose:
-- run before the USE statements it would report the previous context, and a
-- multi-statement SQL cell displays only the result of its last statement.
SELECT current_schema(), current_catalog();

In [0]:
%sql
-- List the files under the raw customers directory of the volume.
LIST '/Volumes/ecommerce/v01/raw/AdventureWorks_Customers/'

In [0]:
%sql
-- Query the raw customer Parquet files in place, before anything is written to a table.
SELECT *
FROM read_files('/Volumes/ecommerce/v01/raw/AdventureWorks_Customers/', format => "parquet");

## Adding Column Metadata on Ingestion
****

In [0]:
%sql
USE CATALOG ecommerce;
USE SCHEMA v01;

-- Build the bronze table straight from the raw Parquet files (CTAS), so the
-- table is created already populated rather than empty.
-- OR REPLACE keeps this cell re-runnable: a plain CREATE TABLE fails as soon as
-- the table exists from a previous run.
CREATE OR REPLACE TABLE historical_users_bronze AS
SELECT
  *,
  -- Audit columns taken from the reader's hidden _metadata column.
  _metadata.file_modification_time AS file_modification_time,
  _metadata.file_name AS source_file,
  -- Timestamp of this load, evaluated when the query runs.
  current_timestamp() AS ingestion_time
FROM read_files(
  '/Volumes/ecommerce/v01/raw/AdventureWorks_Customers/',
  format => "parquet"
);

-- Preview the first rows of the bronze table.
SELECT *
FROM historical_users_bronze
LIMIT 10;

In [0]:
%sql
-- Show the column definitions and extended table details of the bronze table.
DESCRIBE EXTENDED historical_users_bronze;

In [0]:
%sql
-- Row count per source file, to see how many records each ingested file contributed.
SELECT
  source_file,
  COUNT(*) AS total
FROM historical_users_bronze
GROUP BY source_file
ORDER BY source_file;

In [0]:
# Load the raw customer Parquet files from the volume into a DataFrame.
df = spark.read.load(
    "/Volumes/ecommerce/v01/raw/AdventureWorks_Customers/",
    format="parquet",
)

In [0]:
from pyspark.sql.functions import col, current_timestamp

# Append three audit columns in a single projection: two copied from the
# reader's hidden _metadata column, plus the timestamp of this load.
df_with_metadata = df.withColumns(
    {
        "file_modification_time": col("_metadata.file_modification_time"),
        "source_file": col("_metadata.file_name"),
        "ingestion_time": current_timestamp(),
    }
)

df_with_metadata.display()

In [0]:
# Persist the enriched DataFrame as the bronze table; "overwrite" mode replaces
# whatever the table currently holds.
# NOTE(review): the same table is also created by the SQL CTAS cell earlier in
# this notebook — confirm both paths yield the same column set, otherwise the
# overwrite may be rejected for a schema mismatch.
(
    df_with_metadata.write
    .mode("overwrite")
    .saveAsTable("ecommerce.v01.historical_users_bronze")
)