## Exploring The Source Directory

In [0]:
-- Make hive_metastore the current catalog for the statements that follow.
USE CATALOG hive_metastore;

In [0]:
%python
# List the contents of the raw orders source directory and render the listing.
# (Plain string literal: the path has no placeholders, so no f-string is needed.)
files = dbutils.fs.ls("dbfs:/mnt/demo-datasets/bookstore/orders-raw")
display(files)

## Auto Loader

In [0]:
%fs ls "dbfs:/mnt/demo/orders_checkpoint"

In [0]:
%python
# Read the raw Parquet order files as a stream through the "cloudFiles"
# (Auto Loader) source and write them to the `orders_updates` table.
source_dir = "dbfs:/mnt/demo-datasets/bookstore/orders-raw"

# The same directory is used for Auto Loader's schema location and for the
# stream's checkpoint; holding it in one variable keeps the two from drifting.
checkpoint_dir = "dbfs:/mnt/demo/orders_checkpoint"

# NOTE(review): the returned query handle is not stored, so the stream presumably
# has to be stopped from the notebook UI before the cleanup cells run — confirm.
(
    spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "parquet")
    .option("cloudFiles.schemaLocation", checkpoint_dir)
    .load(source_dir)
    .writeStream
    .option("checkpointLocation", checkpoint_dir)
    .table("orders_updates")
)


In [0]:
-- Preview three rows of orders_updates (no ORDER BY, so which rows is arbitrary).
SELECT *
FROM orders_updates
LIMIT 3

In [0]:
-- Total number of rows currently in orders_updates.
SELECT COUNT(*)
FROM orders_updates

In [0]:
-- Show the columns of orders_updates together with its extended table metadata.
DESCRIBE EXTENDED orders_updates

In [0]:
%fs ls "dbfs:/user/hive/warehouse/orders_updates"

## Landing New Files

In [0]:
%python
import re

dir_path = "dbfs:/mnt/demo-datasets/bookstore/orders-raw/"
source_file = "dbfs:/mnt/demo-datasets/bookstore/orders-raw/01.parquet"

# 1. List the entries currently in the source directory.
files = dbutils.fs.ls(dir_path)

# 2. Collect the number from every entry named exactly "<digits>.parquet".
#    fullmatch (instead of search) ignores names that merely END in digits,
#    e.g. "part-0007.parquet", which would otherwise skew the numbering.
pattern = re.compile(r"(\d+)\.parquet")
numbers = [
    int(m.group(1))
    for m in (pattern.fullmatch(f.name) for f in files)
    if m
]

# 3. The next file number is one past the highest existing number (1 if none exist),
#    zero-padded to at least two digits.
next_number = max(numbers, default=0) + 1
next_file_name = f"{next_number:02d}.parquet"

target_file = dir_path + next_file_name

# 4. Copy the sample file under the new name so it shows up as a new file in the directory.
dbutils.fs.cp(source_file, target_file)

print(f"Copied to: {target_file}")


In [0]:
-- Row count of orders_updates, to compare against the count taken before a new file landed.
SELECT COUNT(*)
FROM orders_updates

In [0]:
%fs ls "dbfs:/mnt/demo/orders_checkpoint"

In [0]:
%fs head "dbfs:/mnt/demo/orders_checkpoint/metadata"

## Exploring Table History

In [0]:
-- List the recorded change history of the orders_updates table.
DESCRIBE HISTORY orders_updates

## Cleaning Up

In [0]:
-- Remove the demo table. IF EXISTS keeps this cleanup cell safe to re-run
-- when the table has already been dropped.
DROP TABLE IF EXISTS orders_updates

In [0]:
%fs ls "dbfs:/mnt/demo/orders_checkpoint"

In [0]:
%python
# Delete the checkpoint directory and everything beneath it.
# NOTE(review): presumably any stream still using this checkpoint should be stopped first — confirm.
dbutils.fs.rm("dbfs:/mnt/demo/orders_checkpoint", recurse=True)