-
Notifications
You must be signed in to change notification settings - Fork 0
/
20231214_deletion_vector.py
120 lines (84 loc) · 3.4 KB
/
20231214_deletion_vector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# Databricks notebook source
# MAGIC %md
# MAGIC ## 削除ベクトルのテスト
# COMMAND ----------
# MAGIC %md
# MAGIC ### パラメーターの設定など準備
# COMMAND ----------
dbutils.widgets.text( "external_location_path", "s3://motokazu-ishikawa-demo/demo" )
dbutils.widgets.text( "catalog_name", "motokazu_ishikawa_demo" )
dbutils.widgets.text( "schema_name", "testschema" )
dbutils.widgets.text( "table_dv_false", "dv_false" )
dbutils.widgets.text( "table_dv_true", "dv_true" )
# COMMAND ----------
external_location_path = dbutils.widgets.get( "external_location_path" )
catalog_name = dbutils.widgets.get( "catalog_name" )
schema_name = dbutils.widgets.get( "schema_name" )
table_dv_false = dbutils.widgets.get( "table_dv_false" )
table_dv_true = dbutils.widgets.get( "table_dv_true" )
# COMMAND ----------
dbutils.fs.mkdirs(f"{external_location_path}")
# COMMAND ----------
# MAGIC %sql
# MAGIC CREATE SCHEMA IF NOT EXISTS $catalog_name.$schema_name;
# COMMAND ----------
# MAGIC %md
# MAGIC ### 削除ベクトル非適用のテーブルと、適用したテーブルを作成して初期データを投入します
# COMMAND ----------
# MAGIC %sql
# MAGIC CREATE OR REPLACE TABLE $catalog_name.$schema_name.$table_dv_false (
# MAGIC date INTEGER NOT NULL,
# MAGIC delay INT NOT NULL,
# MAGIC distance INT NOT NULL,
# MAGIC origin STRING NOT NULL,
# MAGIC destination STRING NOT NULL
# MAGIC )
# MAGIC LOCATION '$external_location_path/$table_dv_false'
# MAGIC TBLPROPERTIES ('delta.enableDeletionVectors' = false);
# COMMAND ----------
# MAGIC %sql
# MAGIC CREATE OR REPLACE TABLE $catalog_name.$schema_name.$table_dv_true (
# MAGIC date INTEGER NOT NULL,
# MAGIC delay INT NOT NULL,
# MAGIC distance INT NOT NULL,
# MAGIC origin STRING NOT NULL,
# MAGIC destination STRING NOT NULL
# MAGIC )
# MAGIC LOCATION '$external_location_path/$table_dv_true'
# MAGIC TBLPROPERTIES ('delta.enableDeletionVectors' = true);
# COMMAND ----------
df = ( spark.read.format("csv")
.option("header", True)
.option("inferSchema","true")
.load("dbfs:/databricks-datasets/flights/departuredelays.csv") )
df.write.mode("append").saveAsTable(f"{catalog_name}.{schema_name}.{table_dv_false}")
# COMMAND ----------
df = ( spark.read.format("csv")
.option("header", True)
.option("inferSchema","true")
.load("dbfs:/databricks-datasets/flights/departuredelays.csv") )
df.write.mode("append").saveAsTable(f"{catalog_name}.{schema_name}.{table_dv_true}")
# COMMAND ----------
# MAGIC %md
# MAGIC ### 削除ベクトル非適用テーブルのDELETE前と後のファイル
# COMMAND ----------
# MAGIC %sql
# MAGIC LIST '$external_location_path/$table_dv_false'
# COMMAND ----------
# MAGIC %sql
# MAGIC DELETE FROM $catalog_name.$schema_name.$table_dv_false WHERE origin = 'ATL' AND destination = 'IAD';
# COMMAND ----------
# MAGIC %sql
# MAGIC LIST '$external_location_path/$table_dv_false'
# COMMAND ----------
# MAGIC %md
# MAGIC ### 削除ベクトル適用テーブルのDELETE前と後のファイル
# COMMAND ----------
# MAGIC %sql
# MAGIC LIST '$external_location_path/$table_dv_true'
# COMMAND ----------
# MAGIC %sql
# MAGIC DELETE FROM $catalog_name.$schema_name.$table_dv_true WHERE origin = 'ATL' AND destination = 'IAD';
# COMMAND ----------
# MAGIC %sql
# MAGIC LIST '$external_location_path/$table_dv_true'