In [12]:
from pyspark.sql import SparkSession
from delta import *

# Session builder for this notebook. The two spark.sql.* entries register the
# Delta SQL extension and the Delta catalog implementation; the driver gets 4g
# of memory, and the auto-broadcast join threshold is set to -1.
builder = (
    SparkSession.builder.appName("delta-table-sql")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .config("spark.driver.memory", "4g")
    .config("spark.sql.autoBroadcastJoinThreshold", "-1")
)

# configure_spark_with_delta_pip wraps the builder before the session is
# created (or an existing one is reused via getOrCreate).
spark = configure_spark_with_delta_pip(builder).getOrCreate()

### Create a table

In [13]:
# Single-column ("id") DataFrame: every second value from 10 up to, but not
# including, 25.
df2 = spark.range(start=10, end=25, step=2)
df2.show()

+---+
| id|
+---+
| 10|
| 12|
| 14|
| 16|
| 18|
| 20|
| 22|
| 24|
+---+



Write the DataFrame in Delta format to a filesystem path

In [14]:
# Persist df2 as Delta files under ./tmp/delta_exm, replacing whatever an
# earlier run left there.
(
    df2.write
    .format("delta")
    .mode('overwrite')
    .save("./tmp/delta_exm")
)

Drop the table if it already exists

In [15]:
# Start from a clean slate: drop the table when it is already registered.
# IF EXISTS keeps this cell from failing on a first run.
drop_statement = 'drop table if exists  delta_table_example'
spark.sql(drop_statement)

DataFrame[]

In [16]:
# Register delta_table_example (one `id long` column) as a Delta table whose
# data lives at the path written earlier in the notebook.
spark.sql(
    """CREATE OR REPLACE TABLE {table} (id long) USING DELTA LOCATION '{location}' """.format(
        table='delta_table_example',
        location="./tmp/delta_exm",
    )
)

DataFrame[]

#### Create table in the metastore using DataFrame's schema and write data to it

In [17]:
# Write df2 into the named table (rather than to a path), replacing its
# current contents.
delta_writer = df2.write.format("delta").mode("Overwrite")
delta_writer.saveAsTable("delta_table_example")

In [18]:
# List the databases (namespaces) known to the session catalog.
databases = spark.sql('show databases')
databases.show()

+---------+
|namespace|
+---------+
|  default|
+---------+



In [19]:
# List the tables registered in the `default` database; truncate=False keeps
# long table names fully visible.
tables_in_default = spark.sql('show tables from default')
tables_in_default.show(truncate=False)

+---------+-------------------+-----------+
|namespace|tableName          |isTemporary|
+---------+-------------------+-----------+
|default  |delta_table_example|false      |
+---------+-------------------+-----------+



In [20]:
from pyspark.sql.utils import AnalysisException

# Add the derived column only when the table does not have it yet, so this cell
# can be re-run safely. Inspecting the schema explicitly -- instead of catching
# and printing every AnalysisException -- means genuine problems (for example a
# missing table) are raised here rather than being masked and resurfacing in
# the UPDATE below. Names are compared lower-cased so a differently-cased
# existing column is also detected.
existing_columns = {name.lower() for name in spark.table("delta_table_example").columns}
if "doubling_id" not in existing_columns:
    spark.sql("""ALTER TABLE delta_table_example ADD COLUMN doubling_id INT""")

# Fill the column for every row; show() prints the num_affected_rows result.
spark.sql("""UPDATE delta_table_example SET doubling_id = id*2 """).show()
    

25/08/09 18:32:45 WARN UpdateCommand: Could not validate number of records due to missing statistics.


+-----------------+
|num_affected_rows|
+-----------------+
|                8|
+-----------------+



In [21]:
# Read the table back to check the new doubling_id values next to id.
delta_table_example_df = spark.table("delta_table_example")
delta_table_example_df.show()

+---+-----------+
| id|doubling_id|
+---+-----------+
| 18|         36|
| 22|         44|
| 12|         24|
| 24|         48|
| 20|         40|
| 16|         32|
| 10|         20|
| 14|         28|
+---+-----------+



#### Create table in the metastore

In [22]:
# Declare default.people10m column by column through the DeltaTable builder.
# createIfNotExists leaves an existing table alone, so the cell can be re-run.
# Only the schema is defined here; no rows are written.
(
    DeltaTable.createIfNotExists(spark)
    .tableName("default.people10m")
    .addColumn("id", "INT")
    .addColumn("firstName", "STRING")
    .addColumn("middleName", "STRING")
    .addColumn("lastName", "STRING", comment="surname")
    .addColumn("gender", "STRING")
    .addColumn("birthDate", "TIMESTAMP")
    .addColumn("ssn", "STRING")
    .addColumn("salary", "INT")
    .execute()
)

<delta.tables.DeltaTable at 0x72447c137580>

In [23]:
# List the tables in `default` again to confirm people10m is now registered.
default_tables = spark.sql('show tables from default')
default_tables.show(truncate=False)

+---------+-------------------+-----------+
|namespace|tableName          |isTemporary|
+---------+-------------------+-----------+
|default  |delta_table_example|false      |
|default  |people10m          |false      |
+---------+-------------------+-----------+



In [24]:
# Display the newly created table: the declared columns with no rows yet.
people10m_df = spark.table("people10m")
people10m_df.show()

+---+---------+----------+--------+------+---------+---+------+
| id|firstName|middleName|lastName|gender|birthDate|ssn|salary|
+---+---------+----------+--------+------+---------+---+------+
+---+---------+----------+--------+------+---------+---+------+

