In [2]:
from pyspark.sql import SparkSession
from delta import *

In [13]:
# Round-trip demo: write a tiny DataFrame to a Delta table with Spark, then read it back.

# Location of the Delta table; defined once so the write and the read-back
# below cannot drift apart.
DELTA_TABLE_PATH = "/data/delta-table"

# Initialize Spark Session with Delta support (SQL extension + Delta catalog)
builder = (
    SparkSession.builder.appName("DeltaLakeTest")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
)
# configure_spark_with_delta_pip is not imported by name in this notebook;
# presumably it is provided by the `from delta import *` import cell.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

# Example data: two rows with columns (id, name)
data = spark.createDataFrame([(1, "Alice"), (2, "Bob")], ["id", "name"])

# Write to Delta table; "overwrite" mode replaces existing contents, so this
# cell can be re-run without failing on an existing table.
data.write.mode("overwrite").format("delta").save(DELTA_TABLE_PATH)

# Read from Delta table and print the rows to verify the round trip
df = spark.read.format("delta").load(DELTA_TABLE_PATH)
df.show()

+---+-----+
| id| name|
+---+-----+
|  1|Alice|
|  2|  Bob|
+---+-----+



In [4]:
import pandas as pd

In [5]:
from deltalake import write_deltalake, DeltaTable

In [6]:
# Sample records as column -> values mappings.
# `data` seeds the initial table; `data_2` holds the rows appended later.
data = dict(
    first_name=["bob", "li", "leah"],
    age=[47, 23, 51],
)
data_2 = dict(
    first_name=["suh", "anais"],
    age=[33, 68],
)


In [7]:
# Build a pandas DataFrame from the `data` dict and write it out as a Delta table.
# mode="overwrite" keeps this cell re-runnable: write_deltalake's default mode
# raises when the table already exists, which would break Restart & Run All.
# Re-running replaces the current contents with a new table version rather than failing.
df = pd.DataFrame(data)
write_deltalake("tmp/pandas-table", df, mode="overwrite")


In [8]:
# Open the Delta table (no version pinned) and display its rows as a pandas DataFrame
initial_table = DeltaTable("tmp/pandas-table/")
initial_table.to_pandas()

Unnamed: 0,first_name,age
0,bob,47
1,li,23
2,leah,51


In [9]:
# Turn the second batch of records into a DataFrame and append it to the
# existing Delta table. Note: every execution of this cell appends the rows again.
df2 = pd.DataFrame(data_2)
write_deltalake(
    "tmp/pandas-table",
    df2,
    mode="append",
)


In [10]:
# Re-open the table (no version pinned) and display it; the appended rows
# should now appear alongside the original ones.
appended_table = DeltaTable("tmp/pandas-table/")
appended_table.to_pandas()

Unnamed: 0,first_name,age
0,suh,33
1,anais,68
2,bob,47
3,li,23
4,leah,51


In [11]:
# Time travel: open the table pinned to version 0 and display that snapshot
table_v0 = DeltaTable("tmp/pandas-table/", version=0)
table_v0.to_pandas()

Unnamed: 0,first_name,age
0,bob,47
1,li,23
2,leah,51


In [12]:
# Time travel: open the table pinned to version 1 and display that snapshot
table_v1 = DeltaTable("tmp/pandas-table/", version=1)
table_v1.to_pandas()

Unnamed: 0,first_name,age
0,suh,33
1,anais,68
2,bob,47
3,li,23
4,leah,51
