In [1]:
import pyspark
from delta import *

# Maven coordinates of the connector libraries needed for wasbs:// access to
# Azure Blob Storage.
# NOTE(review): an earlier note in this cell also listed
# jetty-util:9.4.48.v20220622 and jetty-util-ajax:9.4.48.v20220622 as "added";
# they are not requested here -- confirm whether they are still required.
azure_packages = [
    "org.apache.hadoop:hadoop-azure:3.3.4",
    "com.microsoft.azure:azure-storage:8.6.6",
]

# Session builder with the Delta Lake SQL extension and catalog enabled.
builder = (
    pyspark.sql.SparkSession.builder.appName("MyApp")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
)

# configure_spark_with_delta_pip() sets "spark.jars.packages" itself, so a value
# put on the builder beforehand is replaced. Additional packages therefore have
# to be handed over through `extra_packages` to end up in the final setting.
spark = configure_spark_with_delta_pip(builder, extra_packages=azure_packages).getOrCreate()

In [2]:
# Bare expression: the notebook renders the SparkSession object as this cell's output.
spark

In [4]:
# Probe the JVM behind `spark._jvm` for one marker class per connector library.
# Any exception raised while loading the class is reported as "NOT available".
connector_probes = (
    ("hadoop-azure", "org.apache.hadoop.fs.azure.NativeAzureFileSystem"),
    ("azure-storage", "com.microsoft.azure.storage.CloudStorageAccount"),
)

for library, marker_class in connector_probes:
    try:
        spark._jvm.Class.forName(marker_class)
        status = "is available"
    except Exception:
        status = "is NOT available"
    print(f"{library} {status}")

hadoop-azure is available
azure-storage is available


In [5]:
# SAS-token alternative (not used below): the config key has the form
# "fs.azure.sas.<container-name>.<account-name>.blob.core.windows.net", e.g. "fs.azure.sas.deltastorage.stockdatajanikowski.blob.core.windows.net"

from dotenv import load_dotenv
import os
# Read the storage account name and its access key from the environment.
# load_dotenv() runs first -- presumably to pick up values from a local .env file.
# A missing variable raises KeyError.
load_dotenv()
storage = os.environ['storageAccount']
storageKey = os.environ['storageKey']

# Per-account Hadoop option under which the access key is registered.
account_key_option = f'fs.azure.account.key.{storage}.blob.core.windows.net'

spark.conf.set("fs.azure", "org.apache.hadoop.fs.azure.NativeAzureFileSystem")
spark.conf.set(account_key_option, storageKey)


In [6]:
# Smoke test: write a five-row Delta table (ids 0-4) to the Blob Storage container.
data = spark.range(0, 5)

# mode("overwrite") keeps this cell re-runnable. Without it the default save mode
# raises an error as soon as the target path already contains data, so a second
# "Restart & Run All" would fail here.
data.write.format("delta").mode("overwrite").save("wasbs://deltastorage@stockdatajanikowski.blob.core.windows.net/test3")

In [7]:
# Read the Delta table back from the same wasbs:// location it was written to.
df = spark.read.load(
    "wasbs://deltastorage@stockdatajanikowski.blob.core.windows.net/test3",
    format="delta",
)

In [9]:
# Print the rows that were read back; the captured output lists the ids unsorted.
df.show()

+---+
| id|
+---+
|  4|
|  1|
|  2|
|  0|
|  3|
+---+

