## Snowflake Query as PySpark DataFrame and PySpark DataFrame as Snowflake Table

In [1]:
from pathlib import Path
import configparser
import os
import pyspark
from pyspark.sql import SparkSession

In [2]:
# Location of the INI configuration file, taken from the CONFIG_PATH
# environment variable (None when the variable is not set).
config_file = os.environ.get("CONFIG_PATH")

In [3]:
# Load the INI file referenced by `config_file`.
#
# ConfigParser.read() does not raise when a file is missing or unreadable:
# it skips the file and returns the list of files it actually parsed. That
# return value is therefore what has to be checked to detect a missing file.
config = configparser.ConfigParser()

if not config_file:
    # os.getenv returned None (or the variable is empty): nothing to read.
    raise FileNotFoundError("CONFIG_PATH is not set; config.ini file not found")

if not config.read(config_file):
    # Fail here with a clear message instead of a KeyError in a later cell.
    raise FileNotFoundError(f"config.ini file not found: {config_file}")

The Snowflake JDBC driver and the Snowflake Spark Connector JAR files can be downloaded from [Maven Central](https://search.maven.org/search?q=g:net.snowflake).

In [4]:
# Paths of the two JAR files read from the [snowflake] section of the config.
# NOTE: `sf_connecctor` (sic) keeps its spelling because later cells refer to
# the variable by this exact name.
sf_jdbc_driver, sf_connecctor = (
    config["snowflake"]["jdbc_driver_path"],
    config["snowflake"]["connector_path"],
)

In [5]:
# Connection settings, each read from the [snowflake] section of the config.
# The option names on the right are listed in the same order as the variables
# on the left.
(
    sf_account,
    sf_user,
    sf_database,
    sf_schema,
    sf_role,
    sf_warehouse,
    sf_authenticator,
) = (
    config["snowflake"][option]
    for option in (
        "account",
        "username",
        "database",
        "schema",
        "role",
        "warehouse",
        "authenticator",
    )
)

In [6]:
# Create (or reuse) a Spark session in local mode (`local[*]`). Both JARs,
# the JDBC driver and the Spark connector, are passed to Spark as a
# comma-separated list through the `spark.jars` setting.
spark = (
    SparkSession.builder
    .appName("Snowflake_JDBC")
    .master("local[*]")
    .config("spark.jars", f"{sf_jdbc_driver},{sf_connecctor}")
    .getOrCreate()
)

In [7]:
# Data source name passed to `.format()` for every Snowflake read and write
# in this notebook.
SNOWFLAKE_SOURCE_NAME = "net.snowflake.spark.snowflake"

In [8]:
# Snowflake connection parameters, passed as options to every read and write.
sfparams = {
    "sfURL": f"{sf_account}.snowflakecomputing.com",
    "sfUser": sf_user,
    # Never type a real password into the notebook: it is read from the
    # SNOWFLAKE_PASSWORD environment variable instead. The placeholder
    # fallback is kept for the externalbrowser authenticator, where the
    # password is not applicable.
    "sfPassword": os.getenv("SNOWFLAKE_PASSWORD", "your_password"),
    "sfDatabase": sf_database,
    "sfSchema": sf_schema,
    "sfRole": sf_role,
    "sfWarehouse": sf_warehouse,
    "sfAuthenticator": sf_authenticator,
}

In [9]:
# SQL text handed to the Snowflake reader through its "query" option.
query = "SELECT CURRENT_DATE as my_date"

In [10]:
# Run the custom SQL in `query` against Snowflake and expose the result as a
# Spark DataFrame. The connection parameters and the query are supplied
# together as reader options.
df = (
    spark.read.format(SNOWFLAKE_SOURCE_NAME)
    .options(**sfparams, query=query)
    .load()
)

In [11]:
# Print the rows returned by the query.
df.show()

+----------+
|   MY_DATE|
+----------+
|2022-03-02|
+----------+



#### Dataframe to Snowflake

In [None]:
# Write the `my_date` column of `df` to the Snowflake table `my_table`.
# "overwrite" is the save mode; the available modes are described at
# https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameWriter.mode.html#pyspark.sql.DataFrameWriter.mode
(
    df.select("my_date")
    .write.format(SNOWFLAKE_SOURCE_NAME)
    .mode("overwrite")
    .options(**sfparams, dbtable="my_table")
    .save()
)

In [None]:
# Stop the Spark session created earlier in this notebook.
spark.stop()

#### Using Context Manager (with)

In [None]:
# Same read-then-write round trip as above, with the SparkSession used as a
# context manager so that the session is stopped when the block exits.
with (
    SparkSession.builder
    .appName("Snowflake_JDBC")
    .master("local[*]")
    .config("spark.jars", f"{sf_jdbc_driver},{sf_connecctor}")
    .getOrCreate()
) as spark:
    # Read: run the SQL on Snowflake and load the result as a DataFrame.
    query = "SELECT CURRENT_DATE as my_date"
    jdbcDF = (
        spark.read.format(SNOWFLAKE_SOURCE_NAME)
        .options(**sfparams, query=query)
        .load()
    )
    jdbcDF.show()

    # Write: save the `my_date` column to the Snowflake table `my_table`.
    # Save modes are described at
    # https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameWriter.mode.html#pyspark.sql.DataFrameWriter.mode
    (
        jdbcDF.select("my_date")
        .write.format(SNOWFLAKE_SOURCE_NAME)
        .mode("overwrite")
        .options(**sfparams, dbtable="my_table")
        .save()
    )
    print("Completed saving dataframe as Snowflake table")