In [28]:
import os
import json
import pandas as pd
import xgboost as xgb
import configparser
from snowflake.snowpark import Session
from copy import copy
from snowflake.snowpark import Row
import snowflake.snowpark.functions as F
from snowflake.snowpark.functions import col, lit, sql_expr, get, get_path, udf, udtf, table_function, sproc, seq8, uniform, when_matched, when_not_matched
from snowflake.snowpark.types import StructType, StructField, StringType, IntegerType, DecimalType, LongType
from snowflake.snowpark.exceptions import SnowparkJoinException, SnowparkSQLException
from snowflake.snowpark.files import SnowflakeFile
from snowflake.snowpark.column import METADATA_FILENAME, METADATA_FILE_ROW_NUMBER

# Read Snowflake credentials from a local config file instead of hard-coding them in the notebook.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of files it
# actually parsed, so fail fast here instead of hitting an opaque KeyError further down.
if not config.read('assets/credentials.cfg'):
    raise FileNotFoundError("Could not read Snowflake credentials file: assets/credentials.cfg")

# Every connection setting lives in the [SNOWPARKAWS] section of that file.
credentials = config['SNOWPARKAWS']

# The entries marked "optional" are tagged that way by the original author; this
# notebook nevertheless reads all of them, so each key must be present in the config file.
connection_parameters = dict(
   account   =  credentials['SNOWFLAKE_ACCOUNT'],
   user      =  credentials['SNOWFLAKE_USER'],
   password  =  credentials['SNOWFLAKE_PASSWORD'],
   role      =  credentials['SNOWFLAKE_ROLE'],  # optional
   warehouse =  credentials['SNOWFLAKE_WAREHOUSE'],  # optional
   database  =  credentials['SNOWFLAKE_DATABASE'],  # optional
   schema    =  credentials['SNOWFLAKE_SCHEMA'],  # optional
)

# Pass this dictionary to the Session.builder.configs method to return a builder object that has these connection parameters.
# Call the create method of the builder to establish the session.
session = Session.builder.configs(connection_parameters).create()

#### `Understanding Snowpark Session Table Object and its different methods`

- snowflake.snowpark.Table represents a lazily-evaluated Table.

You can create a Table object by calling [Session.table()](https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/table) with the name of the table in Snowflake.


Methods of Snowpark Table Object:

| Method | Description |
|--|--|
| delete() | Deletes rows in a Table and returns a DeleteResult, representing the number of rows deleted. |
| drop_table() | Drops the table from the Snowflake database. |
| merge() | Merges this Table with DataFrame source on the specified join expression and a list of matched or not-matched clauses, and returns a MergeResult, representing the number of rows inserted, updated and deleted by this merge action. |
| update() | Updates rows in the Table with specified assignments and returns an UpdateResult, representing the number of rows modified and the number of multi-joined rows modified. |

In [5]:
# Build a six-row DataFrame with columns "a" and "b" from local tuples, then print it.
target_rows = [
    (1, 1), (1, 2),
    (2, 1), (2, 2),
    (3, 1), (3, 2),
]
target_df = session.create_dataframe(target_rows, schema=["a", "b"])
target_df.show()

-------------
|"A"  |"B"  |
-------------
|1    |1    |
|1    |2    |
|2    |1    |
|2    |2    |
|3    |1    |
|3    |2    |
-------------



- [save_as_table](https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.DataFrameWriter.save_as_table)

In [6]:
# Write the DataFrame out as the temporary table "my_table";
# mode="overwrite" replaces the table if it already exists.
target_df.write.save_as_table(
    table_name="my_table",
    mode="overwrite",
    table_type="temporary",
)

In [7]:
# Get a Table object for "my_table" by name and print its rows.
t = session.table("my_table")
t.show()

-------------
|"A"  |"B"  |
-------------
|1    |1    |
|1    |2    |
|2    |1    |
|2    |2    |
|3    |1    |
|3    |2    |
-------------



- [Table.delete](https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Table.delete#snowflake.snowpark.Table.delete)

In [8]:
# Delete all rows in the table: with no condition every row is removed.
# The returned DeleteResult reports how many rows were deleted.
t.delete()

DeleteResult(rows_deleted=6)

In [9]:
# Collect the remaining rows; an empty list confirms the table is now empty.
t.collect()

[]

In [14]:
# Re-create "my_table" with the original six rows so the conditional-delete
# example starts from known data.
target_rows = [
    (1, 1), (1, 2),
    (2, 1), (2, 2),
    (3, 1), (3, 2),
]
target_df = session.create_dataframe(target_rows, schema=["a", "b"])
target_df.write.save_as_table(
    table_name="my_table",
    mode="overwrite",
    table_type="temporary",
)

# Fetch the freshly written table and print it.
t = session.table("my_table")
t.show()

-------------
|"A"  |"B"  |
-------------
|1    |1    |
|1    |2    |
|2    |1    |
|2    |2    |
|3    |1    |
|3    |2    |
-------------



In [15]:
# Delete all rows where column "A" has value 1; rows that do not satisfy
# the condition are left untouched.
t.delete(col("A") == 1)

DeleteResult(rows_deleted=2)

In [17]:
# Print the remaining rows, sorted by "A" then "B" so the output order is stable.
t.sort(col("A"), col("B")).show()

-------------
|"A"  |"B"  |
-------------
|2    |1    |
|2    |2    |
|3    |1    |
|3    |2    |
-------------



In [21]:
# Reset "my_table" to the original six rows for the delete-with-source example.
target_rows = [
    (1, 1), (1, 2),
    (2, 1), (2, 2),
    (3, 1), (3, 2),
]
target_df = session.create_dataframe(target_rows, schema=["a", "b"])
target_df.write.save_as_table(
    table_name="my_table",
    mode="overwrite",
    table_type="temporary",
)
t = session.table("my_table")
t.sort(col("A"), col("B")).show()

# Single-column DataFrame whose "a" values will be matched against the table.
source_values = [2, 3, 4, 5]
source_df = session.create_dataframe(source_values, schema=["a"])
source_df.show()

-------------
|"A"  |"B"  |
-------------
|1    |1    |
|1    |2    |
|2    |1    |
|2    |2    |
|3    |1    |
|3    |2    |
-------------

-------
|"A"  |
-------
|2    |
|3    |
|4    |
|5    |
-------



In [22]:
# Delete all rows in this table where column "A" is equal to column "A" in another DataFrame.
# The second argument is the DataFrame that the condition refers to.
t.delete(col("A") == source_df.col("A"), source_df)

DeleteResult(rows_deleted=4)

In [23]:
# Print what is left after the delete, sorted by "A" then "B".
t.sort(col("A"), col("B")).show()

-------------
|"A"  |"B"  |
-------------
|1    |1    |
|1    |2    |
-------------



- [drop_table](https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Table.drop_table)

In [25]:
# Save a small DataFrame as the temporary table "my_table1" so there is something to drop.
source_df = session.create_dataframe([2,3,4,5], schema=["a"])
source_df.write.save_as_table(table_name="my_table1", mode="overwrite", table_type="temporary")

# List the tables of the session's current schema instead of a hard-coded
# "snowpark_db.public", so the listing still targets the right schema when the
# credentials file selects a different database or schema.
current_schema = session.get_fully_qualified_current_schema()
session.sql(f"show tables in schema {current_schema}").show()


-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"created_on"                      |"name"                |"database_name"  |"schema_name"  |"kind"     |"comment"  |"cluster_by"  |"rows"  |"bytes"  |"owner"   |"retention_time"  |"automatic_clustering"  |"change_tracking"  |"search_optimization"  |"search_optimization_progress"  |"search_optimization_bytes"  |"is_external"  |"enable_schema_evolution"  |"owner_role_type"  |"is_event"  |"budget"  |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [27]:
# Drop "my_table1" from the database.
session.table("my_table1").drop_table()

# List the tables of the session's current schema again to confirm the drop.
# The schema is taken from the session rather than hard-coded as
# "snowpark_db.public", so it follows whatever the credentials file selects.
current_schema = session.get_fully_qualified_current_schema()
session.sql(f"show tables in schema {current_schema}").show()

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"created_on"                      |"name"                |"database_name"  |"schema_name"  |"kind"     |"comment"  |"cluster_by"  |"rows"  |"bytes"  |"owner"   |"retention_time"  |"automatic_clustering"  |"change_tracking"  |"search_optimization"  |"search_optimization_progress"  |"search_optimization_bytes"  |"is_external"  |"enable_schema_evolution"  |"owner_role_type"  |"is_event"  |"budget"  |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

- [Table.merge](https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Table.merge)

In [39]:
from snowflake.snowpark.functions import when_matched, when_not_matched
from snowflake.snowpark.types import StructType, StructField, StringType, IntegerType, BooleanType, LongType

# Two-column schema: an integer "key" and a string "value".
schema = StructType([
    StructField("key", IntegerType()),
    StructField("value", StringType()),
])

# Write the source rows to the temporary table "src_table" and read it back as a Table.
source_rows = [(10, "new"), (12, "new"), (13, "old")]
source_df = session.create_dataframe(source_rows, schema=schema)
source_df.write.save_as_table(
    table_name="src_table",
    mode="overwrite",
    table_type="temporary",
)
source = session.table("src_table")
source.show()

-------------------
|"KEY"  |"VALUE"  |
-------------------
|10     |new      |
|12     |new      |
|13     |old      |
-------------------



In [40]:
from snowflake.snowpark.functions import when_matched, when_not_matched
from snowflake.snowpark.types import StructType, StructField, StringType, IntegerType, BooleanType, LongType

# Two-column schema: an integer "key" and a string "value".
schema = StructType([
    StructField("key", IntegerType()),
    StructField("value", StringType()),
])

# Write the target rows to the temporary table "tgt_table" and read it back as a Table.
# Note that key 10 appears twice, with different values.
target_rows = [(10, "old"), (10, "too_old"), (11, "old")]
target_df = session.create_dataframe(target_rows, schema=schema)
target_df.write.save_as_table(
    table_name="tgt_table",
    mode="overwrite",
    table_type="temporary",
)
target = session.table("tgt_table")
target.show()

-------------------
|"KEY"  |"VALUE"  |
-------------------
|10     |old      |
|10     |too_old  |
|11     |old      |
-------------------



In [41]:
# A target row matches a source row only when BOTH "key" and "value" are equal.
merge_condition = (target["key"] == source["key"]) & (target["value"] == source["value"])

merge_clauses = [
    # Matched rows: overwrite "value" with the source value. Because the join
    # condition already requires the two values to be equal, this changes nothing.
    when_matched().update({"value": source["value"]}),
    # Source rows without a match: insert them into the target.
    when_not_matched().insert({"key": source["key"], "value": source["value"]}),
]

# The returned MergeResult reports how many rows were inserted, updated and deleted.
target.merge(source, merge_condition, merge_clauses)

MergeResult(rows_inserted=3, rows_updated=0, rows_deleted=0)

In [42]:
# Print the merged target table, sorted by "key" then "value".
target.sort(col("key"), col("value")).show()

-------------------
|"KEY"  |"VALUE"  |
-------------------
|10     |new      |
|10     |old      |
|10     |too_old  |
|11     |old      |
|12     |new      |
|13     |old      |
-------------------



- [Table.update](https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Table.update)

In [44]:
# Create the temporary table "upd_table" with six (a, b) rows for the update examples.
update_rows = [
    (1, 1), (1, 2),
    (2, 1), (2, 2),
    (3, 1), (3, 2),
]
target_df = session.create_dataframe(update_rows, schema=["a", "b"])
target_df.write.save_as_table(
    table_name="upd_table",
    mode="overwrite",
    table_type="temporary",
)

# Read the table back and print it sorted by "a" then "b".
t = session.table("upd_table")
t.sort(col("a"), col("b")).show()

-------------
|"A"  |"B"  |
-------------
|1    |1    |
|1    |2    |
|2    |1    |
|2    |2    |
|3    |1    |
|3    |2    |
-------------



In [46]:
# Update all rows: set column "b" to 0 and column "a" to the sum of columns "a" and "b".
# As the result below shows, the sum uses the value "b" had before it was set to 0.
t.update({"b":0, "a": t.a + t.b})

UpdateResult(rows_updated=6, multi_joined_rows_updated=0)

In [47]:
# Print the updated table, sorted by "a" then "b".
t.sort(col("a"), col("b")).show()

-------------
|"A"  |"B"  |
-------------
|2    |0    |
|3    |0    |
|3    |0    |
|4    |0    |
|4    |0    |
|5    |0    |
-------------



In [48]:
# Single-column DataFrame whose "a" values decide which table rows get updated.
source_values = [1, 2, 3, 4]
source_df = session.create_dataframe(source_values, schema=["a"])
source_df.show()

# Build the table to update. This reuses the name "tgt_table" with mode="overwrite",
# so it replaces the table created for the merge example.
target_rows = [
    (1, 1), (1, 2),
    (2, 1), (2, 2),
    (3, 1), (3, 2),
]
target_df = session.create_dataframe(target_rows, schema=["a", "b"])
target_df.write.save_as_table(
    table_name="tgt_table",
    mode="overwrite",
    table_type="temporary",
)
t = session.table("tgt_table")
t.show()

-------
|"A"  |
-------
|1    |
|2    |
|3    |
|4    |
-------

-------------
|"A"  |"B"  |
-------------
|1    |1    |
|1    |2    |
|2    |1    |
|2    |2    |
|3    |1    |
|3    |2    |
-------------



In [49]:
# Set column "b" to 0 in every row whose column "a" equals column "a" in another DataFrame.
# The third argument is the DataFrame that the condition refers to.
t.update({"b":0}, t["a"]==source_df.a, source_df)

UpdateResult(rows_updated=6, multi_joined_rows_updated=0)

In [50]:
# Print the table after the update, sorted by "a" then "b".
t.sort(col("a"), col("b")).show()

-------------
|"A"  |"B"  |
-------------
|1    |0    |
|1    |0    |
|2    |0    |
|2    |0    |
|3    |0    |
|3    |0    |
-------------



In [51]:
# Close the Snowpark session now that all examples have run.
session.close()