# Snowpark Basics
From YouTube: https://www.youtube.com/watch?v=udcFnIvXFnE&t=293s

## Packages

In [3]:
# Snowpark for Python
from snowflake.snowpark.session import Session
import snowflake.snowpark.functions as F
from snowflake.snowpark.version import VERSION

## Version info
snowflake_environment = session.sql('select current_user(), current_version()').collect()
snowpark_version = VERSION

# Misc
import json
import os
from datetime import date

## Create Snowflake connection

In [4]:
## Connection information
path = os.environ.get('CAS_CREDENTIALS')
connection_parameters = json.load(open(f'{path}\\connection.json'))

# Create Snowflake Session object
session = Session.builder.configs(connection_parameters).create()

# Current Environment Details
print('Snowflake version           : {}'.format(snowflake_environment[0][1]))
print('Snowpark for Python version : {}.{}.{}'.format(snowpark_version[0],snowpark_version[1],snowpark_version[2]))

Snowflake version           : 7.27.1
Snowpark for Python version : 1.4.0


In [215]:
type(session)

snowflake.snowpark.session.Session

## Explore warehouse

In [216]:
session.sql('SHOW DATABASES').show()

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"created_on"                      |"name"                 |"is_default"  |"is_current"  |"origin"                                            |"owner"       |"comment"                                          |"options"  |"retention_time"  |"kind"             |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|2023-07-19 04:21:12.297000-07:00  |CITIBIKE               |N             |N             |                                                    |ACCOUNTADMIN  |                                                   |    

In [217]:
session.use_database('SNOWFLAKE_SAMPLE_DATA')

In [218]:
session.get_current_database()

'"SNOWFLAKE_SAMPLE_DATA"'

In [219]:
session.use_schema('TPCH_SF10')

In [220]:
session.get_current_schema()

'"TPCH_SF10"'

In [221]:
session.sql('SHOW TABLES').show()

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"created_on"                      |"name"    |"database_name"        |"schema_name"  |"kind"  |"comment"                          |"cluster_by"  |"rows"    |"bytes"     |"owner"  |"retention_time"  |"automatic_clustering"  |"change_tracking"  |"is_external"  |"owner_role_type"  |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|2021-11-09 20:42:41.809000-08:00  |CUSTOMER  |SNOWFLAKE_SAMPLE_DATA  |TPCH_SF10      |TABLE   |Customer data as defined by TPC-H  |              |1500000

In [222]:
session.sql('SELECT * FROM LINEITEM LIMIT 5').show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"L_ORDERKEY"  |"L_PARTKEY"  |"L_SUPPKEY"  |"L_LINENUMBER"  |"L_QUANTITY"  |"L_EXTENDEDPRICE"  |"L_DISCOUNT"  |"L_TAX"  |"L_RETURNFLAG"  |"L_LINESTATUS"  |"L_SHIPDATE"  |"L_COMMITDATE"  |"L_RECEIPTDATE"  |"L_SHIPINSTRUCT"   |"L_SHIPMODE"  |"L_COMMENT"                   |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|52618212      |1651955      |1988         |4               |20.00         |38137.40           |0.09          |0.01     |A               |F               |1992-04-17    |1992-04-25    

## Reference a Snowflake table

In [223]:
tbl = session.table("LINEITEM")

In [224]:
type(tbl)

snowflake.snowpark.table.Table

## Show vs Collect

Show will display 20 rows by default.

In [225]:
tbl.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"L_ORDERKEY"  |"L_PARTKEY"  |"L_SUPPKEY"  |"L_LINENUMBER"  |"L_QUANTITY"  |"L_EXTENDEDPRICE"  |"L_DISCOUNT"  |"L_TAX"  |"L_RETURNFLAG"  |"L_LINESTATUS"  |"L_SHIPDATE"  |"L_COMMITDATE"  |"L_RECEIPTDATE"  |"L_SHIPINSTRUCT"   |"L_SHIPMODE"  |"L_COMMENT"                                  |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|59184515      |1799069      |74121        |1               |30.00         |35039.40           |0.04          |0.05     |N               |O

Collect wiill create a list of rows.

In [226]:
tbl_sample = session.sql('SELECT * FROM LINEITEM LIMIT 25')

In [227]:
list_of_rows = tbl_sample.collect()
display(type(list_of_rows), list_of_rows[:5])

list

[Row(L_ORDERKEY=52007138, L_PARTKEY=1431204, L_SUPPKEY=56219, L_LINENUMBER=1, L_QUANTITY=Decimal('7.00'), L_EXTENDEDPRICE=Decimal('7945.91'), L_DISCOUNT=Decimal('0.00'), L_TAX=Decimal('0.06'), L_RETURNFLAG='A', L_LINESTATUS='F', L_SHIPDATE=datetime.date(1994, 12, 23), L_COMMITDATE=datetime.date(1994, 11, 28), L_RECEIPTDATE=datetime.date(1995, 1, 2), L_SHIPINSTRUCT='COLLECT COD', L_SHIPMODE='REG AIR', L_COMMENT='ular packa'),
 Row(L_ORDERKEY=52007138, L_PARTKEY=243770, L_SUPPKEY=68773, L_LINENUMBER=2, L_QUANTITY=Decimal('27.00'), L_EXTENDEDPRICE=Decimal('46271.52'), L_DISCOUNT=Decimal('0.01'), L_TAX=Decimal('0.03'), L_RETURNFLAG='R', L_LINESTATUS='F', L_SHIPDATE=datetime.date(1994, 11, 3), L_COMMITDATE=datetime.date(1994, 12, 13), L_RECEIPTDATE=datetime.date(1994, 11, 5), L_SHIPINSTRUCT='DELIVER IN PERSON', L_SHIPMODE='FOB', L_COMMENT=' the express, regu'),
 Row(L_ORDERKEY=52007138, L_PARTKEY=1026811, L_SUPPKEY=1842, L_LINENUMBER=3, L_QUANTITY=Decimal('22.00'), L_EXTENDEDPRICE=Decimal('

## Basic transformations

In [228]:
type(tbl)

snowflake.snowpark.table.Table

In [229]:
tbl.queries

{'queries': ['SELECT  *  FROM (LINEITEM)'], 'post_actions': []}

In [230]:
tbl.show(2)

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"L_ORDERKEY"  |"L_PARTKEY"  |"L_SUPPKEY"  |"L_LINENUMBER"  |"L_QUANTITY"  |"L_EXTENDEDPRICE"  |"L_DISCOUNT"  |"L_TAX"  |"L_RETURNFLAG"  |"L_LINESTATUS"  |"L_SHIPDATE"  |"L_COMMITDATE"  |"L_RECEIPTDATE"  |"L_SHIPINSTRUCT"  |"L_SHIPMODE"  |"L_COMMENT"                               |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|55014720      |1324613      |74640        |6               |24.00         |39301.20           |0.04          |0.08     |R               |F            

### Select columns

In [231]:
(tbl
 .select("L_SHIPDATE","L_EXTENDEDPRICE","L_SHIPMODE","L_QUANTITY","L_DISCOUNT")
 .show(5)
)

---------------------------------------------------------------------------------
|"L_SHIPDATE"  |"L_EXTENDEDPRICE"  |"L_SHIPMODE"  |"L_QUANTITY"  |"L_DISCOUNT"  |
---------------------------------------------------------------------------------
|1997-05-11    |22322.58           |REG AIR       |14.00         |0.09          |
|1998-05-06    |54706.75           |SHIP          |43.00         |0.06          |
|1998-06-22    |38772.96           |AIR           |24.00         |0.07          |
|1998-05-08    |3036.90            |FOB           |2.00          |0.03          |
|1998-07-21    |55745.04           |TRUCK         |34.00         |0.05          |
---------------------------------------------------------------------------------



### Filter

In [232]:
tbl_1997 = (
    tbl
    .select("L_SHIPDATE","L_EXTENDEDPRICE","L_SHIPMODE","L_QUANTITY","L_DISCOUNT")
    .filter(F.col("L_SHIPDATE").between(date(1997,1,1), date(1997,12,31)))
)

tbl_1997.show(10)

---------------------------------------------------------------------------------
|"L_SHIPDATE"  |"L_EXTENDEDPRICE"  |"L_SHIPMODE"  |"L_QUANTITY"  |"L_DISCOUNT"  |
---------------------------------------------------------------------------------
|1997-04-22    |77110.75           |SHIP          |41.00         |0.09          |
|1997-02-13    |9694.30            |TRUCK         |5.00          |0.00          |
|1997-05-07    |37740.00           |AIR           |34.00         |0.04          |
|1997-04-24    |40986.66           |TRUCK         |39.00         |0.02          |
|1997-06-13    |87544.80           |AIR           |48.00         |0.05          |
|1997-04-16    |21416.01           |AIR           |11.00         |0.06          |
|1997-05-06    |13022.40           |SHIP          |10.00         |0.01          |
|1997-12-15    |61907.54           |MAIL          |34.00         |0.00          |
|1997-01-01    |28826.75           |REG AIR       |25.00         |0.08          |
|1997-02-05    |

### Sort

In [233]:
(tbl_1997
 .sort('L_SHIPDATE')
 .show()
)

---------------------------------------------------------------------------------
|"L_SHIPDATE"  |"L_EXTENDEDPRICE"  |"L_SHIPMODE"  |"L_QUANTITY"  |"L_DISCOUNT"  |
---------------------------------------------------------------------------------
|1997-01-01    |56055.00           |AIR           |50.00         |0.06          |
|1997-01-01    |29260.26           |TRUCK         |18.00         |0.02          |
|1997-01-01    |51487.24           |FOB           |28.00         |0.02          |
|1997-01-01    |28826.75           |REG AIR       |25.00         |0.08          |
|1997-01-01    |31547.58           |RAIL          |17.00         |0.06          |
|1997-01-01    |23226.59           |AIR           |17.00         |0.10          |
|1997-01-01    |66826.40           |REG AIR       |40.00         |0.07          |
|1997-01-01    |66250.10           |AIR           |35.00         |0.04          |
|1997-01-01    |66943.60           |REG AIR       |40.00         |0.00          |
|1997-01-01    |

### Create a column

Create a string literal column

In [234]:
(tbl_1997
 .with_column("TEST", F.lit('test'))
 .show(5)
)

------------------------------------------------------------------------------------------
|"L_SHIPDATE"  |"L_EXTENDEDPRICE"  |"L_SHIPMODE"  |"L_QUANTITY"  |"L_DISCOUNT"  |"TEST"  |
------------------------------------------------------------------------------------------
|1997-07-03    |5565.55            |SHIP          |5.00          |0.01          |test    |
|1997-05-19    |37692.77           |RAIL          |19.00         |0.04          |test    |
|1997-08-10    |7186.76            |RAIL          |4.00          |0.09          |test    |
|1997-07-11    |38499.52           |FOB           |32.00         |0.00          |test    |
|1997-06-11    |91459.65           |REG AIR       |47.00         |0.07          |test    |
------------------------------------------------------------------------------------------



Create a numeric column

In [235]:
(tbl_1997
 .with_column("TEST", F.lit(100))
 .show(5)
)

------------------------------------------------------------------------------------------
|"L_SHIPDATE"  |"L_EXTENDEDPRICE"  |"L_SHIPMODE"  |"L_QUANTITY"  |"L_DISCOUNT"  |"TEST"  |
------------------------------------------------------------------------------------------
|1997-07-25    |41599.11           |RAIL          |21.00         |0.02          |100     |
|1997-08-13    |7252.15            |AIR           |5.00          |0.10          |100     |
|1997-09-27    |19741.41           |TRUCK         |13.00         |0.08          |100     |
|1997-09-13    |1946.96            |REG AIR       |1.00          |0.07          |100     |
|1997-08-14    |49351.95           |SHIP          |27.00         |0.01          |100     |
------------------------------------------------------------------------------------------



TOTAL_REVENUE = (L_EXTENDEDPRICE - (L_EXTENDEDPRICE * L_DISCOUNT)) * L_QUANTITY)

Create multiple columns

In [236]:
(tbl_1997
 .with_columns(['TEST1','TEST2'],
               [F.lit(1),F.lit('new')])
 .show()
)

-----------------------------------------------------------------------------------------------------
|"L_SHIPDATE"  |"L_EXTENDEDPRICE"  |"L_SHIPMODE"  |"L_QUANTITY"  |"L_DISCOUNT"  |"TEST1"  |"TEST2"  |
-----------------------------------------------------------------------------------------------------
|1997-01-19    |24890.16           |RAIL          |24.00         |0.08          |1        |new      |
|1997-01-08    |79361.55           |SHIP          |45.00         |0.00          |1        |new      |
|1997-06-15    |48765.82           |RAIL          |29.00         |0.01          |1        |new      |
|1997-07-19    |28639.39           |RAIL          |17.00         |0.06          |1        |new      |
|1997-10-20    |82977.50           |SHIP          |50.00         |0.03          |1        |new      |
|1997-12-14    |35014.97           |RAIL          |23.00         |0.06          |1        |new      |
|1997-11-21    |18798.96           |RAIL          |12.00         |0.02          |1

Create column using an expression

TOTAL_REVENUE = (L_EXTENDEDPRICE - (L_EXTENDEDPRICE * L_DISCOUNT)) * L_QUANTITY)

In [237]:
finaltbl_1997 = (
 tbl_1997
 .select(["L_SHIPDATE","L_EXTENDEDPRICE","L_SHIPMODE","L_QUANTITY","L_DISCOUNT"])
 .with_column("REVENUE",(F.col('L_EXTENDEDPRICE') - (F.col("L_EXTENDEDPRICE") * F.col("L_DISCOUNT")) * F.col("L_QUANTITY")))
)

finaltbl_1997.show()

--------------------------------------------------------------------------------------------------
|"L_SHIPDATE"  |"L_EXTENDEDPRICE"  |"L_SHIPMODE"  |"L_QUANTITY"  |"L_DISCOUNT"  |"REVENUE"       |
--------------------------------------------------------------------------------------------------
|1997-07-25    |41599.11           |RAIL          |21.00         |0.02          |24127.483800    |
|1997-08-13    |7252.15            |AIR           |5.00          |0.10          |3626.075000     |
|1997-09-27    |19741.41           |TRUCK         |13.00         |0.08          |-789.656400     |
|1997-09-13    |1946.96            |REG AIR       |1.00          |0.07          |1810.672800     |
|1997-08-14    |49351.95           |SHIP          |27.00         |0.01          |36026.923500    |
|1997-10-19    |53063.91           |MAIL          |29.00         |0.09          |-85432.895100   |
|1997-01-18    |11737.11           |FOB           |11.00         |0.01          |10446.027900    |
|1997-01-0

## Group by 

In [238]:
finaltbl_1997.queries

{'queries': ['SELECT "L_SHIPDATE", "L_EXTENDEDPRICE", "L_SHIPMODE", "L_QUANTITY", "L_DISCOUNT", ("L_EXTENDEDPRICE" - (("L_EXTENDEDPRICE" * "L_DISCOUNT") * "L_QUANTITY")) AS "REVENUE" FROM LINEITEM WHERE (("L_SHIPDATE" >= DATE \'1997-01-01\') AND ("L_SHIPDATE" <= DATE \'1997-12-31\'))'],
 'post_actions': []}

In [239]:
tbl_1997_shipmode = (
    finaltbl_1997
    .group_by('L_SHIPMODE')
    .agg(F.sum('REVENUE').alias('TOTAL_REVENUE_1997'))
)

tbl_1997_shipmode.show()

---------------------------------------
|"L_SHIPMODE"  |"TOTAL_REVENUE_1997"  |
---------------------------------------
|TRUCK         |-33997986770.683300   |
|SHIP          |-34025174530.819000   |
|MAIL          |-33969888960.370700   |
|RAIL          |-34104351362.659000   |
|AIR           |-34048584858.305300   |
|FOB           |-33792875137.023700   |
|REG AIR       |-33883815844.822900   |
---------------------------------------



## Joins

In [240]:
selectCols = ['L_SHIPDATE','L_EXTENDEDPRICE','L_SHIPMODE','L_QUANTITY','L_DISCOUNT']

finaltbl_1998 = (
    session
    .table('LINEITEM')
    .select(selectCols)
    .filter(F.col('L_SHIPDATE').between(date(1998,1,1), date(1998,12,31)))
    .with_column("REVENUE",(F.col('L_EXTENDEDPRICE') - (F.col("L_EXTENDEDPRICE") * F.col("L_DISCOUNT")) * F.col("L_QUANTITY")))

)

finaltbl_1998.show()

-------------------------------------------------------------------------------------------------
|"L_SHIPDATE"  |"L_EXTENDEDPRICE"  |"L_SHIPMODE"  |"L_QUANTITY"  |"L_DISCOUNT"  |"REVENUE"      |
-------------------------------------------------------------------------------------------------
|1998-05-06    |54706.75           |SHIP          |43.00         |0.06          |-86436.665000  |
|1998-06-22    |38772.96           |AIR           |24.00         |0.07          |-26365.612800  |
|1998-05-08    |3036.90            |FOB           |2.00          |0.03          |2854.686000    |
|1998-07-21    |55745.04           |TRUCK         |34.00         |0.05          |-39021.528000  |
|1998-05-17    |45517.23           |TRUCK         |33.00         |0.04          |-14565.513600  |
|1998-07-26    |31070.72           |AIR           |32.00         |0.08          |-48470.323200  |
|1998-05-13    |15576.47           |RAIL          |13.00         |0.10          |-4672.941000   |
|1998-07-01    |2939

In [241]:
tbl_1998_shipmode = (
    finaltbl_1998
    .group_by('L_SHIPMODE')
    .agg(F.sum('REVENUE').alias('TOTAL_REVENUE_1998'))
)

tbl_1998_shipmode.show()

---------------------------------------
|"L_SHIPMODE"  |"TOTAL_REVENUE_1998"  |
---------------------------------------
|AIR           |-25725641625.386000   |
|TRUCK         |-25589978955.784000   |
|MAIL          |-25514188715.499700   |
|SHIP          |-25565441916.584500   |
|RAIL          |-25534756190.430000   |
|REG AIR       |-25490428550.808400   |
|FOB           |-25502466911.912000   |
---------------------------------------



In [242]:
(tbl_1997_shipmode
 .join(tbl_1998_shipmode, 
       on = tbl_1997_shipmode['L_SHIPMODE'] == tbl_1998_shipmode['L_SHIPMODE'],
       how = 'INNER', )
 .show())

-------------------------------------------------------------------------------------------
|"l_75vg_L_SHIPMODE"  |"TOTAL_REVENUE_1997"  |"r_l8zv_L_SHIPMODE"  |"TOTAL_REVENUE_1998"  |
-------------------------------------------------------------------------------------------
|RAIL                 |-34104351362.659000   |RAIL                 |-25534756190.430000   |
|SHIP                 |-34025174530.819000   |SHIP                 |-25565441916.584500   |
|MAIL                 |-33969888960.370700   |MAIL                 |-25514188715.499700   |
|TRUCK                |-33997986770.683300   |TRUCK                |-25589978955.784000   |
|REG AIR              |-33883815844.822900   |REG AIR              |-25490428550.808400   |
|FOB                  |-33792875137.023700   |FOB                  |-25502466911.912000   |
|AIR                  |-34048584858.305300   |AIR                  |-25725641625.386000   |
--------------------------------------------------------------------------------

In [243]:
(tbl_1997_shipmode
 .join(tbl_1998_shipmode, 
       on = tbl_1997_shipmode['L_SHIPMODE'] == tbl_1998_shipmode['L_SHIPMODE'],
       how = 'INNER', )
 .select(
     tbl_1997_shipmode.col('L_SHIPMODE').alias('L_SHIPMODE'),
     tbl_1997_shipmode.col('TOTAL_REVENUE_1997'),
     tbl_1998_shipmode.col('TOTAL_REVENUE_1998')
 )
 .show()
)

--------------------------------------------------------------
|"L_SHIPMODE"  |"TOTAL_REVENUE_1997"  |"TOTAL_REVENUE_1998"  |
--------------------------------------------------------------
|FOB           |-33792875137.023700   |-25502466911.912000   |
|TRUCK         |-33997986770.683300   |-25589978955.784000   |
|AIR           |-34048584858.305300   |-25725641625.386000   |
|MAIL          |-33969888960.370700   |-25514188715.499700   |
|SHIP          |-34025174530.819000   |-25565441916.584500   |
|RAIL          |-34104351362.659000   |-25534756190.430000   |
|REG AIR       |-33883815844.822900   |-25490428550.808400   |
--------------------------------------------------------------



In [244]:
join_shipmodes_revenue = (
    tbl_1997_shipmode
    .join(tbl_1998_shipmode, 
          on = tbl_1997_shipmode['L_SHIPMODE'] == tbl_1998_shipmode['L_SHIPMODE'],
          how = 'INNER')
    .select(
         tbl_1997_shipmode.col('L_SHIPMODE').alias('L_SHIPMODE'),
         tbl_1997_shipmode.col('TOTAL_REVENUE_1997'),
         tbl_1998_shipmode.col('TOTAL_REVENUE_1998')
     )
)

join_shipmodes_revenue.show()

--------------------------------------------------------------
|"L_SHIPMODE"  |"TOTAL_REVENUE_1997"  |"TOTAL_REVENUE_1998"  |
--------------------------------------------------------------
|SHIP          |-34025174530.819000   |-25565441916.584500   |
|RAIL          |-34104351362.659000   |-25534756190.430000   |
|MAIL          |-33969888960.370700   |-25514188715.499700   |
|AIR           |-34048584858.305300   |-25725641625.386000   |
|TRUCK         |-33997986770.683300   |-25589978955.784000   |
|FOB           |-33792875137.023700   |-25502466911.912000   |
|REG AIR       |-33883815844.822900   |-25490428550.808400   |
--------------------------------------------------------------



## Create an array column

In [245]:
(join_shipmodes_revenue
 .with_column('TOTALS', F.array_construct(F.col('TOTAL_REVENUE_1997'), F.col('TOTAL_REVENUE_1998')))
 .show()
)

-------------------------------------------------------------------------------------
|"L_SHIPMODE"  |"TOTAL_REVENUE_1997"  |"TOTAL_REVENUE_1998"  |"TOTALS"              |
-------------------------------------------------------------------------------------
|SHIP          |-34025174530.819000   |-25565441916.584500   |[                     |
|              |                      |                      |  -34025174530.819,   |
|              |                      |                      |  -25565441916.5845   |
|              |                      |                      |]                     |
|MAIL          |-33969888960.370700   |-25514188715.499700   |[                     |
|              |                      |                      |  -33969888960.3707,  |
|              |                      |                      |  -25514188715.4997   |
|              |                      |                      |]                     |
|RAIL          |-34104351362.659000   |-25534756190.43

## Concat column literal

In [246]:
(join_shipmodes_revenue
 .with_column(
     'TOTALS',
     F.array_construct(F.col('TOTAL_REVENUE_1997'), F.col('TOTAL_REVENUE_1998'))
 )
 .with_column(
     'COMMENT',
     F.concat(F.lit('Logic was created by '), F.current_user())
 )
 .show()
)

-------------------------------------------------------------------------------------------------------------------
|"L_SHIPMODE"  |"TOTAL_REVENUE_1997"  |"TOTAL_REVENUE_1998"  |"TOTALS"              |"COMMENT"                    |
-------------------------------------------------------------------------------------------------------------------
|TRUCK         |-33997986770.683300   |-25589978955.784000   |[                     |Logic was created by PSTYLS  |
|              |                      |                      |  -33997986770.6833,  |                             |
|              |                      |                      |  -25589978955.784    |                             |
|              |                      |                      |]                     |                             |
|SHIP          |-34025174530.819000   |-25565441916.584500   |[                     |Logic was created by PSTYLS  |
|              |                      |                      |  -3402517

## Create conditional logic (IF/THEN) using WHEN/OTHERWISE

In [249]:
(join_shipmodes_revenue
 .with_column(
     'TOTALS',
     F.array_construct(F.col('TOTAL_REVENUE_1997'), F.col('TOTAL_REVENUE_1998'))
 )
 .with_column(
     'COMMENT',
     F.concat(F.lit('Logic was created by '), F.current_user())
 )
 .with_column(
     'TRUST_LEVEL',
     F.when(F.col('L_SHIPMODE') == 'REG AIR', 0.4)
      .when(F.col('L_SHIPMODE') == 'AIR', .9)
      .otherwise(999)
 )
 .show()
)

-----------------------------------------------------------------------------------------------------------------------------------
|"L_SHIPMODE"  |"TOTAL_REVENUE_1997"  |"TOTAL_REVENUE_1998"  |"TOTALS"              |"COMMENT"                    |"TRUST_LEVEL"  |
-----------------------------------------------------------------------------------------------------------------------------------
|AIR           |-34048584858.305300   |-25725641625.386000   |[                     |Logic was created by PSTYLS  |0.9            |
|              |                      |                      |  -34048584858.3053,  |                             |               |
|              |                      |                      |  -25725641625.386    |                             |               |
|              |                      |                      |]                     |                             |               |
|MAIL          |-33969888960.370700   |-25514188715.499700   |[             

## Drop columns

In [250]:
(join_shipmodes_revenue
 .with_column(
     'TOTALS',
     F.array_construct(F.col('TOTAL_REVENUE_1997'), F.col('TOTAL_REVENUE_1998'))
 )
 .with_column(
     'COMMENT',
     F.concat(F.lit('Logic was created by '), F.current_user())
 )
 .with_column(
     'TRUST_LEVEL',
     F.when(F.col('L_SHIPMODE') == 'REG AIR', 0.4)
      .when(F.col('L_SHIPMODE') == 'AIR', .9)
      .otherwise(999)
 )
 .drop(['TOTAL_REVENUE_1997', 'TOTAL_REVENUE_1998'])
 .show()
)

-------------------------------------------------------------------------------------
|"L_SHIPMODE"  |"TOTALS"              |"COMMENT"                    |"TRUST_LEVEL"  |
-------------------------------------------------------------------------------------
|TRUCK         |[                     |Logic was created by PSTYLS  |999.0          |
|              |  -33997986770.6833,  |                             |               |
|              |  -25589978955.784    |                             |               |
|              |]                     |                             |               |
|FOB           |[                     |Logic was created by PSTYLS  |999.0          |
|              |  -33792875137.0237,  |                             |               |
|              |  -25502466911.912    |                             |               |
|              |]                     |                             |               |
|AIR           |[                     |Logic was creat

## Close Session

In [251]:
session.close()