In [2]:
import snowflake.snowpark as snowpark
from snowflake.snowpark import Session
from dotenv import load_dotenv
import os

# Pull any variables defined in a local .env file into the process environment
# (python-dotenv's default lookup) before the credentials are read.
load_dotenv()

# Credentials come from the environment so no secret is stored in the notebook.
# Maps each Snowflake connection key to the environment variable that supplies it.
_credential_env_vars = {
    "account": "account_snow",
    "user": "user_snow",
    "password": "password_snow",
}

# Fail fast with a readable message: os.getenv() returns None for an unset
# variable, which would otherwise be handed to the connector as a credential.
_missing_env_vars = [
    env_name for env_name in _credential_env_vars.values() if not os.getenv(env_name)
]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

connection_parameters = {
    **{key: os.environ[env_name] for key, env_name in _credential_env_vars.items()},
    "role": "ACCOUNTADMIN",
    "database": "NEW_DB",
    "schema": "PUBLIC",
}

# Open the Snowpark session used by the cells below.
session = Session.builder.configs(connection_parameters).create()

In [3]:
# Sample employee rows kept in memory; values in each row follow the column
# order given by employee_schema.
employee_data = [
    [1, "TONY", 24000, 101],
    [2, "STEVE", 17000, 101],
    [3, "BRUCE", 9000, 101],
    [4, "WANDA", 20000, 102],
    [5, "VICTOR", 12000, 102],
    [6, "STEPHEN", 10000, 103],
    [7, "HANK", 15000, 103],
    [8, "THOR", 21000, 103],
]

# Column names applied to the rows above.
employee_schema = ["EMP_ID", "EMP_NAME", "SALARY", "DEPT_ID"]

# Build a Snowpark DataFrame from the local rows and print it.
df_employee = session.create_dataframe(employee_data, schema=employee_schema)
df_employee.show()

------------------------------------------------
|"EMP_ID"  |"EMP_NAME"  |"SALARY"  |"DEPT_ID"  |
------------------------------------------------
|1         |TONY        |24000     |101        |
|2         |STEVE       |17000     |101        |
|3         |BRUCE       |9000      |101        |
|4         |WANDA       |20000     |102        |
|5         |VICTOR      |12000     |102        |
|6         |STEPHEN     |10000     |103        |
|7         |HANK        |15000     |103        |
|8         |THOR        |21000     |103        |
------------------------------------------------



In [4]:
from snowflake.snowpark.functions import count
# Count the EMP_ID values in each department and print the result.
(
    df_employee
    .group_by("DEPT_ID")
    .agg(count("EMP_ID"))
    .show()
)

-------------------------------
|"DEPT_ID"  |"COUNT(EMP_ID)"  |
-------------------------------
|101        |3                |
|102        |2                |
|103        |3                |
-------------------------------



In [5]:
# NOTE: from this point on, `max` and `min` refer to the Snowpark column
# functions and shadow Python's built-ins of the same name in this notebook.
from snowflake.snowpark.functions import max, min

# Highest and lowest SALARY per department; the result columns keep their
# auto-generated names.
(
    df_employee
    .group_by("DEPT_ID")
    .agg(
        max("SALARY"),
        min("SALARY"),
    )
    .show()
)

---------------------------------------------
|"DEPT_ID"  |"MAX(SALARY)"  |"MIN(SALARY)"  |
---------------------------------------------
|101        |24000          |9000           |
|102        |20000          |12000          |
|103        |21000          |10000          |
---------------------------------------------



In [6]:
# Same per-department aggregation, with explicit names for the result columns.
(
    df_employee
    .group_by("DEPT_ID")
    .agg(
        max("SALARY").alias("MAX_SALARY"),
        min("SALARY").alias("MIN_SALARY"),
    )
    .show()
)

-------------------------------------------
|"DEPT_ID"  |"MAX_SALARY"  |"MIN_SALARY"  |
-------------------------------------------
|101        |24000         |9000          |
|102        |20000         |12000         |
|103        |21000         |10000         |
-------------------------------------------



In [7]:
# Step 1: employee count per department, exposed as EMP_COUNT.
df1 = (
    df_employee
    .group_by("DEPT_ID")
    .agg(count("EMP_ID").alias("EMP_COUNT"))
)
df1.show()

---------------------------
|"DEPT_ID"  |"EMP_COUNT"  |
---------------------------
|101        |3            |
|102        |2            |
|103        |3            |
---------------------------



In [8]:
from snowflake.snowpark.functions import col

In [9]:
# Step 2: keep only the departments that have more than two employees.
has_more_than_two = col("EMP_COUNT") > 2
df2 = df1.filter(has_more_than_two)
df2.show()

---------------------------
|"DEPT_ID"  |"EMP_COUNT"  |
---------------------------
|101        |3            |
|103        |3            |
---------------------------



In [10]:
# Step 3: project the filtered result down to the department id column.
df3 = df2.select(
    "DEPT_ID",
)
df3.show()

-------------
|"DEPT_ID"  |
-------------
|101        |
|103        |
-------------



In [11]:
# The three steps above (aggregate, filter, project) as one method chain.
(
    df_employee
    .group_by("DEPT_ID")
    .agg(count("EMP_ID").alias("EMP_COUNT"))
    .filter(col("EMP_COUNT") > 2)
    .select("DEPT_ID")
    .show()
)

-------------
|"DEPT_ID"  |
-------------
|101        |
|103        |
-------------

