# Spark SQL statements

In [None]:
%%configure
{
    "driverMemory": "1000M",
    "executorCores": 1,
    "conf": {
        "spark.sql.extensions": "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
        "spark.sql.defaultCatalog": "iceberg",
        "spark.sql.catalog.iceberg": "org.apache.iceberg.spark.SparkCatalog",
        "spark.sql.catalog.iceberg.catalog-impl": "org.apache.iceberg.jdbc.JdbcCatalog",
        "spark.sql.catalog.iceberg.uri": "jdbc:mysql://localhost/iceberg",
        "spark.sql.catalog.iceberg.jdbc.user": "root",
        "spark.sql.catalog.iceberg.jdbc.password": "",
        "spark.sql.catalog.iceberg.jdbc.useSSL": "false",
        "spark.sql.catalog.iceberg.warehouse": "s3://datalake/warehouse/",
        "spark.sql.catalog.iceberg.io-impl": "org.apache.iceberg.aws.s3.S3FileIO",
        "spark.sql.catalog.iceberg.s3.endpoint": "http://127.0.0.1:9000"
    }
}

For the configuration changes to take effect, a `SparkSession` must be created after the configuration cell. Evaluating `spark` below starts the session.

In [None]:
spark

Check whether the `mydb.employees` table still exists in the external catalog.

In [None]:
%%sql

-- List the tables in database `mydb` whose name matches 'employees'.
SHOW TABLES IN mydb LIKE 'employees'

In [None]:
%%sql

-- Show the column names and data types of mydb.employees.
DESCRIBE mydb.employees

## Select all

In [None]:
%%sql

-- Return every column of every row in the table.
SELECT *
FROM mydb.employees

## Projection

In [None]:
%%sql

-- Projection: return only the listed columns instead of the whole row.
SELECT employee_name, age, salary
FROM mydb.employees

We can also cast types, compute expressions, and alias the resulting columns.

In [None]:
%%sql

-- Derive a `tax` column: cast salary to decimal, multiply by 0.27, and alias the result.
-- NOTE(review): a bare `decimal` in Spark SQL means decimal(10,0), so any fractional
-- part of salary would be dropped by the cast; confirm salary holds whole numbers.
SELECT
    employee_name,
    age,
    salary,
    cast(salary AS decimal) * 0.27 AS tax
FROM mydb.employees

## Filter

In [None]:
%%sql

-- Filter: keep only the rows whose state is 'NY'.
SELECT *
FROM mydb.employees
WHERE state = 'NY'

## Limit

In [None]:
%%sql

-- Limit: return at most 3 of the 'NY' rows.
-- There is no ORDER BY here, so which 3 rows come back is not guaranteed.
SELECT *
FROM mydb.employees
WHERE state = 'NY'
LIMIT 3

## Order by

In [None]:
%%sql

-- Order by: sort the 'NY' rows by salary, highest first, then keep the first 3.
SELECT *
FROM mydb.employees
WHERE state = 'NY'
ORDER BY salary DESC
LIMIT 3

## Group by

In [None]:
%%sql

-- Group by: total salary per department.
-- The aggregate is aliased so the result column gets a stable, readable name
-- (sum_salary) instead of the auto-generated `sum(salary)`.
SELECT
    department,
    sum(salary) AS sum_salary
FROM mydb.employees
GROUP BY department

In [None]:
%%sql

-- Several aggregates per department.
-- The grouping key is selected as well; without it each result row
-- cannot be attributed to a department.
SELECT
    department,
    sum(salary) AS sum_salary,
    avg(salary) AS avg_salary,
    sum(bonus) AS sum_bonus,
    max(bonus) AS max_bonus
FROM mydb.employees
GROUP BY department

## Group by and Having

In [None]:
%%sql

-- Having: filter on an aggregate after grouping, keeping only the
-- departments whose total bonus exceeds 50000.
-- The grouping key is selected as well; without it each result row
-- cannot be attributed to a department.
SELECT
    department,
    sum(salary) AS sum_salary,
    avg(salary) AS avg_salary,
    sum(bonus) AS sum_bonus,
    max(bonus) AS max_bonus
FROM mydb.employees
GROUP BY department
HAVING sum_bonus > 50000

## Group by and Order by

In [None]:
%%sql

-- Total salary per department, largest total first.
SELECT department, sum(salary) AS sum_salary
FROM mydb.employees
GROUP BY department
ORDER BY sum_salary DESC

## Temporary view

In [None]:
%%sql

-- Define (or redefine) a temporary view named high_salary_employees
-- over the employees whose salary is above 80000.
CREATE OR REPLACE TEMPORARY VIEW high_salary_employees AS
SELECT *
FROM mydb.employees
WHERE salary > 80000

In [None]:
%%sql

-- Query the temporary view like a regular table.
SELECT *
FROM high_salary_employees

## Join

In [None]:
%%sql

-- Build a small inline lookup of (employee_name, report_to) pairs
-- and expose it as the temporary view employee_relation.
CREATE OR REPLACE TEMPORARY VIEW employee_relation AS
SELECT *
FROM VALUES
    ('James', 'Michael'),
    ('Robert', 'Michael'),
    ('Maria', 'Jen'),
    ('Raman', 'Jen'),
    ('Scott', 'Jen'),
    ('Jeff', 'Kumar')
    AS data(employee_name, report_to)

In [None]:
%%sql

-- Inspect the inline lookup view.
SELECT *
FROM employee_relation

In [None]:
%%sql

-- Inner join: keep only the employees that have an entry in employee_relation
-- and attach who they report to.
-- The join condition uses the standard SQL `=` operator, matching the
-- comparisons used in the WHERE clauses elsewhere in this notebook.
SELECT
    a.*,
    b.report_to
FROM mydb.employees AS a
INNER JOIN employee_relation AS b
    ON a.employee_name = b.employee_name

In [None]:
%%sql

-- Left join: keep every employee; b.report_to is NULL when the employee has
-- no entry in employee_relation, so coalesce substitutes 'Director'.
-- The result is aliased so the column is named report_to rather than the
-- auto-generated expression text.
SELECT
    a.*,
    coalesce(b.report_to, 'Director') AS report_to
FROM mydb.employees AS a
LEFT JOIN employee_relation AS b
    ON a.employee_name = b.employee_name

## More about aggregate functions

### collect_list

In [None]:
%%sql

-- Gather the salary of every row into a single array column.
SELECT collect_list(salary) AS salaries
FROM mydb.employees

In [None]:
%%sql

-- One array of salaries per state.
SELECT state, collect_list(salary) AS salaries
FROM mydb.employees
GROUP BY state

### collect_set

In [None]:
%%sql

-- Baseline for comparison: collect_list keeps duplicates, so a department
-- appears once per matching row. The collect_set query in the next cell
-- returns each department only once.
SELECT state, collect_list(department) AS departments
FROM mydb.employees
GROUP BY state
group by state

In [None]:
%%sql

-- collect_set gathers only the distinct departments for each state.
SELECT state, collect_set(department) AS departments
FROM mydb.employees
GROUP BY state

For the full list of Spark SQL built-in functions, see [https://spark.apache.org/docs/latest/api/sql/index.html](https://spark.apache.org/docs/latest/api/sql/index.html)