### Important Note!

This notebook was run on Databricks Runtime 13.3 LTS!

There are no concerns regarding cluster size (memory and cores).

Important: don't use Single User clusters (choose Shared options) to deal with Lakehouse Federation data.

Add `spark.databricks.delta.retentionDurationCheck.enabled false` to the cluster's Spark config when creating the cluster, so that VACUUM can be used properly in this notebook.

The purpose of this notebook is just to show the version of the SQL commands for Python, **always use the SQL version as a reference**, as it was the one used during the Databricks SQL course.

In [0]:
%sql
-- Register the external database `lakehouse_federation` as the foreign catalog
-- `databricks_postgres`, reached through the existing connection `postgres_conn`
-- (presumably a PostgreSQL connection, judging by its name).
CREATE FOREIGN CATALOG databricks_postgres
    USING CONNECTION postgres_conn
    OPTIONS (database 'lakehouse_federation')

In [0]:
%sql
-- Preview the federated source table through the foreign catalog.
SELECT *
FROM databricks_postgres.public.remuneracao_day2

In [0]:
%sql
-- Create the working catalog and schema and make them the session defaults.
-- IF NOT EXISTS keeps this cell re-runnable: without it, a second run fails
-- because the catalog/schema already exist.
CREATE CATALOG IF NOT EXISTS remuneracao;
USE CATALOG remuneracao;
CREATE SCHEMA IF NOT EXISTS explorer;
USE SCHEMA explorer;

In [0]:
%sql
-- Materialize the federated table as a managed table in the working schema
-- (CREATE TABLE ... AS SELECT copies every column and row of the source).
CREATE TABLE remuneracao.explorer.remuneracao_day2
    COMMENT 'Remuneracao Monthly'
AS
SELECT *
FROM databricks_postgres.public.remuneracao_day2;

In [0]:
%sql
-- Top 15 values of `cargo` by number of non-null `cpf` entries.
-- Rows with a NULL `cargo` are excluded before grouping.
SELECT cargo, COUNT(cpf)
FROM remuneracao.explorer.remuneracao_day2
WHERE cargo IS NOT NULL
GROUP BY cargo
-- Order by the aggregate itself instead of by column position.
ORDER BY COUNT(cpf) DESC
LIMIT 15

### Deleting data

In [0]:
%sql
-- Remove every row whose cargo is exactly 'CABO'.
DELETE FROM remuneracao.explorer.remuneracao_day2
    WHERE cargo = 'CABO';

### Updating data

In [0]:
%sql
-- Overwrite `liquido` for every row whose cargo is 'TERCEIRO SARGENTO'.
-- The new value is written as a string literal, exactly as given.
UPDATE remuneracao.explorer.remuneracao_day2
    SET liquido = '41.650,92'
    WHERE cargo = 'TERCEIRO SARGENTO';

In [0]:
%sql
-- Spot-check the update: show five 'TERCEIRO SARGENTO' rows with their
-- (updated) `liquido` value.
SELECT CPF, CARGO, LIQUIDO
FROM remuneracao.explorer.remuneracao_day2
WHERE CARGO = 'TERCEIRO SARGENTO'
LIMIT 5;

### Inserting Data

In [0]:
%sql
-- Insert two sample rows. The table name is fully qualified, like every other
-- DML cell in this notebook, so the cell does not depend on the session's
-- current catalog/schema (which is lost when the cluster or session restarts).
-- Values are positional and must follow the table's column order.
INSERT INTO remuneracao.explorer.remuneracao_day2 VALUES
('JONAS FULANO', '***123456**', 'NARNIA', 'SABATICO', 'APRENDIZ', 'ATIVO', 2, 2022, 62, '104151', ',00', 'NENHUM', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', '500000,00', '378000,00'),
('NEYMAR', '***456789**', 'BRASIL', 'FUTEBOL', 'ATLETA', 'ATIVO', 2, 2022, 62, '1051621', ',00', 'NENHUM', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', '500000,00', '378000,00')

In [0]:
%sql
-- Show the rows just inserted (cargo 'SABATICO' or 'FUTEBOL').
-- Fully qualified so the cell does not rely on the session's current
-- catalog/schema.
SELECT *
FROM remuneracao.explorer.remuneracao_day2
WHERE CARGO IN ('SABATICO', 'FUTEBOL')

## Merging data with updates!

In [0]:
%sql
-- Copy remuneracao_day2 into a new, independent table.
-- DEEP is the default clone type; it is spelled out here for clarity.
CREATE TABLE remuneracao.explorer.remuneracao_deep_clone
    DEEP CLONE remuneracao.explorer.remuneracao_day2

In [0]:
%sql
-- Empty the clone: every row is removed, the table definition stays.
TRUNCATE TABLE
    remuneracao.explorer.remuneracao_deep_clone;

In [0]:
%sql
-- Row count of the clone table.
SELECT COUNT(*)
FROM remuneracao.explorer.remuneracao_deep_clone;

In [0]:
%sql
-- List the operations recorded in the clone table's history.
DESCRIBE HISTORY
    remuneracao.explorer.remuneracao_deep_clone;

In [0]:
%sql
-- Rename the clone; later cells use it as the source table for MERGE.
ALTER TABLE remuneracao.explorer.remuneracao_deep_clone
    RENAME TO remuneracao.explorer.remuneracao_updates;

In [0]:
%sql
-- List the operations recorded in the history of the renamed table.
DESCRIBE HISTORY remuneracao.explorer.remuneracao_updates;

In [0]:
%sql
-- Show the detail metadata of the updates table.
DESCRIBE DETAIL
    remuneracao.explorer.remuneracao_updates

In [0]:
%sql
-- Stage four rows in remuneracao_updates, the table the MERGE cell below uses
-- as its source. Values are positional and must follow the table's column order.
-- NOTE(review): the 'JONAS FULANO' and 'NEYMAR' rows reuse the name/cpf literals
-- inserted into remuneracao_day2 earlier, so they are presumably meant to hit the
-- MATCHED branch of the MERGE, while 'SILVIO SANTOS' and 'AYRTON SENNA' are
-- presumably new keys — confirm against the actual column layout and data.
INSERT INTO remuneracao.explorer.remuneracao_updates VALUES
('JONAS FULANO', '***123456**', 'NARNIA', 'SABATICO', 'APRENDIZ', 'ATIVO', 2, 2022, 62, '104151', ',00', 'NENHUM', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00'),
('NEYMAR', '***456789**', 'COMUNICACOES', 'MARKETING', 'INFLUENCER', 'ATIVO', 2, 2022, 215, '1426', ',00', 'ESPORTE', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', '690000,00', '200000,00'),
('SILVIO SANTOS', '***519612**', 'COMUNICACOES', 'TELEVISAO', 'APRESENTADOR', 'INATIVO', 2, 2022, 627, '62473', ',00', 'NENHUM', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', '100000,00', '99999,00'),
('AYRTON SENNA', '***696262**', 'ESPORTES', 'CORRIDA', 'ATLETA', 'INATIVO', 2, 2022, 682, '52677', ',00', 'NENHUM', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', ',00', '259416,00', '150161,00');

In [0]:
%sql
-- Upsert remuneracao_updates into remuneracao_day2, matching on (nome, cpf, mes):
--   * matched rows are overwritten with the source row, except when the source
--     row's funcao is 'INFLUENCER' (those target rows are left untouched);
--   * source rows with no match are inserted.
-- The string literal is single-quoted so it stays a literal even when
-- double-quoted identifiers are enabled.
MERGE INTO remuneracao.explorer.remuneracao_day2 AS re
USING remuneracao.explorer.remuneracao_updates AS up
    ON re.nome = up.nome
    AND re.cpf = up.cpf
    AND re.mes = up.mes
WHEN MATCHED AND up.funcao != 'INFLUENCER' THEN
    UPDATE SET *
WHEN NOT MATCHED THEN
    INSERT *

In [0]:
%sql
-- Inspect the table history after the MERGE. The table created and modified
-- throughout this notebook is remuneracao_day2.
DESCRIBE HISTORY remuneracao.explorer.remuneracao_day2;

In [0]:
%sql
-- Show the rows for the four names staged in remuneracao_updates, to see the
-- effect of the MERGE on remuneracao_day2.
SELECT *
FROM remuneracao.explorer.remuneracao_day2
WHERE nome IN ('JONAS FULANO', 'NEYMAR', 'SILVIO SANTOS', 'AYRTON SENNA');

In [0]:
%sql
-- Time-travel diff: rows present in table version 4 that are not in version 2.
-- EXCEPT ALL keeps duplicates (multiset difference).
SELECT *
FROM remuneracao.explorer.remuneracao_day2 VERSION AS OF 4

EXCEPT ALL

SELECT *
FROM remuneracao.explorer.remuneracao_day2 VERSION AS OF 2

In [0]:
%sql
-- Roll the table back to its first recorded version (version 0).
RESTORE TABLE remuneracao.explorer.remuneracao_day2
    TO VERSION AS OF 0;

In [0]:
%sql
-- After the restore, look again for the four names used in the insert/merge cells.
SELECT *
FROM remuneracao.explorer.remuneracao_day2
WHERE nome IN (
    'JONAS FULANO',
    'NEYMAR',
    'SILVIO SANTOS',
    'AYRTON SENNA'
)

# REMEMBER:
#### To use a retention duration shorter than the default minimum (168 hours / 1 week), you must set:
`spark.databricks.delta.retentionDurationCheck.enabled false`
as a parameter setting on your cluster, or modify your Delta table properties, such as:
`tblProps = {`
`"delta.deletedFileRetentionDuration": "500 days",`
`"delta.logRetentionDuration": "90 days",`
`"databricks.delta.retentionDurationCheck.enabled": "false"`
`}`

In [0]:
%sql
-- DRY RUN only lists the files VACUUM would remove; nothing is deleted.
-- RETAIN 0 HOURS is below the default retention threshold, so the retention
-- duration check must be disabled on the cluster (see the notes in this notebook).
VACUUM remuneracao.explorer.remuneracao_day2
    RETAIN 0 HOURS
    DRY RUN;