# Date functions

See the [documentation](https://docs.databricks.com/en/sql/language-manual/sql-ref-datetime-pattern.html#pattern-table) for more details on date patterns.

| Symbol|	Meaning	|Presentation|	Examples|
|--------|-------|-------------|------|
| G|	era	|text	|AD; Anno Domini |
| y|	year	|year	|2020; 20 |
| D|	day-of-year	|number(3)|	189 |
| M/L|	month-of-year	|month	|7; 07; Jul; July |
| E|	day-of-week	|text	|Tue; Tuesday |
| F|	aligned day of week in month	|number(1)	|3 |
| a|	am-pm-of-day|	am-pm	|PM |
| h|	clock-hour-of-am-pm (1-12)|	number(2)|	12 |
| H|	hour-of-day (0-23)|	number(2)	|0 |
| m|	minute-of-hour	|number(2)	|30 |
| s|	second-of-minute|	number(2)	|55 |
| z|	time-zone name	|zone-name	|Pacific Standard Time; PST |


In [0]:
-- One call to each date/time parsing, formatting, and time-zone conversion
-- function, all driven by the datetime pattern letters (e.g. 'yyyy-MM-dd').
SELECT
    unix_timestamp('2016-04-08', 'yyyy-MM-dd')               AS unix_timestamp_example,
    date_format('2016-04-08', 'y')                           AS date_format_example,
    to_unix_timestamp('2016-04-08', 'yyyy-MM-dd')            AS to_unix_timestamp_example,
    from_unixtime(0, 'yyyy-MM-dd HH:mm:ss')                  AS from_unixtime_example,
    to_char(DATE '2016-04-08', 'y')                          AS to_char_example,
    to_date('2009-07-30 04:17:52')                           AS to_date_example,
    to_timestamp('2016-12-31', 'yyyy-MM-dd')                 AS to_timestamp_example,
    to_varchar(DATE '2016-04-08', 'y')                       AS to_varchar_example,
    -- Both conversions below use the same input and zone so the results can be compared.
    from_utc_timestamp('2017-07-14 02:40:00.0', 'GMT+1')     AS from_utc_timestamp_example,
    to_utc_timestamp('2017-07-14 02:40:00.0', 'GMT+1')       AS to_utc_timestamp_example

In [0]:
-- Build a DATE from a day count (here: 1).
SELECT
    date_from_unix_date(1)

# PK/FK definitions

For more information read the blog article ["Primary Key and Foreign Key constraints are GA and now enable faster queries"](https://www.databricks.com/blog/primary-key-and-foreign-key-constraints-are-ga-and-now-enable-faster-queries)

In [0]:

-- Demo: PRIMARY KEY / FOREIGN KEY constraints are informational, i.e. they are
-- declared but not enforced on write. The inserts below deliberately violate
-- them to show that the rows are still accepted.

-- NOTE(review): the catalog name is specific to the workspace this notebook was
-- authored in; change it to a catalog you are allowed to write to.
USE CATALOG corning52296;
CREATE SCHEMA IF NOT EXISTS pk_fk_constraints;
USE SCHEMA pk_fk_constraints;

-- Parent table with a named primary key.
CREATE OR REPLACE TABLE employees(
  EmployeeID INTEGER NOT NULL,
  Name STRING,
  Age INTEGER,
  CONSTRAINT employees_pk PRIMARY KEY(EmployeeID)
);

-- Seed the parent table with three distinct employees.
INSERT INTO employees(EmployeeID, Name, Age)
VALUES (1, 'John Doe', 30), (2, 'Jane Smith', 40), (3, 'John Smith', 25);

-- Informational constraint on 'EmployeeID'. Does not prevent insertions of duplicate 'EmployeeID'.
INSERT INTO employees(EmployeeID, Name, Age)
VALUES (1, 'Jane Doe', 25);

SELECT * FROM employees;

-- Child table: its own primary key plus a foreign key to employees.
-- The referenced column is spelled out instead of relying on the parent's
-- primary key being picked up implicitly.
CREATE OR REPLACE TABLE orders(
  OrderID INTEGER NOT NULL,
  EmployeeID INTEGER,
  Product STRING,
  CONSTRAINT orders_pk PRIMARY KEY(OrderID),
  CONSTRAINT orders_employees_fk FOREIGN KEY(EmployeeID) REFERENCES employees(EmployeeID)
);

-- Seed the child table with orders that all reference existing employees.
INSERT INTO orders(OrderID, EmployeeID, Product)
VALUES (1001, 1, 'Product A'), (1002, 2, 'Product B'), (1003, 1, 'Product B'), (1004, 3, 'Product A');

SELECT * FROM orders;

-- Primary-key violation: EmployeeID 1 is inserted yet again and is still accepted.
INSERT INTO employees(EmployeeID, Name, Age)
VALUES (1, 'Jane Doe', 35);

-- Primary-key and foreign-key violation in a single row.
INSERT INTO orders(OrderID, EmployeeID, Product)
VALUES (1001, 4, 'Product A'); -- 1001 is a duplicate use of primary key and 4 does not exist in the employees table

-- Both tables now contain the violating rows.
SELECT * FROM employees;

SELECT * FROM orders;


# Joins

## Setup

In [0]:
 -- Sample employees for the join examples. Departments 4, 5 and 6 have no
 -- row in the department view, so outer/anti joins have something to show.
 -- OR REPLACE keeps the cell re-runnable within the same session.
 CREATE OR REPLACE TEMP VIEW employee(id, name, deptno) AS
     VALUES(105, 'Chloe', 5),
           (103, 'Paul' , 3),
           (101, 'John' , 1),
           (102, 'Lisa' , 2),
           (104, 'Evan' , 4),
           (106, 'Amy'  , 6);

-- Sample departments for the join examples.
-- OR REPLACE keeps the cell re-runnable within the same session.
CREATE OR REPLACE TEMP VIEW department(deptno, deptname) AS
    VALUES(3, 'Engineering'),
          (2, 'Sales'      ),
          (1, 'Marketing'  );

## Standard Joins

In [0]:
-- Inner join: only employees whose deptno has a matching department row.
SELECT
    e.id,
    e.name,
    e.deptno,
    d.deptname
FROM employee AS e
INNER JOIN department AS d
    ON e.deptno = d.deptno;

In [0]:
-- Left join: every employee; deptname is NULL when no department matches.
SELECT
    e.id,
    e.name,
    e.deptno,
    d.deptname
FROM employee AS e
LEFT JOIN department AS d
    ON e.deptno = d.deptno;

In [0]:
-- Right join: every department, paired with its employees where they exist.
SELECT
    e.id,
    e.name,
    e.deptno,
    d.deptname
FROM employee AS e
RIGHT JOIN department AS d
    ON e.deptno = d.deptno;

In [0]:
-- Full join: all rows from both sides, NULL-padded where there is no match.
SELECT
    e.id,
    e.name,
    e.deptno,
    d.deptname
FROM employee AS e
FULL JOIN department AS d
    ON e.deptno = d.deptno;

In [0]:
-- Cross join: every employee paired with every department (no join condition).
SELECT
    e.id,
    e.name,
    e.deptno,
    d.deptname
FROM employee AS e
CROSS JOIN department AS d;

In [0]:
-- Lateral inner join: the subquery is correlated to the current employee row
-- through deptno; employees without a matching department are dropped.
SELECT
    e.id,
    e.name,
    e.deptno,
    deptname
FROM employee AS e
INNER JOIN LATERAL (
    SELECT d.deptname
    FROM department AS d
    WHERE d.deptno = e.deptno
);

In [0]:
-- Lateral left join: same correlated lookup, but employees without a matching
-- department are kept with a NULL deptname.
SELECT
    e.id,
    e.name,
    e.deptno,
    deptname
FROM employee AS e
LEFT JOIN LATERAL (
    SELECT d.deptname
    FROM department AS d
    WHERE d.deptno = e.deptno
);

## ANTI

In [0]:
-- Anti join: employees that have NO matching department row.
-- Only columns from the left side (employee) are returned.
SELECT *
FROM employee AS e
LEFT ANTI JOIN department AS d
    ON e.deptno = d.deptno;

## SEMI

In [0]:
-- Semi join: employees that have at least one matching department row.
-- Only columns from the left side (employee) are returned.
SELECT *
FROM employee AS e
LEFT SEMI JOIN department AS d
    ON e.deptno = d.deptno;

## NATURAL

In [0]:
-- Natural join: the join columns are inferred from the column names the two
-- relations share (here: deptno).
SELECT
    id,
    name,
    employee.deptno,
    deptname
FROM employee
NATURAL INNER JOIN department;

## USING Clause

In [0]:
-- Inner join with USING: shorthand for an equality match on the named column.
SELECT
    id,
    name,
    employee.deptno,
    deptname
FROM employee
INNER JOIN department
    USING (deptno);

# EXCEPT

## Set Based Queries

In [0]:
-- Set difference: department numbers used by employees that do not appear in
-- the department view. DISTINCT is the default EXCEPT behavior, spelled out.
SELECT e.deptno
FROM employee AS e
EXCEPT DISTINCT
SELECT d.deptno
FROM department AS d;

## EXCEPT in SELECT Queries

In [0]:
-- Column exclusion: all employee columns apart from deptno.
SELECT
    * EXCEPT (deptno)
FROM employee

# Shaping

## LATERAL VIEW ... EXPLODE

In [0]:
-- Sample table for the LATERAL VIEW example.
-- OR REPLACE makes the cell re-runnable and resets the table contents, so the
-- INSERT that follows does not fail or pile up duplicate rows on a second run.
CREATE OR REPLACE TABLE person (id INT, name STRING, age INT, class INT, address STRING);

In [0]:
-- Seed rows for the LATERAL VIEW example. The explicit column list keeps the
-- insert correct even if the table's column order changes. Mary's age is
-- intentionally NULL.
INSERT INTO person (id, name, age, class, address) VALUES
    (100, 'John', 30, 1, 'Street 1'),
    (200, 'Mary', NULL, 1, 'Street 2'),
    (300, 'Mike', 80, 3, 'Street 3'),
    (400, 'Dan', 50, 4, 'Street 4');

In [0]:
-- Each person row is repeated once per element of the first array (c_age) and,
-- for each of those, once per element of the second array (d_age).
SELECT *
FROM person
LATERAL VIEW explode(array(30, 60)) c_ages AS c_age
LATERAL VIEW explode(array(40, 80)) AS d_age;

## PIVOT

In [0]:
-- Quarterly sales by region for the PIVOT example.
-- OR REPLACE is needed for two reasons: plain CREATE fails when the cell is
-- re-run, and the UNPIVOT example defines a view with the same name but a
-- different shape, which would otherwise block this definition.
CREATE OR REPLACE TEMP VIEW sales(year, quarter, region, sales) AS
   VALUES (2018, 1, 'east', 100),
          (2018, 2, 'east',  20),
          (2018, 3, 'east',  40),
          (2018, 4, 'east',  40),
          (2019, 1, 'east', 120),
          (2019, 2, 'east', 110),
          (2019, 3, 'east',  80),
          (2019, 4, 'east',  60),
          (2018, 1, 'west', 105),
          (2018, 2, 'west',  25),
          (2018, 3, 'west',  45),
          (2018, 4, 'west',  45),
          (2019, 1, 'west', 125),
          (2019, 2, 'west', 115),
          (2019, 3, 'west',  85),
          (2019, 4, 'west',  65);

In [0]:
-- Pivot quarters into columns, producing a total and an average per quarter.
-- region is dropped first so that year is the only remaining grouping column.
WITH yearly_quarter_sales AS (
    SELECT
        year,
        quarter,
        sales
    FROM sales
)
SELECT
    year,
    q1_total, q1_avg,
    q2_total, q2_avg,
    q3_total, q3_avg,
    q4_total, q4_avg
FROM yearly_quarter_sales
PIVOT (
    sum(sales) AS total,
    avg(sales) AS avg
    FOR quarter IN (1 AS q1, 2 AS q2, 3 AS q3, 4 AS q4)
);

## UNPIVOT

In [0]:
-- Wide-format sales (one column per quarter) for the UNPIVOT example.
-- San Francisco's 2020 q1 value is NULL on purpose.
CREATE OR REPLACE TEMP VIEW sales (location, year, q1, q2, q3, q4) AS
VALUES
    ('Toronto',       2020, 100,  80,  70, 150),
    ('San Francisco', 2020, NULL, 20,  50, 60),
    ('Toronto',       2021, 110,  90,  80, 170),
    ('San Francisco', 2021, 70,   120, 85, 105);

-- Turn the four quarter columns into (quarter, sales) rows.
-- INCLUDE NULLS keeps the row for the NULL q1 value instead of dropping it.
SELECT *
FROM sales
UNPIVOT INCLUDE NULLS (
    sales FOR quarter IN (
        q1       AS `Jan-Mar`,
        q2       AS `Apr-Jun`,
        q3       AS `Jul-Sep`,
        sales.q4 AS `Oct-Dec`
    )
);

# Aggregates


## Approximate Functions

Approximate functions are designed to run quicker than their standard counterparts, especially on large datasets. They provide faster results by trading off some accuracy for performance. The size of data where approximate functions will show significant performance improvements can vary, but generally, they are beneficial when dealing with large datasets, typically in the range of millions to billions of rows.

For smaller datasets, the performance difference might not be as noticeable, and the standard functions might be sufficient. However, as the dataset grows larger, the performance benefits of approximate functions become more apparent.


In [0]:
-- Approximate distinct count of col1, restricted to rows where col2 = 10.
SELECT
    approx_count_distinct(col1) FILTER (WHERE col2 = 10)
FROM VALUES
    (1, 10),
    (1, 10),
    (2, 10),
    (2, 10),
    (3, 10),
    (1, 12)
    AS tab(col1, col2);

In [0]:
-- Approximate median (percentile 0.5, accuracy 100) over the distinct values.
SELECT
    approx_percentile(DISTINCT col, 0.5, 100)
FROM VALUES
    (0), (6), (6), (7), (9), (10)
    AS tab(col);


In [0]:
-- Approximate top-2 most frequent values of expr.
SELECT
    approx_top_k(expr, 2)
FROM VALUES
    'a', 'b', 'c', 'c', 'c', 'c', 'd', 'd'
    AS tab(expr);

# AI Functions

In [0]:
-- Sentiment analysis on one positive and one negative sentence.
SELECT
    ai_analyze_sentiment('I am happy') AS sent_1,
    ai_analyze_sentiment('I am sad')   AS sent_2;

In [0]:
-- Classify the text into one of the supplied labels.
-- String literals use single quotes: double-quoted text is read as an
-- identifier when ANSI double-quoted identifiers are enabled, which would turn
-- these literals into unresolved column references.
SELECT
    ai_classify(
        'My password is leaked.',
        ARRAY('urgent', 'not urgent')
    );

In [0]:
-- Extract the requested entity types (person, location, organization) from the text.
SELECT
    ai_extract(
        'John Doe lives in New York and works for Acme Corp.',
        array('person', 'location', 'organization')
    );

In [0]:
-- Grammar correction on a sentence with agreement errors.
SELECT
    ai_fix_grammar('This sentence have some mistake');

In [0]:
-- Free-form text generation from a prompt.
SELECT
    ai_gen(
        'Generate a concise, cheerful email title for a summer bike sale with 20% discount'
    );
  

In [0]:
-- Mask the listed entity types (person, email) in the text.
SELECT
    ai_mask(
        'John Doe lives in New York. His email is john.doe@example.com.',
        array('person', 'email')
    );

In [0]:
-- Ask a model serving endpoint which US state each trip's pickup ZIP code belongs to.
-- NOTE(review): the endpoint name must exist in the workspace - confirm before running.
-- Changes from the original: the prompt is a single-quoted literal (double
-- quotes are read as identifiers when ANSI double-quoted identifiers are
-- enabled), pickup_zip is cast explicitly instead of relying on implicit
-- coercion in the concatenation, and the generated column has a readable name.
-- LIMIT has no ORDER BY, so which 10 trips are returned is not deterministic.
SELECT
    *,
    ai_query(
        'databricks-meta-llama-3-1-70b-instruct',
        'Can you tell me the name of the US state that serves the provided ZIP code? zip code: '
            || CAST(pickup_zip AS STRING)
    ) AS pickup_state_answer
FROM samples.nyctaxi.trips
LIMIT 10

In [0]:
-- Semantic similarity score between two strings.
SELECT
    ai_similarity('Apache Spark', 'Apache Arrow');

In [0]:
-- Translate the text into the target language code ('en').
SELECT
    ai_translate('La vida es un hermoso viaje.', 'en')