# PIVOT
MySQL does not support `PIVOT` function. 
However, you can use the CASE statement 
with the aggregate function to achieve the same result.

### Connect to DataBase (MySQL)

In [1]:
%load_ext sql

%sql mysql+pymysql://Choonsik:malrangcow@localhost/sqld

In [2]:
%%sql

CREATE TABLE departments (
    dept_id INT PRIMARY KEY,
    dept_name VARCHAR(20)
)
;

CREATE TABLE employees (
    emp_id INT PRIMARY KEY,
    dept_id INT NOT NULL,
    name VARCHAR(20),
    age INT,
    salary INT,
    FOREIGN KEY (dept_id) REFERENCES departments(dept_id) ON DELETE CASCADE
)
;

 * mysql+pymysql://Choonsik:***@localhost/sqld
0 rows affected.
0 rows affected.


[]

In [3]:
%%sql

INSERT INTO departments
VALUES
    (0, 'none'),
    (1, 'Sales'),
    (2, 'Marketing'),
    (3, 'HR'),
    (4, 'IT')
;

INSERT INTO employees
VALUES
    (1, 1, 'John', 30, 1000),
    (2, 1, 'Alice', 25, 1200),
    (3, 2, 'Bob', 35, 1500),
    (4, 2, 'Alex', 40, 1300),
    (5, 3, 'Jacob', 45, 2000),
    (6, 3, 'Alice', 30, 1800),
    (7, 4, 'John', 35, 1600),
    (8, 4, 'Bob', 40, 1900),
    (9, 0, 'Choonsik', 25, 1200),
    (10, 0, 'MalrangCow', 30, 1500)
;

 * mysql+pymysql://Choonsik:***@localhost/sqld
5 rows affected.
10 rows affected.


[]

In [4]:
%%sql df <<

SELECT *
FROM employees
;

 * mysql+pymysql://Choonsik:***@localhost/sqld
10 rows affected.
Returning data to local variable df


In [5]:
import pandas as pd

df = pd.DataFrame(df)
df

Unnamed: 0,emp_id,dept_id,name,age,salary
0,1,1,John,30,1000
1,2,1,Alice,25,1200
2,3,2,Bob,35,1500
3,4,2,Alex,40,1300
4,5,3,Jacob,45,2000
5,6,3,Alice,30,1800
6,7,4,John,35,1600
7,8,4,Bob,40,1900
8,9,0,Choonsik,25,1200
9,10,0,MalrangCow,30,1500


In [6]:
%%sql df <<

SELECT *
FROM departments
;

 * mysql+pymysql://Choonsik:***@localhost/sqld
5 rows affected.
Returning data to local variable df


In [7]:
df = pd.DataFrame(df)
df

Unnamed: 0,dept_id,dept_name
0,0,none
1,1,Sales
2,2,Marketing
3,3,HR
4,4,IT


MySQL does not support PIVOT function.

In [8]:
%%sql

SELECT *
FROM (
    SELECT
        dept_name,
        age
    FROM employees
    JOIN departments
    ON employees.dept_id = departments.dept_id
) AS emp
;
PIVOT (
    AVG(age)
    FOR dept_name IN ('Sales', 'Marketing', 'HR', 'IT')
) AS p
;

 * mysql+pymysql://Choonsik:***@localhost/sqld
10 rows affected.
(pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'PIVOT (\n    AVG(age)\n    FOR dept_name IN ('Sales', 'Marketing', 'HR', 'IT')\n) A' at line 1")
[SQL: PIVOT (
    AVG(age)
    FOR dept_name IN ('Sales', 'Marketing', 'HR', 'IT')
) AS p
;]
(Background on this error at: https://sqlalche.me/e/20/f405)


simply use the `GROUP BY` statement to aggregate the data.

In [9]:
%%sql df <<

SELECT
    dept_name,
    AVG(age) AS avg_age
FROM employees
JOIN departments
ON employees.dept_id = departments.dept_id
GROUP BY dept_name
;

 * mysql+pymysql://Choonsik:***@localhost/sqld
5 rows affected.
Returning data to local variable df


In [10]:
df = pd.DataFrame(df)
df

Unnamed: 0,dept_name,avg_age
0,none,27.5
1,Sales,27.5
2,Marketing,37.5
3,HR,37.5
4,IT,37.5


use the `CASE` statement with the aggregate function.

In [11]:
%%sql df <<

SELECT
    AVG(CASE WHEN dept_name = 'Sales' THEN age ELSE NULL END) AS 'Sales',
    AVG(CASE WHEN dept_name = 'Marketing' THEN age ELSE NULL END) AS 'Marketing',
    AVG(CASE WHEN dept_name = 'HR' THEN age ELSE NULL END) AS 'HR',
    AVG(CASE WHEN dept_name = 'IT' THEN age ELSE NULL END) AS 'IT'
FROM employees
JOIN departments ON employees.dept_id = departments.dept_id
;

 * mysql+pymysql://Choonsik:***@localhost/sqld
1 rows affected.
Returning data to local variable df


In [12]:
df = pd.DataFrame(df)
df

Unnamed: 0,Sales,Marketing,HR,IT
0,27.5,37.5,37.5,37.5


In [13]:
%%sql

DROP TABLE employees
;

DROP TABLE departments
;

 * mysql+pymysql://Choonsik:***@localhost/sqld
0 rows affected.
0 rows affected.


[]