In [2]:
%load_ext sql
import os
from sqlalchemy import create_engine

pgconfig = {
    'host': 'db',
    'port': os.environ['PG_PORT'],
    'database': os.environ['PG_DATABASE'],
    'user': os.environ['PG_USER'],
    'password': os.environ['PG_PASSWORD'],
}
dsl = 'postgresql://{user}:{password}@{host}:{port}/{database}'.format(**pgconfig)
conn = create_engine(dsl)

# MagicコマンドでSQLを書くための設定
%sql conn

In [3]:
%%sql
-- Rebuild the Employees sample table from scratch so this cell can be re-run
DROP TABLE IF EXISTS Employees;
CREATE TABLE Employees (
  id         INTEGER     PRIMARY KEY,
  name       VARCHAR(10) NOT NULL,
  age        INTEGER     NOT NULL,
  department VARCHAR(10) NOT NULL
);

-- Sample data, two departments with three employees each
INSERT INTO Employees (id, name, age, department) VALUES (1, 'Sato', 23, '営業');
INSERT INTO Employees (id, name, age, department) VALUES (2, 'Suzuki', 35, '営業');
INSERT INTO Employees (id, name, age, department) VALUES (3, 'Saito', 38, '営業');
INSERT INTO Employees (id, name, age, department) VALUES (4, 'Yamada', 42, '開発');
INSERT INTO Employees (id, name, age, department) VALUES (5, 'Tanaka', 41, '開発');
INSERT INTO Employees (id, name, age, department) VALUES (6, 'Takahashi', 35, '開発');

-- Show every row that was just loaded
SELECT id, name, age, department
FROM Employees;

*  postgresql://padawan:***@db:5432/dsdojo_db
Done.
Done.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
6 rows affected.


id,name,age,department
1,Sato,23,営業
2,Suzuki,35,営業
3,Saito,38,営業
4,Yamada,42,開発
5,Tanaka,41,開発
6,Takahashi,35,開発


## やりたいこと
+ 各社員の年齢を所属部署の平均年齢と比較し、その平均よりも若い社員を表示する

In [11]:
%%sql
-- Approach 1, join each employee to a derived table of per-department average ages
SELECT emp.*
FROM employees AS emp
INNER JOIN (
    SELECT department, AVG(age) AS avg_age
    FROM employees
    GROUP BY department
) AS dept_avg
    ON dept_avg.department = emp.department
WHERE emp.age < dept_avg.avg_age

*  postgresql://padawan:***@db:5432/dsdojo_db
2 rows affected.


id,name,age,department
1,Sato,23,営業
6,Takahashi,35,開発


In [4]:
%%sql
-- Approach 2, attach the department average to every row with a scalar subquery
-- and then filter on that extra column in the outer query
SELECT *
FROM (
    SELECT
        emp.*,
        (
            SELECT AVG(peer.age)
            FROM employees AS peer
            WHERE peer.department = emp.department
        ) AS avg
    FROM employees AS emp
) AS with_avg
WHERE age < avg

*  postgresql://padawan:***@db:5432/dsdojo_db
2 rows affected.


id,name,age,department,avg
1,Sato,23,営業,32.0
6,Takahashi,35,開発,39.33333333333333


In [17]:
%%sql
-- Approach 3, compare each row directly against a correlated subquery
-- that computes the average age of the same department
SELECT emp.*
FROM employees AS emp
WHERE emp.age < (
    SELECT AVG(peer.age)
    FROM employees AS peer
    WHERE peer.department = emp.department
)

*  postgresql://padawan:***@db:5432/dsdojo_db
2 rows affected.


id,name,age,department
1,Sato,23,営業
6,Takahashi,35,開発


In [12]:
%%sql
-- Approach 4, NOT EXISTS (author note, clumsier than using an aggregate function)
-- An employee is kept only when their department average is not at or below their age
SELECT emp.*
FROM employees AS emp
WHERE NOT EXISTS (
    SELECT 1
    FROM (
        SELECT department, AVG(age) AS avg
        FROM employees
        GROUP BY department
    ) AS dept_avg
    WHERE dept_avg.department = emp.department
      AND dept_avg.avg <= emp.age
)

*  postgresql://padawan:***@db:5432/dsdojo_db
2 rows affected.


id,name,age,department
1,Sato,23,営業
6,Takahashi,35,開発


In [15]:
%%sql
-- もし各グループごとのmaxの年齢の場合だとnot existsでスッキリする
select *
from employees as e1
where not exists (
    select *
    from employees as e2
    where e1.department = e2.department and
    e1.age < e2.age
)

*  postgresql://padawan:***@db:5432/dsdojo_db
2 rows affected.


id,name,age,department
3,Saito,38,営業
4,Yamada,42,開発


In [19]:
%%sql
-- ウィンドウ関数を使う
select id,name,age,department
from (
    select *,
    avg(age) over (
        partition by department
    ) as avg
    from employees
) as tmp
where age < avg

*  postgresql://padawan:***@db:5432/dsdojo_db
2 rows affected.


id,name,age,department
1,Sato,23,営業
6,Takahashi,35,開発
