In [1]:
%load_ext sql
import os
from urllib.parse import quote

from sqlalchemy import create_engine

pgconfig = {
    'host': 'db',
    'port': os.environ['PG_PORT'],
    'database': os.environ['PG_DATABASE'],
    'user': os.environ['PG_USER'],
    'password': os.environ['PG_PASSWORD'],
}
dsl = 'postgresql://{user}:{password}@{host}:{port}/{database}'.format(**pgconfig)
conn = create_engine(dsl)

# MagicコマンドでSQLを書くための設定
%sql conn

[33mThere's a new jupysql version available (0.10.3), you're running 0.7.2. To upgrade: pip install jupysql --upgrade[0m


In [2]:
%%sql
-- Rebuild the demo table from scratch so this cell can be re-run at any time.
DROP TABLE IF EXISTS tbl;

CREATE TABLE tbl (
    id       integer PRIMARY KEY,
    group_id integer NOT NULL,
    value    integer NOT NULL
);

-- Seed data: seven rows spread over three groups.
INSERT INTO tbl (id, group_id, value)
VALUES
    (1, 1, 4),
    (2, 1, 6),
    (3, 1, 7),
    (4, 1, 0),
    (5, 2, 5),
    (6, 2, 10),
    (7, 3, 8);

*  postgresql://padawan:***@db:5432/dsdojo_db
Done.
Done.
7 rows affected.


[]

## やりたいこと
+ グループごとに `value` が最大となる行を見つける

In [3]:
%%sql
-- Plain aggregate: simple, but it cannot tell which id holds the maximum
SELECT
    group_id,
    max(value)
FROM tbl
GROUP BY group_id

*  postgresql://padawan:***@db:5432/dsdojo_db
3 rows affected.


group_id,max
3,8
2,10
1,7


In [4]:
%%sql
-- Correlated subquery: keep a row only when no other row in the same group
-- has a strictly larger value
SELECT *
FROM tbl AS cur
WHERE NOT EXISTS (
    SELECT 1
    FROM tbl AS bigger
    WHERE bigger.group_id = cur.group_id
      AND bigger.value > cur.value
)

*  postgresql://padawan:***@db:5432/dsdojo_db
3 rows affected.


id,group_id,value
3,1,7
6,2,10
7,3,8


In [21]:
%%sql
-- Flag every row: 'o' when its value equals the maximum of its group, 'x' otherwise
SELECT
    CASE
        WHEN t1.value = (
            SELECT max(t2.value)
            FROM tbl AS t2
            WHERE t2.group_id = t1.group_id
        ) THEN 'o'
        ELSE 'x'
    END AS flag,
    t1.*
FROM tbl AS t1

*  postgresql://padawan:***@db:5432/dsdojo_db
7 rows affected.


flag,id,group_id,value
x,1,1,4
x,2,1,6
o,3,1,7
x,4,1,0
x,5,2,5
o,6,2,10
o,7,3,8


In [7]:
%%sql
-- Row-value IN: match each (group_id, value) pair against the per-group maxima
SELECT t1.*
FROM tbl AS t1
WHERE (t1.group_id, t1.value) IN (
    SELECT
        group_id,
        max(value)
    FROM tbl
    GROUP BY group_id
)

*  postgresql://padawan:***@db:5432/dsdojo_db
3 rows affected.


id,group_id,value
3,1,7
6,2,10
7,3,8


In [13]:
%%sql
-- Window function: rank rows within each group by descending value and keep rank 1
SELECT ranked.*
FROM (
    SELECT
        tbl.*,
        dense_rank() OVER (
            PARTITION BY group_id
            ORDER BY value DESC
        ) AS rnk
    FROM tbl
) AS ranked
WHERE ranked.rnk = 1

*  postgresql://padawan:***@db:5432/dsdojo_db
3 rows affected.


id,group_id,value,rnk
3,1,7,1
6,2,10,1
7,3,8,1


In [6]:
%%sql
-- Join: match each row against a derived table holding the maximum value per group
SELECT t1.*
FROM tbl AS t1
JOIN (
    SELECT
        group_id,
        max(value) AS value
    FROM tbl
    GROUP BY group_id
) AS group_max
    ON  group_max.group_id = t1.group_id
    AND group_max.value = t1.value

*  postgresql://padawan:***@db:5432/dsdojo_db
3 rows affected.


id,group_id,value
3,1,7
6,2,10
7,3,8


In [15]:
%%sql
-- Cross join + HAVING: pair every row (t1) with the rows of its own group (t2)
-- whose value is greater than or equal to its own. A row holds the group
-- maximum exactly when the only value it is paired with is its own.
--
-- Grouping per row (t1.id) and testing count(distinct t2.value) keeps groups
-- in which several rows tie for the maximum. Grouping by (group_id, value)
-- with count(*) = 1 would drop such a group, because k tied rows produce
-- k * k pairs. Selecting the plain columns also yields id / value headers
-- instead of max / max_1.
select
    t1.id,
    t1.group_id,
    t1.value
from tbl as t1, tbl as t2
where t1.group_id = t2.group_id and
t1.value <= t2.value
group by t1.id, t1.group_id, t1.value
having count(distinct t2.value) = 1

*  postgresql://padawan:***@db:5432/dsdojo_db
3 rows affected.


max,group_id,max_1
3,1,7
6,2,10
7,3,8


In [14]:
%%sql
-- Use a correlated subquery: a row is returned when its group contains
-- no row whose value exceeds it
SELECT
    t1.id,
    t1.group_id,
    t1.value
FROM tbl AS t1
WHERE NOT EXISTS (
    SELECT 1
    FROM tbl AS t2
    WHERE t1.value < t2.value
      AND t1.group_id = t2.group_id
)

*  postgresql://padawan:***@db:5432/dsdojo_db
3 rows affected.


id,group_id,value
3,1,7
6,2,10
7,3,8
