In [1]:
%load_ext sql
import os
from sqlalchemy import create_engine

pgconfig = {
    'host': 'db',
    'port': os.environ['PG_PORT'],
    'database': os.environ['PG_DATABASE'],
    'user': os.environ['PG_USER'],
    'password': os.environ['PG_PASSWORD'],
}
dsl = 'postgresql://{user}:{password}@{host}:{port}/{database}'.format(**pgconfig)
conn = create_engine(dsl)

# MagicコマンドでSQLを書くための設定
%sql conn

In [7]:
%%sql
-- Rebuild the sample table from scratch so this cell can be re-run safely.
DROP TABLE IF EXISTS tbl;

CREATE TABLE tbl (
    id       INTEGER PRIMARY KEY,
    group_id INTEGER NOT NULL,
    value    INTEGER NOT NULL
);

-- Group 1 has a single maximum (7) and group 2 has two rows tied at the maximum (10).
INSERT INTO tbl (id, group_id, value)
VALUES
    (1, 1, 4),
    (2, 1, 6),
    (3, 1, 7),
    (4, 1, 0),
    (5, 2, 5),
    (6, 2, 10),
    (7, 2, 10);

*  postgresql://padawan:***@db:5432/dsdojo_db
Done.
Done.
7 rows affected.


[]

## やりたいこと
+ グループごとにvalueが最大値となるレコードを抽出したい

In [8]:
%%sql
-- Row-value IN. Compute the maximum value of each group, then keep every row
-- whose (group_id, value) pair appears among those maxima. Ties are all kept.
SELECT t.*
FROM tbl AS t
WHERE (t.group_id, t.value) IN (
    SELECT g.group_id, max(g.value)
    FROM tbl AS g
    GROUP BY g.group_id
)

*  postgresql://padawan:***@db:5432/dsdojo_db
3 rows affected.


id,group_id,value
3,1,7
6,2,10
7,2,10


In [11]:
%%sql
-- Window function. Tag every row with the largest value of its group,
-- then keep the rows whose own value equals that maximum. Ties are all kept.
SELECT *
FROM (
    SELECT *,
        -- No ORDER BY inside the window, so the frame is the whole partition
        -- and max() is the group maximum by definition. With an ORDER BY the
        -- default frame ends at the current row, which turns max() into a
        -- running maximum that is only right for a descending sort.
        -- first_value(value) with ORDER BY value DESC would give the same column.
        max(value) OVER (PARTITION BY group_id) AS max
    FROM tbl
) AS tmp
WHERE value = max

*  postgresql://padawan:***@db:5432/dsdojo_db
3 rows affected.


id,group_id,value,max
3,1,7,7
6,2,10,10
7,2,10,10


In [16]:
%%sql
-- Scalar subquery. Attach the group maximum to each row through a correlated
-- subquery in the select list, then keep the rows whose value equals it.
SELECT *
FROM (
    SELECT
        base.*,
        (
            SELECT max(peer.value)
            FROM tbl AS peer
            WHERE peer.group_id = base.group_id
        ) AS max
    FROM tbl AS base
) AS tmp
WHERE value = max

*  postgresql://padawan:***@db:5432/dsdojo_db
3 rows affected.


id,group_id,value,max
3,1,7,7
6,2,10,10
7,2,10,10


In [32]:
%%sql
-- Correlated subquery. Keep a row when no row of the same group has a
-- strictly larger value, so rows tied at the maximum are all kept.
SELECT candidate.*
FROM tbl AS candidate
WHERE NOT EXISTS (
    SELECT 1
    FROM tbl AS rival
    WHERE rival.group_id = candidate.group_id
      AND rival.value > candidate.value
)

*  postgresql://padawan:***@db:5432/dsdojo_db
3 rows affected.


id,group_id,value
3,1,7
6,2,10
7,2,10


In [41]:
%%sql
-- Self-join variant. Correct only while each group has a single maximum row.
-- When two or more rows tie for the maximum, each tied row pairs with more
-- than one partner, so the HAVING filter drops the whole group.
-- Surviving groups contain exactly one joined row, which is why sum() simply
-- returns that row's own group_id and value.
SELECT
    lhs.id,
    sum(lhs.group_id) AS group_id,
    sum(lhs.value) AS value
FROM tbl AS lhs
INNER JOIN tbl AS rhs
    ON lhs.group_id = rhs.group_id
    AND lhs.value <= rhs.value
GROUP BY lhs.id
HAVING count(*) = 1

*  postgresql://padawan:***@db:5432/dsdojo_db
1 rows affected.


id,group_id,value
3,1,7


In [30]:
%%sql
-- DISTINCT ON eliminates duplicate rows and leaves only the first row of each
-- group_id, as defined by the ORDER BY clause. Unlike the queries that compare
-- against the group maximum, it returns exactly one row per group even when
-- several rows tie for the maximum.
-- The trailing id sort key makes the surviving row deterministic, so the
-- lowest id wins a tie instead of an arbitrary row.
SELECT DISTINCT ON (group_id)
    id, group_id, value
FROM tbl
ORDER BY group_id, value DESC, id

*  postgresql://padawan:***@db:5432/dsdojo_db
2 rows affected.


id,group_id,value
3,1,7
6,2,10
