In [1]:
%load_ext sql
import os
from sqlalchemy import create_engine

pgconfig = {
    'host': 'db',
    'port': os.environ['PG_PORT'],
    'database': os.environ['PG_DATABASE'],
    'user': os.environ['PG_USER'],
    'password': os.environ['PG_PASSWORD'],
}
dsl = 'postgresql://{user}:{password}@{host}:{port}/{database}'.format(**pgconfig)
conn = create_engine(dsl)

# MagicコマンドでSQLを書くための設定
%sql conn

In [2]:
%%sql
drop table if exists Graduates;
CREATE TABLE Graduates
(name   VARCHAR(16) PRIMARY KEY,
 income INTEGER NOT NULL);

INSERT INTO Graduates VALUES('サンプソン', 400000);
INSERT INTO Graduates VALUES('マイク',     30000);
INSERT INTO Graduates VALUES('ホワイト',   20000);
INSERT INTO Graduates VALUES('アーノルド', 20000);
INSERT INTO Graduates VALUES('スミス',     20000);
INSERT INTO Graduates VALUES('ロレンス',   15000);
INSERT INTO Graduates VALUES('ハドソン',   15000);
INSERT INTO Graduates VALUES('ケント',     10000);
INSERT INTO Graduates VALUES('ベッカー',   10000);
INSERT INTO Graduates VALUES('スコット',   10000);

*  postgresql://padawan:***@db:5432/dsdojo_db
Done.
Done.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.


[]

## やりたいこと
+ 最頻値を求める

In [53]:
%%sql
-- modeの使用(1つだけ)
select
    mode() within group (order by income desc)
from Graduates

*  postgresql://padawan:***@db:5432/dsdojo_db
1 rows affected.


mode
20000


In [9]:
%%sql
-- dense_rankを使う
select income, cnt
from (
    select
        income,
        cnt,
        dense_rank() over (order by cnt desc) as rnk
    from (
        select income, count(*) as cnt
        from Graduates
        group by income
    ) as tmp
)  as tmp2
where rnk = 1

*  postgresql://padawan:***@db:5432/dsdojo_db
2 rows affected.


income,cnt
10000,3
20000,3


In [68]:
%%sql
-- allを使用
select income, count(*)
from Graduates
group by income
having count(*) >= all(
    select count(*)
    from Graduates
    group by income
)

*  postgresql://padawan:***@db:5432/dsdojo_db
2 rows affected.


income,count
10000,3
20000,3


In [69]:
%%sql
-- not existsの利用
-- 最大のcountを見つける -> countよりも大きいものが存在しない -> not exists
select income, count(*)
from Graduates as g1
group by income
having not exists (
    select count(*)
    from Graduates as g2
    group by income
    having count(g1.*) < count(g2.*)
)

*  postgresql://padawan:***@db:5432/dsdojo_db
2 rows affected.


income,count
10000,3
20000,3


In [70]:
%%sql
-- 極値関数の利用
select income,
    count(*) as cnt
from Graduates
group by income
having count(*) >= (
    select max(cnt)
    from (
        select count(*) as cnt
        from Graduates
        group by income
    ) tmp
)

*  postgresql://padawan:***@db:5432/dsdojo_db
2 rows affected.


income,cnt
10000,3
20000,3
