In [44]:
%load_ext sql
import os
from sqlalchemy import create_engine

pgconfig = {
    'host': 'db',
    'port': os.environ['PG_PORT'],
    'database': os.environ['PG_DATABASE'],
    'user': os.environ['PG_USER'],
    'password': os.environ['PG_PASSWORD'],
}
dsl = 'postgresql://{user}:{password}@{host}:{port}/{database}'.format(**pgconfig)
conn = create_engine(dsl)

# MagicコマンドでSQLを書くための設定
%sql conn

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [45]:
%%sql
-- Rebuild the sample table from scratch so that this cell can be re-run safely
drop table if exists OpenCourses;
CREATE TABLE OpenCourses
(month       INTEGER ,
 course_id   INTEGER ,
    PRIMARY KEY(month, course_id));

-- Column lists are spelled out so the inserts do not depend on column order
INSERT INTO OpenCourses (month, course_id) VALUES(201806, 1);
INSERT INTO OpenCourses (month, course_id) VALUES(201806, 3);
INSERT INTO OpenCourses (month, course_id) VALUES(201806, 4);
INSERT INTO OpenCourses (month, course_id) VALUES(201807, 4);
INSERT INTO OpenCourses (month, course_id) VALUES(201808, 2);
INSERT INTO OpenCourses (month, course_id) VALUES(201808, 4);

-- Show the loaded rows with an explicit ordering, because row order is
-- unspecified without ORDER BY
select *
from OpenCourses
order by month, course_id;

*  postgresql://padawan:***@db:5432/dsdojo_db
Done.
Done.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
6 rows affected.


month,course_id
201806,1
201806,3
201806,4
201807,4
201808,2
201808,4


## やりたいこと
+ 下のようなクロス表を作成する

In [3]:
%%sql
-- Pivot by conditional aggregation. Every row is mapped to the letter o for
-- the target month and to x otherwise. MIN then yields o for a course as soon
-- as one of its rows matched, because o sorts before x.
SELECT
    oc.course_id,
    MIN(CASE WHEN oc.month = 201806 THEN 'o' ELSE 'x' END) AS "June",
    MIN(CASE WHEN oc.month = 201807 THEN 'o' ELSE 'x' END) AS "July",
    MIN(CASE WHEN oc.month = 201808 THEN 'o' ELSE 'x' END) AS "August"
FROM OpenCourses AS oc
GROUP BY oc.course_id
ORDER BY oc.course_id

*  postgresql://padawan:***@db:5432/dsdojo_db
4 rows affected.


course_id,June,July,August
1,o,x,x
2,x,x,o
3,o,x,x
4,o,o,o


In [50]:
%%sql
-- Combine CASE .. END with SUM.
-- The inner CASE marks each row of the target month with 1 (NULL otherwise),
-- SUM counts the marks per course, and the outer CASE turns that count into
-- the o / x flag. The month column is INTEGER, so integer literals are used.
-- The test is >= 1 so the flag does not depend on the key allowing at most
-- one row per course and month.
select oc.course_id,
    case when sum(case when oc.month = 201806 then 1 else null end) >= 1
        then 'o' else 'x'
    end as "June",
    case when sum(case when oc.month = 201807 then 1 else null end) >= 1
        then 'o' else 'x'
    end as "July",
    case when sum(case when oc.month = 201808 then 1 else null end) >= 1
        then 'o' else 'x'
    end as "August"
from OpenCourses as oc
group by oc.course_id
order by oc.course_id

*  postgresql://padawan:***@db:5432/dsdojo_db
4 rows affected.


course_id,June,July,August
1,o,x,x
2,x,x,o
3,o,x,x
4,o,o,o



1. groupで一覧を出す -> 集約関数で答えを出す
2. 重複を気にせず計算して、distinctで最終結果を出す
3. distinctでsourceを出す
  + select部は case whenとinで対応
  + select部は case whenとexistsで対応 -> 相関サブクエリ
4. distinctでsourceを出し、joinで対応する

In [4]:
%%sql
-- EXISTS with correlated subqueries.
-- For each outer row, every subquery checks whether the same course has a row
-- in the target month. DISTINCT collapses the repeated outer rows to one row
-- per course, and ORDER BY makes the row order deterministic.
select distinct oc1.course_id,
    case when exists (
        select *
        from OpenCourses as oc2
        where oc1.course_id = oc2.course_id and
        oc2.month = 201806
    ) then 'o' else 'x' end as "June",
    case when exists (
        select *
        from OpenCourses as oc2
        where oc1.course_id = oc2.course_id and
        oc2.month = 201807
    ) then 'o' else 'x' end as "July",
    case when exists (
        select *
        from OpenCourses as oc2
        where oc1.course_id = oc2.course_id and
        oc2.month = 201808
    ) then 'o' else 'x' end as "August"
from OpenCourses as oc1
order by oc1.course_id

*  postgresql://padawan:***@db:5432/dsdojo_db
4 rows affected.


course_id,June,July,August
1,o,x,x
2,x,x,o
3,o,x,x
4,o,o,o


In [5]:
%%sql
-- Use IN.
-- Each month column is a boolean membership test of the course against the
-- set of courses held in that month, so the result shows true / false rather
-- than o / x. The ORDER BY belongs to the outer query, because an ORDER BY
-- inside the derived table does not guarantee the order of the final result.
select oc1.course_id,
    oc1.course_id in (
        select course_id
        from OpenCourses
        where month = 201806
    ) as "June",
    oc1.course_id in (
        select course_id
        from OpenCourses
        where month = 201807
    ) as "July",
    oc1.course_id in (
        select course_id
        from OpenCourses
        where month = 201808
    ) as "August"
from (
    select distinct course_id
    from OpenCourses
) as oc1
order by oc1.course_id

*  postgresql://padawan:***@db:5432/dsdojo_db
4 rows affected.


course_id,June,July,August
1,True,False,False
2,False,False,True
3,True,False,False
4,True,True,True


In [55]:
%%sql
-- This can also be done with correlated scalar subqueries written directly
-- in the SELECT list.
-- Each subquery returns the letter o when the course has a row in the target
-- month and NULL when it has none, so unopened months show up as empty cells.
-- The month column is INTEGER, so it is compared with integer literals, and
-- columns inside the subqueries are qualified to make the correlation explicit.
select c2.course_id,
    (
        select 'o'
        from OpenCourses as c1
        where c1.course_id = c2.course_id and
        c1.month = 201806
    ) as "June",
    (
        select 'o'
        from OpenCourses as c1
        where c1.course_id = c2.course_id and
        c1.month = 201807
    ) as "July",
    (
        select 'o'
        from OpenCourses as c1
        where c1.course_id = c2.course_id and
        c1.month = 201808
    ) as "August"
from (
    select distinct course_id
    from OpenCourses
) as c2
order by c2.course_id

*  postgresql://padawan:***@db:5432/dsdojo_db
4 rows affected.


course_id,June,July,August
1,o,,
2,,,o
3,o,,
4,o,o,o


In [6]:
%%sql
-- Build a cross table whose row header is fixed to one row per course.
-- The distinct course list is the driving table, and each month is attached
-- with a LEFT JOIN so that courses not held in a month keep a NULL cell.
-- The ORDER BY is placed on the outer query, because an ORDER BY inside the
-- derived table does not guarantee the order of the joined result.
select
    oc1.course_id,
    oc2."June",
    oc3."July",
    oc4."August"
from (
    select distinct course_id
    from OpenCourses
) as oc1
left join (
    select course_id, 'o' as "June"
    from OpenCourses
    where month = 201806
) as oc2
on oc1.course_id = oc2.course_id
left join (
    select course_id, 'o' as "July"
    from OpenCourses
    where month = 201807
) as oc3
on oc1.course_id = oc3.course_id
left join (
    select course_id, 'o' as "August"
    from OpenCourses
    where month = 201808
) as oc4
on oc1.course_id = oc4.course_id
order by oc1.course_id

*  postgresql://padawan:***@db:5432/dsdojo_db
4 rows affected.


course_id,June,July,August
1,o,,
2,,,o
3,o,,
4,o,o,o


In [46]:
# ------
# ServerLoadSampleに対して
# 計測日をcompleteした表を出す