# LOAD SQL Extension

In [64]:
# Register the %sql / %%sql magics that the query cells below rely on.
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


# Connect to the local database where Pagila is loaded

## Create connection to the newly created db

In [65]:
import os
from urllib.parse import quote_plus

# Connection settings for the local Pagila database. Every value can be
# overridden through an environment variable so real credentials never have
# to be written into the notebook; the fallbacks match the local setup.
DB_ENDPOINT = os.environ.get("PAGILA_DB_ENDPOINT", "127.0.0.1")
DB = os.environ.get("PAGILA_DB", "pagila")
DB_USER = os.environ.get("PAGILA_DB_USER", "postgres")
DB_PASSWORD = os.environ.get("PAGILA_DB_PASSWORD", "")
DB_PORT = os.environ.get("PAGILA_DB_PORT", "5432")

# postgresql://username:password@host:port/database
URL_TEMPLATE = "postgresql://{}:{}@{}:{}/{}"

# User name and password are percent-encoded so characters such as "@", ":"
# or "/" inside them cannot break the structure of the URL.
conn_string = URL_TEMPLATE.format(
    quote_plus(DB_USER), quote_plus(DB_PASSWORD), DB_ENDPOINT, DB_PORT, DB
)

# Echo where we are connecting to, but never the password itself: notebook
# outputs are saved with the file and tend to get shared or committed.
masked_password = "***" if DB_PASSWORD else ""
print(URL_TEMPLATE.format(quote_plus(DB_USER), masked_password, DB_ENDPOINT, DB_PORT, DB))


postgresql://postgres:@127.0.0.1:5432/pagila


In [66]:
# Open the database connection described by conn_string.
%sql $conn_string

'Connected: postgres@pagila'

# FUNCTION TYPE 1: Slicing

## reducing the cube by one dimension by fixing that dimension to a single value (e.g. one month or one day)

In [67]:
%%sql
-- Slice. Keep only the facts that fall in month 3, total the revenue per
-- (month, rating, city) combination and list the ten largest totals.
SELECT
    d.month,
    m.rating,
    c.city,
    SUM(sales_amount) AS revenue
FROM factSales AS f
INNER JOIN dimMovie    AS m ON m.movie_key    = f.movie_key
INNER JOIN dimDate     AS d ON d.date_key     = f.date_key
INNER JOIN dimCustomer AS c ON c.customer_key = f.customer_key
WHERE d.month = '3'
GROUP BY d.month, m.rating, c.city
ORDER BY revenue DESC
LIMIT 10;

 * postgresql://postgres:***@127.0.0.1:5432/pagila
10 rows affected.


month,rating,city,revenue
3,NC-17,Richmond Hill,264
3,NC-17,Urawa,246
3,PG-13,Leshan,240
3,PG-13,Iwakuni,234
3,PG-13,Johannesburg,228
3,PG-13,Cavite,210
3,NC-17,Karnal,204
3,R,NDjamna,198
3,PG-13,Shimonoseki,198
3,PG,Molodetno,192


# FUNCTION TYPE 2: Dicing

## extracting a sub-cube by restricting two or more dimensions to a subset of their values (e.g. selected ratings and cities only)

In [68]:
%%sql
-- Dice. Restrict two dimensions at once (rating and city) and total the
-- revenue of each remaining (rating, city) cell, largest first.
-- dimDate is still joined even though no date column is used, so the set of
-- fact rows that contribute is the same as before.
SELECT
    m.rating,
    c.city,
    SUM(sales_amount) AS revenue
FROM factSales AS f
INNER JOIN dimMovie    AS m ON m.movie_key    = f.movie_key
INNER JOIN dimDate     AS d ON d.date_key     = f.date_key
INNER JOIN dimCustomer AS c ON c.customer_key = f.customer_key
WHERE m.rating IN ('PG-13', 'PG')
  AND c.city   IN ('Bellevue', 'Lancaster')
GROUP BY m.rating, c.city
ORDER BY revenue DESC
LIMIT 10;

 * postgresql://postgres:***@127.0.0.1:5432/pagila
4 rows affected.


rating,city,revenue
PG-13,Lancaster,216
PG,Lancaster,174
PG-13,Bellevue,168
PG,Bellevue,132


# FUNCTION TYPE 3: Rolling up

## aggregating along a dimension hierarchy to a coarser level (e.g. cities summed up into their country)

In [69]:
%%time
%%sql
-- Roll-up. Revenue is grouped by customer country rather than by city, so
-- every city of a country is summed into a single row per (day, rating).
SELECT
    d.day,
    m.rating,
    c.country,
    SUM(sales_amount) AS revenue
FROM factSales AS f
INNER JOIN dimMovie    AS m ON m.movie_key    = f.movie_key
INNER JOIN dimDate     AS d ON d.date_key     = f.date_key
INNER JOIN dimCustomer AS c ON c.customer_key = f.customer_key
GROUP BY d.day, m.rating, c.country
ORDER BY revenue DESC
LIMIT 10;

 * postgresql://postgres:***@127.0.0.1:5432/pagila
10 rows affected.
CPU times: user 7.1 ms, sys: 1.93 ms, total: 9.03 ms
Wall time: 1.03 s


day,rating,country,revenue
30,G,China,1020
30,PG,India,942
30,NC-17,India,924
30,PG-13,China,882
30,R,China,876
30,R,India,864
30,G,India,828
18,NC-17,India,816
30,PG,China,792
21,PG-13,India,774


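# FUNCTION TYPE 3 (continued): Drill-down

## going the other way along a hierarchy, from a coarser level to a finer one (e.g. one country broken down into its cities)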

In [70]:
%%time
%%sql
-- Drill-down. Look inside a single country (China) and break its revenue
-- down to the finer city level, per day and rating.
SELECT
    d.day,
    m.rating,
    c.city,
    SUM(sales_amount) AS revenue
FROM factSales AS f
INNER JOIN dimMovie    AS m ON m.movie_key    = f.movie_key
INNER JOIN dimDate     AS d ON d.date_key     = f.date_key
INNER JOIN dimCustomer AS c ON c.customer_key = f.customer_key
WHERE c.country = 'China'
GROUP BY d.day, m.rating, c.city
ORDER BY revenue DESC
LIMIT 10;

 * postgresql://postgres:***@127.0.0.1:5432/pagila
10 rows affected.
CPU times: user 7.76 ms, sys: 1.71 ms, total: 9.46 ms
Wall time: 68.9 ms


day,rating,city,revenue
19,NC-17,Lengshuijiang,108
30,PG-13,Zhoushan,102
21,PG,Zhoushan,102
29,R,Baiyin,102
30,G,Hohhot,96
17,PG-13,Yantai,96
20,G,Liaocheng,96
23,PG-13,Tiefa,96
30,G,Yinchuan,96
27,NC-17,Tianjin,90


# FUNCTION TYPE 4: Grouping sets

In [86]:
%%time
%%sql
-- One pass over the facts that produces four aggregation levels together.
-- A NULL in month and/or country marks a subtotal over that dimension.
SELECT
    d.month,
    s.country,
    SUM(sales_amount) AS revenue
FROM factSales AS f
INNER JOIN dimDate  AS d ON d.date_key  = f.date_key
INNER JOIN dimStore AS s ON s.store_key = f.store_key
GROUP BY GROUPING SETS (
    (),                      -- grand total
    (d.month),               -- total per month
    (s.country),             -- total per store country
    (d.month, s.country)     -- total per month and store country
)
ORDER BY d.month, s.country;

 * postgresql://postgres:***@127.0.0.1:5432/pagila
18 rows affected.
CPU times: user 4.5 ms, sys: 1.76 ms, total: 6.27 ms
Wall time: 122 ms


month,country,revenue
1.0,Australia,14220
1.0,Canada,14796
1.0,,29016
2.0,Australia,29442
2.0,Canada,28488
2.0,,57930
3.0,Australia,72534
3.0,Canada,71124
3.0,,143658
4.0,Australia,85020


# FUNCTION TYPE 5: CUBE - same result as the grouping sets above

In [85]:
%%time
%%sql
-- CUBE over (month, country) expands to every combination of the two
-- columns, namely the grand total, per month, per country, and per pair.
SELECT
    d.month,
    s.country,
    SUM(sales_amount) AS revenue
FROM factSales AS f
INNER JOIN dimDate  AS d ON d.date_key  = f.date_key
INNER JOIN dimStore AS s ON s.store_key = f.store_key
GROUP BY CUBE (d.month, s.country)
ORDER BY d.month, s.country;

 * postgresql://postgres:***@127.0.0.1:5432/pagila
18 rows affected.
CPU times: user 4.42 ms, sys: 1.62 ms, total: 6.05 ms
Wall time: 144 ms


month,country,revenue
1.0,Australia,14220
1.0,Canada,14796
1.0,,29016
2.0,Australia,29442
2.0,Canada,28488
2.0,,57930
3.0,Australia,72534
3.0,Canada,71124
3.0,,143658
4.0,Australia,85020


## MANUAL VERSION of CUBE

In [78]:
%%time
%%sql
-- Hand-written equivalent of the cube, one SELECT per aggregation level,
-- glued together with UNION ALL. Output column names come from the first
-- branch. Month is cast to text in the branches that return it, presumably
-- so that it lines up with the NULL placeholders of the other branches
-- (TODO confirm against the dimDate column type).

-- grand total
SELECT
    NULL AS month,
    NULL AS country,
    SUM(sales_amount) AS revenue
FROM factSales

UNION ALL

-- total per store country
SELECT
    NULL,
    s.country,
    SUM(sales_amount) AS revenue
FROM factSales AS f
INNER JOIN dimStore AS s ON s.store_key = f.store_key
GROUP BY s.country

UNION ALL

-- total per month
SELECT
    CAST(d.month AS text),
    NULL,
    SUM(sales_amount) AS revenue
FROM factSales AS f
INNER JOIN dimDate AS d ON d.date_key = f.date_key
GROUP BY d.month

UNION ALL

-- total per month and store country
SELECT
    CAST(d.month AS text),
    s.country,
    SUM(sales_amount) AS revenue
FROM factSales AS f
INNER JOIN dimDate  AS d ON d.date_key  = f.date_key
INNER JOIN dimStore AS s ON s.store_key = f.store_key
GROUP BY d.month, s.country

 * postgresql://postgres:***@127.0.0.1:5432/pagila
18 rows affected.
CPU times: user 10.3 ms, sys: 2.74 ms, total: 13 ms
Wall time: 302 ms


month,country,revenue
,,405462
,Canada,202614
,Australia,202848
3.0,,143658
5.0,,3096
4.0,,171762
2.0,,57930
1.0,,29016
5.0,Australia,1632
1.0,Canada,14796
