# Demo 03 -  Columnar Vs Row Storage

- The columnar storage extension used here:
    - cstore_fdw by Citus Data [https://github.com/citusdata/cstore_fdw](https://github.com/citusdata/cstore_fdw)
- The data tables are the same ones Citus Data uses to demonstrate the storage extension


In [None]:
#In case your envirnoment does not have the needed libraries
!pip install psycopg2-binary
!pip install ipython-sql
!pip install pandas

In [3]:
# Load the ipython-sql extension, which provides the %sql / %%sql magics used in the cells below.
%load_ext sql

# STEP0 : Connect to the local `reviews` database

In [4]:
import os
from urllib.parse import quote_plus

# Connection settings. Each one can be overridden through an environment
# variable so real credentials never have to be written into the notebook;
# the fallbacks are the demo defaults.
DB_ENDPOINT = os.environ.get("DB_ENDPOINT", "db")
DB = os.environ.get("DB_NAME", "reviews")
DB_USER = os.environ.get("DB_USER", "postgres")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "postgres")
DB_PORT = os.environ.get("DB_PORT", "5432")

# postgresql://username:password@host:port/database
# User and password are URL-escaped so characters such as '@' or '/' in a
# credential cannot corrupt the URL.
conn_string = "postgresql://{}:{}@{}:{}/{}".format(
    quote_plus(DB_USER), quote_plus(DB_PASSWORD), DB_ENDPOINT, DB_PORT, DB
)

# Show where we are connecting without writing the password into the saved
# notebook output.
print(
    "postgresql://{}:***@{}:{}/{}".format(
        quote_plus(DB_USER), DB_ENDPOINT, DB_PORT, DB
    )
)


postgresql://postgres:postgres@db:5432/reviews


In [5]:
# Open the database connection; $conn_string is expanded from the Python variable built above.
%sql $conn_string

'Connected: postgres@reviews'

In [69]:
# One-time setup steps (uncomment to run): create the `reviews` database,
# download and unpack the two sample CSV files, and move them to /tmp, which is
# the path the COPY statements later in this notebook load from.
# NOTE(review): COPY ... FROM '<file>' reads the path on the database server --
# confirm that /tmp here is visible to the `db` host.
#!psql -U postgres -c 'create database reviews;'
#!wget http://examples.citusdata.com/customer_reviews_1998.csv.gz
#!wget http://examples.citusdata.com/customer_reviews_1999.csv.gz

#!gzip -d customer_reviews_1998.csv.gz 
#!gzip -d customer_reviews_1999.csv.gz 

#!mv customer_reviews_1998.csv /tmp/customer_reviews_1998.csv
#!mv customer_reviews_1999.csv /tmp/customer_reviews_1999.csv

-rw-r--r-- 1 jovyan users 101299118 Jun  2  2012 /tmp/customer_reviews_1998.csv


# STEP1 : Create a table with normal (row) storage & load data

In [37]:
%%sql
-- Row-oriented baseline table, dropped and recreated on every run of this cell
DROP TABLE IF EXISTS customer_reviews_row;

CREATE TABLE customer_reviews_row (
    customer_id          text,
    review_date          date,
    review_rating        integer,
    review_votes         integer,
    review_helpful_votes integer,
    product_id           char(10),
    product_title        text,
    product_sales_rank   bigint,
    product_group        text,
    product_category     text,
    product_subcategory  text,
    similar_product_ids  char(10)[]
)

 * postgresql://postgres:***@db:5432/reviews
Done.
Done.


[]

In [38]:
%%sql
-- Bulk-load the 1998 and 1999 CSV files into the row-oriented table
COPY customer_reviews_row
    FROM '/tmp/customer_reviews_1998.csv'
    WITH CSV;
COPY customer_reviews_row
    FROM '/tmp/customer_reviews_1999.csv'
    WITH CSV;

 * postgresql://postgres:***@db:5432/reviews
589859 rows affected.
1172645 rows affected.


[]

# STEP2 : Create a table with columnar storage & load data

In [39]:
%%sql

-- load the extension (IF NOT EXISTS keeps this cell re-runnable after the first load)
CREATE EXTENSION IF NOT EXISTS cstore_fdw;

-- drop the foreign table first, then its server, so the cell starts from a clean slate
-- CASCADE also removes any other foreign table still defined on cstore_server
DROP FOREIGN TABLE IF EXISTS customer_reviews_col;
DROP SERVER IF EXISTS cstore_server CASCADE;

-- create server object
CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw;

-- create foreign table, same columns as customer_reviews_row but stored by cstore_fdw
CREATE FOREIGN TABLE customer_reviews_col
(
    customer_id TEXT,
    review_date DATE,
    review_rating INTEGER,
    review_votes INTEGER,
    review_helpful_votes INTEGER,
    product_id CHAR(10),
    product_title TEXT,
    product_sales_rank BIGINT,
    product_group TEXT,
    product_category TEXT,
    product_subcategory TEXT,
    similar_product_ids CHAR(10)[]
)
SERVER cstore_server
OPTIONS(compression 'pglz');

 * postgresql://postgres:***@db:5432/reviews
Done.
Done.


[]

In [40]:
%%sql
-- Bulk-load the 1998 and 1999 CSV files into the columnar foreign table
COPY customer_reviews_col
    FROM '/tmp/customer_reviews_1998.csv'
    WITH CSV;
COPY customer_reviews_col
    FROM '/tmp/customer_reviews_1999.csv'
    WITH CSV;

 * postgresql://postgres:***@db:5432/reviews
589859 rows affected.
1172645 rows affected.


[]

# Compare performance

In [41]:
%%time
%%sql
-- Timed lookup against the row-oriented table, reading five of its twelve columns
SELECT customer_id,
       review_date,
       review_rating,
       product_id,
       product_title
  FROM customer_reviews_row
 WHERE customer_id = 'A27T7HVDXA3K2A'
   AND product_title LIKE '%Dune%'
   AND review_date >= '1998-01-01'
   AND review_date <= '1998-12-31';

 * postgresql://postgres:***@db:5432/reviews
5 rows affected.
CPU times: user 10 ms, sys: 0 ns, total: 10 ms
Wall time: 217 ms


customer_id,review_date,review_rating,product_id,product_title
A27T7HVDXA3K2A,1998-04-10,5,0399128964,Dune (Dune Chronicles (Econo-Clad Hardcover))
A27T7HVDXA3K2A,1998-04-10,5,0881036366,Dune (Dune Chronicles (Econo-Clad Hardcover))
A27T7HVDXA3K2A,1998-04-10,5,044100590X,Dune
A27T7HVDXA3K2A,1998-04-10,5,0441172717,"Dune (Dune Chronicles, Book 1)"
A27T7HVDXA3K2A,1998-04-10,5,1559949570,Dune Audio Collection


In [42]:
%%time
%%sql
-- Timed lookup against the columnar foreign table, reading five of its twelve columns
SELECT customer_id,
       review_date,
       review_rating,
       product_id,
       product_title
  FROM customer_reviews_col
 WHERE customer_id = 'A27T7HVDXA3K2A'
   AND product_title LIKE '%Dune%'
   AND review_date >= '1998-01-01'
   AND review_date <= '1998-12-31';

 * postgresql://postgres:***@db:5432/reviews
5 rows affected.
CPU times: user 0 ns, sys: 10 ms, total: 10 ms
Wall time: 158 ms


customer_id,review_date,review_rating,product_id,product_title
A27T7HVDXA3K2A,1998-04-10,5,0399128964,Dune (Dune Chronicles (Econo-Clad Hardcover))
A27T7HVDXA3K2A,1998-04-10,5,044100590X,Dune
A27T7HVDXA3K2A,1998-04-10,5,0441172717,"Dune (Dune Chronicles, Book 1)"
A27T7HVDXA3K2A,1998-04-10,5,0881036366,Dune (Dune Chronicles (Econo-Clad Hardcover))
A27T7HVDXA3K2A,1998-04-10,5,1559949570,Dune Audio Collection


# Conclusion: We can see that the columnar storage is faster for this query (158 ms vs. 217 ms wall time in the single run above)