# Create database

Run the command below from a command prompt:<br>
```createdb -h 127.0.0.1 -p 5432 -U postgres review```

# Connecting to db

In [17]:
%load_ext sql

DB_ENDPOINT = "127.0.0.1"
DB = 'review'
DB_USER = 'postgres'
DB_PASSWORD = 'password'
DB_PORT = '5432'

# postgresql://username:password@host:port/database
conn_string = "postgresql://{}:{}@{}:{}/{}" .format(DB_USER, DB_PASSWORD, DB_ENDPOINT, DB_PORT, DB)

print(conn_string)
%sql $conn_string

The sql extension is already loaded. To reload it, use:
  %reload_ext sql
postgresql://postgres:password@127.0.0.1:5432/review


'Connected: postgres@review'

# Create a table with normal (row) storage and load data

In [10]:
%%sql
-- Row-oriented table for the customer review data.
-- Any earlier copy is dropped first so the cell can be re-run from scratch.
DROP TABLE IF EXISTS customer_review_row;

-- COPY without a column list maps CSV fields to columns by position, so the
-- column order below has to stay in step with the data files.
CREATE TABLE customer_review_row (
    customer_id           TEXT,
    review_date           DATE,
    review_rating         INTEGER,
    review_votes          INTEGER,
    review_helpful_votes  INTEGER,
    product_id            CHAR(10),
    product_title         TEXT,
    product_sales_rank    BIGINT,
    product_group         TEXT,
    product_category      TEXT,
    product_subcategory   TEXT,
    similar_product_ids   CHAR(10)[]
);

   postgresql://postgres:***@127.0.0.1:5432/pagila
 * postgresql://postgres:***@127.0.0.1:5432/review
Done.
Done.


[]

## Load data

In [12]:
%%sql
-- Bulk-load both CSV files into the row table. COPY ... FROM 'file' is read
-- by the database server, so these paths must exist on the server machine.
COPY customer_review_row
FROM 'C:/Users/shubham/customer-review-data/customer_reviews_1998.csv'
WITH (FORMAT csv);

COPY customer_review_row
FROM 'C:/Users/shubham/customer-review-data/customer_reviews_1999.csv'
WITH (FORMAT csv);

   postgresql://postgres:***@127.0.0.1:5432/pagila
 * postgresql://postgres:***@127.0.0.1:5432/review
589859 rows affected.
1172645 rows affected.


[]

In [14]:
%sql select count(1) from customer_review_row;

   postgresql://postgres:***@127.0.0.1:5432/pagila
 * postgresql://postgres:***@127.0.0.1:5432/review
1 rows affected.


count
1762504


# Create a table with columnar storage & load data

For columnar storage support, we will use the `cstore_fdw` extension: https://github.com/citusdata/cstore_fdw

In [None]:
%%sql

-- Register the extension in this database. IF NOT EXISTS keeps the cell
-- re-runnable once the extension has already been created.
CREATE EXTENSION IF NOT EXISTS cstore_fdw;

-- Create the server object that the columnar foreign table attaches to.
-- IF NOT EXISTS on CREATE SERVER needs PostgreSQL 10 or later.
CREATE SERVER IF NOT EXISTS cstore_server FOREIGN DATA WRAPPER cstore_fdw;

In [None]:
%%sql
-- Columnar counterpart of the review table, defined as a foreign table on
-- cstore_server. Any earlier definition is dropped so the cell can be re-run.
DROP FOREIGN TABLE IF EXISTS customer_reviews_col;

-- Same columns, in the same order, as the row-oriented table.
CREATE FOREIGN TABLE customer_reviews_col (
    customer_id           TEXT,
    review_date           DATE,
    review_rating         INTEGER,
    review_votes          INTEGER,
    review_helpful_votes  INTEGER,
    product_id            CHAR(10),
    product_title         TEXT,
    product_sales_rank    BIGINT,
    product_group         TEXT,
    product_category      TEXT,
    product_subcategory   TEXT,
    similar_product_ids   CHAR(10)[]
)
SERVER cstore_server
OPTIONS (compression 'pglz');

In [None]:
%%sql
-- Bulk-load both CSV files into the columnar table.
-- NOTE(review) the row table is loaded from a different directory - confirm
-- that both cells read the same files, otherwise the comparison is not fair.
COPY customer_reviews_col
FROM '/tmp/customer_reviews_1998.csv'
WITH (FORMAT csv);

COPY customer_reviews_col
FROM '/tmp/customer_reviews_1999.csv'
WITH (FORMAT csv);

# Compare performance between the two tables

Query on ```customer_review_row```

In [None]:
%%time
%%sql
-- Average rating per product title for reviews dated in 1995, read from the
-- row-oriented table. The table name must be customer_review_row, which is
-- the name used when the table is created and loaded in this notebook.
-- NOTE(review) the loaded files are named 1998 and 1999 - confirm that they
-- contain reviews dated 1995, otherwise this benchmark scans for no rows.
SELECT avg(review_rating) AS avg_review_rating, product_title
FROM customer_review_row
WHERE review_date >= '1995-01-01'
AND review_date <= '1995-12-31'
GROUP BY product_title;

Query on ```customer_reviews_col```

In [None]:
%%time
%%sql
-- Average rating per product title for reviews dated in 1995, read from the
-- columnar table. %%time reports how long the whole statement takes.
SELECT
    AVG(review_rating) AS avg_review_rating,
    product_title
FROM
    customer_reviews_col
WHERE
    review_date >= '1995-01-01'
    AND review_date <= '1995-12-31'
GROUP BY
    product_title;

# Conclusion: Columnar storage is faster for this kind of query, which aggregates over only a few of the table's columns