In [33]:
%load_ext sql
import os

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [34]:
host = "localhost"
database = "olist"
user = "postgres"
password = "sql123"
connection_string = f"postgresql://{user}:{password}@{host}/{database}"
%sql $connection_string

'Connected: postgres@olist'

### **Average length of name and description**

In [35]:
%%sql
SELECT product_category,
       AVG(product_name_length)::int AS name_length,
       AVG(product_desc_length)::int AS desc_length
FROM products
WHERE product_category IS NOT NULL
GROUP BY product_category
ORDER BY name_length, desc_length;

 * postgresql://postgres:***@localhost/olist
73 rows affected.


product_category,name_length,desc_length
fashion_roupa_masculina,40,627
fashion_roupa_feminina,40,639
livros_interesse_geral,40,989
livros_tecnicos,42,1352
moveis_colchao_e_estofado,43,1111
artigos_de_natal,44,412
artes_e_artesanato,44,620
moveis_sala,44,625
market_place,44,829
consoles_games,44,850


### **Average weight, length, height, width for a product category**

In [36]:
%%sql
SELECT product_category,
       (AVG(product_weight_grams)/1000)::real AS avg_weight,
        AVG(product_length_cm)::real AS avg_length,
        AVG(product_height_cm)::real AS avg_height,
        AVG(product_width_cm)::real AS avg_width
FROM products 
WHERE product_category IS NOT NULL
GROUP BY product_category

 * postgresql://postgres:***@localhost/olist
73 rows affected.


product_category,avg_weight,avg_length,avg_height,avg_width
climatizacao,4.4599595,36.467743,23.887096,26.088709
livros_importados,0.5967742,29.741936,3.451613,21.225807
artigos_de_natal,1.8498154,28.23077,16.215385,22.830769
livros_tecnicos,1.1078455,27.325203,5.869919,18.463415
ferramentas_jardim,3.103777,30.936255,19.204515,23.504648
cine_foto,0.7957857,27.642857,11.571428,18.178572
dvds_blu_ray,0.3815625,21.270834,4.4166665,14.875
fashion_roupa_feminina,0.57222223,23.296297,11.481482,18.333334
beleza_saude,1.4347938,23.800737,15.712357,17.997545
livros_interesse_geral,0.7466111,23.481482,9.773149,19.328703


### **Average Volume of Box for each Product Category**

In [37]:
%%sql
WITH avg_box_volume AS(SELECT product_category,
       AVG(volume)::real AS avg_volume
FROM(SELECT product_category,
       (product_length_cm::real * product_height_cm::real * product_width_cm::real)::real AS volume
       FROM products
       WHERE product_category IS NOT NULL) AS box_volume
GROUP BY product_category)
SELECT product_category,
       avg_volume,
       RANK() OVER(ORDER BY avg_volume DESC)
FROM avg_box_volume

 * postgresql://postgres:***@localhost/olist
73 rows affected.


product_category,avg_volume,rank
moveis_colchao_e_estofado,77244.3,1
moveis_escritorio,75468.47,2
moveis_cozinha_area_de_servico_jantar_e_jardim,69406.09,3
eletrodomesticos_2,55476.312,4
moveis_sala,54486.13,5
moveis_quarto,51038.844,6
pcs,44635.168,7
agro_industria_e_comercio,37604.23,8
industria_comercio_e_negocios,37372.31,9
malas_acessorios,32950.336,10


### **Correlation between freight value and product weight, length, height, volume and price**

In [38]:
%%sql
WITH product_freight 
       AS(SELECT oi.freight_value,
       pr.product_weight_grams,
       pr.product_length_cm,
       pr.product_height_cm,
       pr.product_width_cm,
       pr.product_length_cm::real * pr.product_height_cm::real * pr.product_width_cm::real AS volume,
       oi.price
FROM products pr
JOIN order_items oi USING(product_id))
SELECT CORR(freight_value, product_length_cm)::real AS corr_length,
       CORR(freight_value, product_height_cm)::real AS corr_height,
       CORR(freight_value, product_width_cm)::real AS corr_width,
       CORR(freight_value, volume)::real AS corr_volume,
       CORR(freight_value,product_weight_grams)::real AS corr_weight,
       CORR(freight_value, price)::real AS corr_price
FROM product_freight

 * postgresql://postgres:***@localhost/olist
1 rows affected.


corr_length,corr_height,corr_width,corr_volume,corr_weight,corr_price
0.30908597,0.39183104,0.32377744,0.5872701,0.6104202,0.4142043


**Correlation between unit sold and photo quantity** 

In [72]:
%%sql
WITH unit_photos AS(SELECT product_id,
       unit_sold,
       product_photos_qty
     FROM (SELECT *
           FROM (SELECT product_id,
                 SUM(order_item_id) as unit_sold    
                 FROM orders
                 JOIN order_items USING(order_id)
                 WHERE order_status='delivered'
                 GROUP BY product_id) AS unit_sold
                 ORDER BY unit_sold DESC) AS product_units
JOIN products USING(product_id))
SELECT CORR(unit_sold, product_photos_qty)::real
FROM unit_photos
LIMIT 10

 * postgresql://postgres:***@localhost/olist
1 rows affected.


corr
-0.006174032


### **Linear Relationship between freight value and product weight**

In [40]:
%%sql
WITH price_freight AS (SELECT product_category, 
                       freight_value,
                       (product_weight_grams/1000)::real AS weight
                       FROM order_items
                       JOIN products using(product_id))
SELECT regr_slope(freight_value,weight)::real AS slope,
       regr_intercept(freight_value,weight)::real AS intercept
FROM price_freight
LIMIT 10

 * postgresql://postgres:***@localhost/olist
1 rows affected.


slope,intercept
2.575215,15.649705
