In [1]:
%load_ext sql
import os

In [2]:
host = "localhost"
database = "olist"
user = "postgres"
password = "sql123"
connection_string = f"postgresql://{user}:{password}@{host}/{database}"
%sql $connection_string

'Connected: postgres@olist'

### **Frequency of orders delivered every month**

In [3]:
%%sql
SELECT date_part('year', order_delivered_customer)::int AS "year",
       date_part('month', order_delivered_customer)::int AS "month",
       COUNT(*) AS "orders_delivered"
FROM orders
WHERE order_status = 'delivered'
GROUP BY year, month
ORDER BY year, month

 * postgresql://postgres:***@localhost/olist
26 rows affected.


year,month,orders_delivered
2016.0,10.0,205
2016.0,11.0,58
2016.0,12.0,4
2017.0,1.0,283
2017.0,2.0,1351
2017.0,3.0,2382
2017.0,4.0,1849
2017.0,5.0,3751
2017.0,6.0,3223
2017.0,7.0,3455


### **Top 5 cities with most number of orders**

In [86]:
%%sql
SELECT cu.customer_state,
       cu.customer_city,
       COUNT(oi.order_id)
FROM orders AS oi
JOIN customers AS cu ON oi.customer_id = cu.customer_id
GROUP BY cu.customer_city, cu.customer_state
ORDER BY count DESC
LIMIT 5

 * postgresql://postgres:***@localhost/olist
5 rows affected.


customer_state,customer_city,count
SP,sao paulo,15540
RJ,rio de janeiro,6882
MG,belo horizonte,2773
DF,brasilia,2131
PR,curitiba,1521


### **Deliveries by hour**

In [87]:
%%sql
SELECT date_part('hour', order_delivered_customer)::int AS "hour",
       COUNT(*) AS "orders_delivered"
FROM orders
WHERE order_status = 'delivered'
GROUP BY hour

 * postgresql://postgres:***@localhost/olist
25 rows affected.


hour,orders_delivered
0.0,2885
1.0,1515
2.0,649
3.0,260
4.0,187
5.0,198
6.0,269
7.0,396
8.0,779
9.0,1196


### **Average difference between estimated and actual delivery by month**

In [71]:
%%sql
SELECT month,
       AVG(deviation_from_estimated)::real AS difference
FROM(SELECT EXTRACT(MONTH FROM order_purchase)::int AS month,
       EXTRACT('days' FROM order_estimated_delivery - order_delivered_customer)::int AS deviation_from_estimated
FROM orders
WHERE order_status = 'delivered') AS diff
GROUP BY month

 * postgresql://postgres:***@localhost/olist
12 rows affected.


month,difference
1,13.38496
2,9.620979
3,7.1721644
4,11.974069
5,11.7074995
6,16.024807
7,10.879737
8,9.200588
9,10.389063
10,12.356947


### **Mean difference between order and delivery time by state** 

In [45]:
%%sql
SELECT delivery_time.customer_state,
       AVG(delivery_time.delivery_in_days)::real AS avg_delivery_time_days
FROM(SELECT cu.customer_state,
       EXTRACT('days' FROM od.order_delivered_customer - od.order_purchase)::int AS delivery_in_days
     FROM ORDERS AS od
     JOIN customers AS cu ON od.customer_id = cu.customer_id
     WHERE order_status = 'delivered') AS delivery_time
GROUP BY customer_state
ORDER BY avg_delivery_time_days DESC

 * postgresql://postgres:***@localhost/olist
27 rows affected.


customer_state,avg_delivery_time_days
RR,28.975609
AP,26.731344
AM,25.986206
AL,24.040302
PA,23.316067
MA,21.117155
SE,21.02985
CE,20.817827
AC,20.6375
PB,19.953579
