In [0]:
from pyspark.sql.functions import to_date, year, month, weekofyear, desc, rank
from pyspark.sql.types import DateType, DateConverter

In [0]:
%run "../includes/configurations"

#### Ingesting processed data

In [0]:
# Load the processed orders and customers datasets (Parquet) into DataFrames.
# `processed_folder_path` is presumably defined by the configurations notebook
# pulled in via %run above -- confirm.
orders_df = (
    spark.read
    .parquet(f"{processed_folder_path}/orders")
)
customers_df = (
    spark.read
    .parquet(f"{processed_folder_path}/customers")
)

In [0]:
# Preview a handful of orders. limit() keeps the preview a DataFrame (schema
# and column types intact) instead of collecting Row objects to the driver
# with take() before rendering.
display(orders_df.limit(5))

order_id,customer_id,order_status,order_date,order_delivery_date
6ec1bea8cbcef0a1b81bc9b7fbd37ccb,e6b5e20566e5c72cbaab04f91dec9c85,delivered,2018-08-07,2018-08-27
441972a5bbd51a10459a487402076942,b79fa9dfed0c3d624b70fbd0ca2469de,delivered,2018-08-23,2018-08-29
fbecebecbe32df9dc3411b1e35b5484c,7f9f88f14a8f0dc733a5e371af98abac,delivered,2017-03-20,2017-03-27
9e22b00fd1b9f036577e8c517ea0a5d4,5a7346263920ecab0f88da703bbaf3b1,delivered,2017-12-01,2018-01-03
07d9d11b0175952638ddd53d7855f8a7,a1c779cddccf5ffe60e543d3ce0dc477,delivered,2017-11-22,2017-11-30


In [0]:
# Preview a handful of customers. limit() keeps the preview a DataFrame (schema
# and column types intact) instead of collecting Row objects to the driver
# with take() before rendering.
display(customers_df.limit(5))

customer_id,customer_zip_code_prefix,customer_city,customer_state
f2a1d75b74d9ec748af88e894cd87597,68590,jacunda,PA
f15272fe9d0e2ae3297185f18d3bac46,15056,sao jose do rio preto,SP
7324ecb0ff143f561193d22bea7d63fb,13302,itu,SP
7accf3d920f47c07f5bfbc88f53f9926,45638,coaraci,BA
3680a273ddb333253fa2edc7d3f8a3f2,29700,colatina,ES


In [0]:
# Expose both source DataFrames to Spark SQL as temporary views so the join
# that follows can be written as a SQL query.
customers_df.createOrReplaceTempView("customers_df_view")
orders_df.createOrReplaceTempView("orders_df_view")

In [0]:
# Attach customer geography (zip prefix, city, state) to every order.
# The INNER JOIN keeps only orders whose customer_id exists in the customers
# view; USING (customer_id) yields a single customer_id column in the result.
# No trailing ';' inside spark.sql(): it is not needed for a single statement
# and older Spark parsers reject it.
joined_df = spark.sql("""
SELECT *
FROM orders_df_view
INNER JOIN customers_df_view
USING (customer_id)
""")

In [0]:
# Preview the joined result. limit() keeps the preview a DataFrame (schema and
# column types intact) instead of collecting Row objects to the driver with
# take() before rendering.
display(joined_df.limit(7))

customer_id,order_id,order_status,order_date,order_delivery_date,customer_zip_code_prefix,customer_city,customer_state
e6b5e20566e5c72cbaab04f91dec9c85,6ec1bea8cbcef0a1b81bc9b7fbd37ccb,delivered,2018-08-07,2018-08-27,65110,sao jose de ribamar,MA
b79fa9dfed0c3d624b70fbd0ca2469de,441972a5bbd51a10459a487402076942,delivered,2018-08-23,2018-08-29,81690,curitiba,PR
7f9f88f14a8f0dc733a5e371af98abac,fbecebecbe32df9dc3411b1e35b5484c,delivered,2017-03-20,2017-03-27,22461,rio de janeiro,RJ
5a7346263920ecab0f88da703bbaf3b1,9e22b00fd1b9f036577e8c517ea0a5d4,delivered,2017-12-01,2018-01-03,22793,rio de janeiro,RJ
a1c779cddccf5ffe60e543d3ce0dc477,07d9d11b0175952638ddd53d7855f8a7,delivered,2017-11-22,2017-11-30,1227,sao paulo,SP
39a23021e4ee2efd04ea8decaff7b328,1c38314597f39a117548efb308041b76,delivered,2017-12-11,2017-12-28,28030,campos dos goytacazes,RJ
e42b1319fefce9bf37acf91a0ac8c1a9,06caf4b0e55d9bc015a91fa77432a1cb,delivered,2017-03-11,2017-03-21,73330,brasilia,DF


In [0]:
# Expose the joined orders/customers DataFrame to Spark SQL as a temporary
# view so the aggregation query can reference it by name.
joined_df.createOrReplaceTempView("joined_df_view")

In [0]:
# Count orders per region (zip prefix, city, state) for each
# year / month / week-of-year bucket of the order date, busiest buckets first.
# The sort key is named explicitly (not by ordinal position) so that editing
# the select list cannot silently change the ordering.
# NOTE(review): buckets combine calendar year() with weekofyear(); if
# weekofyear follows ISO-8601 week numbering, dates around New Year can carry
# a week number that belongs to the adjacent ISO year -- confirm this
# labelling is acceptable.
top_regions = spark.sql("""
SELECT
    customer_zip_code_prefix,
    customer_city,
    customer_state,
    year(order_date)       AS order_year,
    month(order_date)      AS order_month,
    weekofyear(order_date) AS order_week,
    COUNT(order_id)        AS order_count
FROM joined_df_view
GROUP BY
    customer_zip_code_prefix,
    customer_city,
    customer_state,
    year(order_date),
    month(order_date),
    weekofyear(order_date)
ORDER BY order_count DESC
""")

In [0]:
# Show the ten busiest region/period buckets. limit() keeps the preview a
# DataFrame (schema and column types intact) instead of collecting Row objects
# to the driver with take() before rendering.
display(top_regions.limit(10))

customer_zip_code_prefix,customer_city,customer_state,order_year,order_month,order_week,order_count
80030,curitiba,PR,2017,1,1,14
24230,niteroi,RJ,2018,3,10,7
22790,rio de janeiro,RJ,2018,1,1,7
22775,rio de janeiro,RJ,2018,1,4,7
30575,belo horizonte,MG,2017,11,47,7
22790,rio de janeiro,RJ,2017,12,49,6
82200,curitiba,PR,2017,1,1,6
20551,rio de janeiro,RJ,2017,11,47,6
24230,niteroi,RJ,2017,11,47,6
35502,divinopolis,MG,2017,11,47,6


In [0]:
# Register the aggregated counts as a temporary view. At this point the view
# has no `rank` column; a later cell re-registers the same view name after
# adding it.
top_regions.createOrReplaceTempView("top_regions_view")

In [0]:
from pyspark.sql.window import Window

In [0]:
# Rank regions inside each (year, month, week) bucket by descending order
# count. rank() assigns the same rank to ties, so several regions can share
# rank 1 within one bucket.
regionRankSpec = (
    Window
    .partitionBy("order_year", "order_month", "order_week")
    .orderBy(desc("order_count"))
)
top_regions = top_regions.withColumn(
    "rank",
    rank().over(regionRankSpec),
)

# Replace the previously registered view so SQL queries see the rank column.
top_regions.createOrReplaceTempView("top_regions_view")

In [0]:
%sql
-- Top-ranked region(s) for each year / month / week bucket.
-- rank starts at 1, so `rank = 1` selects the leaders (ties included).
-- An explicit ORDER BY makes the LIMIT deterministic: without it the ten
-- rows returned are an arbitrary subset and can change between runs.
SELECT *
FROM top_regions_view
WHERE rank = 1
ORDER BY order_year, order_month, order_week,
         customer_zip_code_prefix, customer_city, customer_state
LIMIT 10

customer_zip_code_prefix,customer_city,customer_state,order_year,order_month,order_week,order_count,rank
69309,boa vista,RR,2016,9,35,1,1
99025,passo fundo,RS,2016,9,36,1,1
12244,sao jose dos campos,SP,2016,9,37,1,1
14600,sao joaquim da barra,SP,2016,9,37,1,1
2975,sao paulo,SP,2016,10,39,1,1
20511,rio de janeiro,RJ,2016,10,40,3,1
30411,belo horizonte,MG,2016,10,41,2,1
62680,paracuru,CE,2016,10,42,1,1
80030,curitiba,PR,2016,12,51,1,1
80030,curitiba,PR,2017,1,1,14,1
