In [1]:
from pyspark.sql.utils import ParseException, AnalysisException
def run_sql(query, **kwargs):
    head_n = kwargs.get('head', 5)
    try:
        return spark.sql(query).toPandas().head(head_n)
    except (ParseException, AnalysisException) as ex:
        print(str(ex).replace('\\n', '\n')[2:-2])
    

In [2]:
import pandas as pd
pd.set_option('display.max_columns', 45)
pd.set_option('display.max_colwidth', -1)

### Developing Trusted Order Items

Following the order_validation seen on 'Trusted Items', for the Trusted Order Items we have to unnest items and garnish items, keeping its relation with the order_id. The unnest can be accomplished with the EXPLODE function.

#### Considerations
- Since the event came from the same device, with its own localization, all values should share the same currency;
- Analysts must be aware that the item’s metrics are replicated depending on the number of garnish items; Otherwise would be interesting to keep them as json, or generate its own table.

In [3]:
query = """
CREATE OR REPLACE TEMPORARY VIEW order_validation AS
WITH order_timestamps AS (
    SELECT
        order_id,
        MIN(IF(value = 'REGISTERED', created_at, NULL)) AS registration_time,
        MIN(IF(value = 'PLACED', created_at, NULL)) AS place_time,
        MIN(IF(value = 'CONCLUDED', created_at, NULL)) AS conclusion_time,
        MIN(IF(value = 'CANCELLED', created_at, NULL)) AS cancelation_time
    FROM raw_layer.order_status
    GROUP BY order_id
),

valid_order_flows AS (
    SELECT
        order_id,
        registration_time IS NOT NULL
            AND (
                (
                place_time IS NOT NULL
                    AND registration_time <= place_time 
                    AND (
                        (cancelation_time IS NULL AND place_time <= conclusion_time) OR 
                        (conclusion_time IS NULL AND place_time <= cancelation_time)
                    ) 
                 )
                 OR (
                cancelation_time IS NOT NULL
                    AND registration_time <= cancelation_time 
                 )
            ) AS valid_flow
    FROM order_timestamps
),

valid_orders AS (
    SELECT
        order_id,
        valid_flow
    FROM valid_order_flows
)

SELECT * FROM valid_orders
"""

spark.sql(query)

DataFrame[]

In [4]:
spark.sql("SELECT COUNT(*) FROM order_validation").show()

+--------+
|count(1)|
+--------+
| 2441067|
+--------+



In [5]:
query = """
WITH last_status AS (
    SELECT
        order_id,
        MIN(IF(value = 'REGISTERED', created_at, NULL)) AS registration_time,
        SUBSTR(MAX(CONCAT(CAST(created_at AS STRING), '<>', value)), 22) AS status
    FROM raw_layer.order_status
    WHERE order_id IN (SELECT order_id FROM order_validation)
    GROUP BY 1
)

SELECT status, COUNT(*) FROM last_status GROUP BY 1
"""
run_sql(query)

Unnamed: 0,status,count(1)
0,CONCLUDED,2354218
1,REGISTERED,16
2,CANCELLED,55179
3,PLACED,31654


In [6]:
query = """
SELECT
    order_id,
    explode(items)
FROM raw_layer.order 
WHERE order_id IN (SELECT order_id FROM order_validation WHERE valid_flow)
ORDER BY order_id
LIMIT 20
"""
run_sql(query)

Unnamed: 0,order_id,col
0,000013ce-ce11-43fe-a6ac-38e616e1982a,"(Combo 2 (24 Peças), (0, BRL), (0, BRL), 1.0, 2, (3800, BRL), 31ada94ae8c940418164f2b29425f18c, (3800, BRL), None, [], 4022, (0, BRL), (0, BRL))"
1,000013ce-ce11-43fe-a6ac-38e616e1982a,"(Temaki de Salmão - Tradicional, (0, BRL), (0, BRL), 1.0, 1, (1450, BRL), 7717e55dc59542fdba43d02bcc40cc21, (1450, BRL), None, [], 700, (0, BRL), (0, BRL))"
2,0000149b-2294-4a99-8ced-9af03363b709,"(FRITAS, (0, BRL), (0, BRL), 2.0, 4, (0, BRL), 9c1e7ef592be436883e4ade9b4a66c8e, (0, BRL), None, [(média, Row(value=u'0', currency=u'BRL'), Row(value=u'0', currency=u'BRL'), 2.0, 5, Row(value=u'400', currency=u'BRL'), 8GUS, 3f94a198ef2a4e8ba2fffa6869c8174b, Row(value=u'800', currency=u'BRL'), ESCOLHA A SUA PREFERÊNCIA, None)], None, (0, BRL), (0, BRL))"
3,0000149b-2294-4a99-8ced-9af03363b709,"(N 09, (0, BRL), (0, BRL), 1.0, 1, (950, BRL), c7c52cc67cea44c29bd9da2ca4b3571c, (950, BRL), None, [], None, (0, BRL), (0, BRL))"
4,0000149b-2294-4a99-8ced-9af03363b709,"(N 05, (0, BRL), (0, BRL), 1.0, 2, (1350, BRL), fce68583e2324b5e99d28f80de3affe0, (1350, BRL), Sem salada!, [(CARNE DE PICANHA 90 grs, Row(value=u'0', currency=u'BRL'), Row(value=u'0', currency=u'BRL'), 1.0, 3, Row(value=u'400', currency=u'BRL'), 8GUK, 12e59568979044098b8439ad888a4f84, Row(value=u'400', currency=u'BRL'), ADICIONAIS, None)], None, (0, BRL), (0, BRL))"


In [7]:
spark.sql("SET spark.sql.parser.quotedRegexColumnNames=true")
run_sql("""
    WITH items_exploded AS (
    SELECT
        order_id,
        explode(items) AS item
    FROM raw_layer.order 
    WHERE order_id IN (SELECT order_id FROM order_validation WHERE valid_flow)
),

garnish_exploded AS (
    SELECT
        order_id,
        item.externalId AS external_id,
        item.name AS name,
        item.addition.currency AS currency,
        item.addition.value/100 AS addition_value,
        item.discount.value/100 AS discount_value,
        item.quantity AS quantity,
        item.sequence AS sequence,
        item.unitPrice.value/100 AS unit_price_value,
        item.totalValue.value/100 AS total_value,
        item.customerNote AS customer_note,
        explode(item.garnishItems) AS garnish_item,
        item.integrationId AS integration_id,
        item.totalAddition.value/100 AS total_addition_value,
        item.totalDiscount.value/100 AS total_discount_value
    FROM items_exploded
)

SELECT
    order_id,
    external_id,
    name,
    currency,
    addition_value,
    discount_value,
    quantity,
    sequence,
    unit_price_value,
    total_value,
    customer_note,
    integration_id,
    total_addition_value,
    total_discount_value,
    garnish_item.name AS garnish_name,
    garnish_item.externalId AS garnish_external_id,
    garnish_item.categoryId AS garnish_category_id,
    garnish_item.categoryName AS garnish_category_name,
    garnish_item.addition.value/100 AS garnish_addition_value,
    garnish_item.discount.value/100 AS garnish_discount_value,
    garnish_item.quantity AS garnish_quantity,
    garnish_item.sequence AS garnish_sequence,
    garnish_item.unitPrice.value/100 AS garnish_unit_price_value,
    garnish_item.totalValue.value/100 AS garnish_total_value,
    garnish_item.integrationId AS garnish_integration_id
FROM garnish_exploded
ORDER BY order_id
LIMIT 20""", head=20)

Unnamed: 0,order_id,external_id,name,currency,addition_value,discount_value,quantity,sequence,unit_price_value,total_value,customer_note,integration_id,total_addition_value,total_discount_value,garnish_name,garnish_external_id,garnish_category_id,garnish_category_name,garnish_addition_value,garnish_discount_value,garnish_quantity,garnish_sequence,garnish_unit_price_value,garnish_total_value,garnish_integration_id
0,0000149b-2294-4a99-8ced-9af03363b709,9c1e7ef592be436883e4ade9b4a66c8e,FRITAS,BRL,0.0,0.0,2.0,4,0.0,0.0,,,0.0,0.0,média,3f94a198ef2a4e8ba2fffa6869c8174b,8GUS,ESCOLHA A SUA PREFERÊNCIA,0.0,0.0,2.0,5,4.0,8.0,
1,0000149b-2294-4a99-8ced-9af03363b709,fce68583e2324b5e99d28f80de3affe0,N 05,BRL,0.0,0.0,1.0,2,13.5,13.5,Sem salada!,,0.0,0.0,CARNE DE PICANHA 90 grs,12e59568979044098b8439ad888a4f84,8GUK,ADICIONAIS,0.0,0.0,1.0,3,4.0,4.0,
2,00001892-6731-4eab-96e4-e60d88814e93,db70200ad6d54072b582cd2e35ca3f6c,Promoção Pizza Grande + broto doce de MM's com 68% de desconto,BRL,0.0,0.0,1.0,1,29.9,29.9,,,0.0,0.0,MARACATU,a311918d9d7c4819a6e39a634cbee077,19SWU,Escolha sua preferencia,0.0,0.0,1.0,2,0.0,0.0,
3,00001892-6731-4eab-96e4-e60d88814e93,db70200ad6d54072b582cd2e35ca3f6c,Promoção Pizza Grande + broto doce de MM's com 68% de desconto,BRL,0.0,0.0,1.0,1,29.9,29.9,,,0.0,0.0,CALABRESA CROCANTE,bf712161c8b247c9aac8c422dd3e6be4,19SWV,Pizza salgada,0.0,0.0,1.0,3,0.0,0.0,
4,00001892-6731-4eab-96e4-e60d88814e93,db70200ad6d54072b582cd2e35ca3f6c,Promoção Pizza Grande + broto doce de MM's com 68% de desconto,BRL,0.0,0.0,1.0,1,29.9,29.9,,,0.0,0.0,MARACATU,a311918d9d7c4819a6e39a634cbee077,19SWU,Escolha sua preferencia,0.0,0.0,1.0,2,0.0,0.0,
5,00001892-6731-4eab-96e4-e60d88814e93,db70200ad6d54072b582cd2e35ca3f6c,Promoção Pizza Grande + broto doce de MM's com 68% de desconto,BRL,0.0,0.0,1.0,1,29.9,29.9,,,0.0,0.0,CALABRESA CROCANTE,bf712161c8b247c9aac8c422dd3e6be4,19SWV,Pizza salgada,0.0,0.0,1.0,3,0.0,0.0,
6,00001cdb-2399-417f-b630-f87919d25eaa,ebe444c0b9ee4243b4f08cd9118e64d9,GIGANTE 41 CM 2 SABORES,BRL,0.0,0.0,1.0,1,0.0,0.0,Favor não por queijo na pizza,,0.0,0.0,1/2 CHOCOLATE C/ OVOMALTINE,159b84bc693f4d33968aacc8c5b53e1f,SABOR,Escolha um sabor,0.0,0.0,1.0,4,18.45,18.45,
7,00001cdb-2399-417f-b630-f87919d25eaa,ebe444c0b9ee4243b4f08cd9118e64d9,GIGANTE 41 CM 2 SABORES,BRL,0.0,0.0,1.0,1,0.0,0.0,Favor não por queijo na pizza,,0.0,0.0,1/2 PRESTIGIO,ce91c8af5265414197026e0295e00666,SABOR2,Escolha o segundo sabor,0.0,0.0,1.0,3,18.45,18.45,
8,00001cdb-2399-417f-b630-f87919d25eaa,ebe444c0b9ee4243b4f08cd9118e64d9,GIGANTE 41 CM 2 SABORES,BRL,0.0,0.0,1.0,1,0.0,0.0,Favor não por queijo na pizza,,0.0,0.0,1/2 CHOCOLATE C/ OVOMALTINE,159b84bc693f4d33968aacc8c5b53e1f,SABOR2,Escolha o segundo sabor,0.0,0.0,1.0,4,18.45,18.45,
9,00001cdb-2399-417f-b630-f87919d25eaa,ebe444c0b9ee4243b4f08cd9118e64d9,GIGANTE 41 CM 2 SABORES,BRL,0.0,0.0,1.0,1,0.0,0.0,Favor não por queijo na pizza,,0.0,0.0,1/2 CHOCOLATE C/ OVOMALTINE,159b84bc693f4d33968aacc8c5b53e1f,SABOR2,Escolha o segundo sabor,0.0,0.0,1.0,4,18.45,18.45,
