In [2]:
import argparse
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import pprint
import pyspark
import pyspark.sql.functions as F
import time
from tqdm import tqdm

from pyspark.sql.types import StringType, IntegerType, FloatType, DateType, DoubleType, TimestampType, LongType
from pyspark.sql.functions import col, lower, trim, when,row_number, count,date_add, when,to_date
from pyspark.sql import Window
from pyspark.sql.window import Window

import utils.data_processing_bronze_table as bronze_processing
import utils.data_processing_silver_table as silver_processing
import utils.data_processing_gold_feature as gold_processing_feature
import utils.data_processing_gold_label_table as gold_label_processing

## Set up PySpark session

In [3]:
print('\n\n---starting job---\n\n')

# Obtain the SparkSession for this notebook: app name "olist_bronze_processing",
# master "local[*]". getOrCreate() hands back an already-running session if
# there is one instead of building a second.
spark = (
    pyspark.sql.SparkSession.builder
    .appName("olist_bronze_processing")
    .master("local[*]")
    .getOrCreate()
)

# Raise the log threshold to ERROR so Spark warnings stay out of the cell output.
spark.sparkContext.setLogLevel("ERROR")



---starting job---




Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/06/18 18:08:51 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


## Build Bronze Table

Important note: The datamart folder is created in a different location depending on whether the main.py script or this Jupyter notebook is run.

* This Jupyter notebook creates the datamart folder inside the `scripts` folder and writes the bronze tables there.
* The main.py script creates the datamart folder inside the `app` folder (i.e. the root) and writes the bronze tables there.

**TODO:** We need to hold a team meeting to resolve this discrepancy.

I chose to run the main.py script; therefore, the subsequent code that builds the Silver tables uses paths relative to the `app` folder to access the bronze tables.

In [5]:
# Folder that receives every bronze-layer output. The path is relative, so it
# is resolved against whatever the current working directory is at run time.
bronze_root = "datamart/bronze"

# exist_ok=True makes re-running this cell harmless when the folder is already there.
os.makedirs(name=bronze_root, exist_ok=True)

print("Bronze root directory: " + bronze_root)

Bronze root directory: datamart/bronze


In [8]:
# Run the bronze loaders for the Olist datasets one after another.
# Every slot prints a divider line once it is done. A slot set to None is a
# loader that is currently switched off; its divider is still printed.
print("\nProcessing Olist datasets...\n")

divider = '-------------------------------------------------'

bronze_loaders = [
    bronze_processing.process_olist_customers_bronze,
    bronze_processing.process_olist_geolocation_bronze,
    bronze_processing.process_olist_order_items_bronze,
    None,  # switched off: bronze_processing.process_olist_order_payments_bronze
    None,  # switched off: bronze_processing.process_olist_order_reviews_bronze
    bronze_processing.process_olist_products_bronze,
    bronze_processing.process_olist_sellers_bronze,
    bronze_processing.process_product_cat_translation_bronze,
]

for load_bronze in bronze_loaders:
    if load_bronze is not None:
        load_bronze(bronze_root, spark)
    print(divider)


Processing Olist datasets...

loaded data/olist_customers_dataset.csv  →  99,441 rows


                                                                                

saved bronze: datamart/bronze/customers/bronze_olist_customers.parquet
-------------------------------------------------
loaded data/olist_geolocation_dataset.csv  →  1,000,325 rows


                                                                                

saved bronze: datamart/bronze/geolocation/bronze_olist_geolocation.parquet
-------------------------------------------------
loaded data/olist_order_items_dataset.csv  →  112,650 rows


                                                                                

saved bronze: datamart/bronze/order_items/bronze_olist_order_items.parquet
-------------------------------------------------
-------------------------------------------------
-------------------------------------------------
loaded data/olist_products_dataset.csv  →  32,951 rows


                                                                                

saved bronze: datamart/bronze/products/bronze_olist_products.parquet
-------------------------------------------------
loaded data/olist_sellers_dataset.csv  →  3,095 rows
saved bronze: datamart/bronze/sellers/bronze_olist_sellers.parquet
-------------------------------------------------
loaded data/product_category_name_translation.csv  →  72 rows


                                                                                

saved bronze: datamart/bronze/category_translation/bronze_product_category_translation.parquet
-------------------------------------------------


In [9]:
# Build the partitioned bronze output for the orders dataset under bronze_root.
# NOTE(review): this step was originally described as "monthly partitioning", but
# the recorded cell output shows one CSV per day ("Day DD_MM_YYYY: N rows",
# saved as bronze_olist_orders_DD_MM_YYYY.csv) - confirm the intended granularity.
bronze_processing.process_olist_orders_bronze(bronze_root, spark)

                                                                                

Day 19_02_2018: 259 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_02_2018.csv


                                                                                

Day 14_03_2017: 99 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_03_2017.csv


                                                                                

Day 10_06_2017: 77 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_06_2017.csv


                                                                                

Day 06_04_2017: 96 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_04_2017.csv


                                                                                

Day 27_05_2018: 100 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_05_2018.csv


                                                                                

Day 27_08_2017: 119 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_08_2017.csv


                                                                                

Day 10_02_2018: 179 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_02_2018.csv


                                                                                

Day 03_08_2018: 314 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_08_2018.csv


                                                                                

Day 18_10_2017: 179 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_10_2017.csv


                                                                                

Day 27_07_2017: 124 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_07_2017.csv


                                                                                

Day 20_05_2018: 161 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_05_2018.csv


                                                                                

Day 22_08_2018: 187 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_08_2018.csv


                                                                                

Day 02_02_2018: 212 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_02_2018.csv


                                                                                

Day 06_08_2017: 109 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_08_2017.csv


                                                                                

Day 17_03_2017: 65 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_03_2017.csv


                                                                                

Day 16_04_2017: 49 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_04_2017.csv


                                                                                

Day 17_03_2018: 180 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_03_2018.csv


                                                                                

Day 06_10_2017: 130 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_10_2017.csv


                                                                                

Day 13_05_2017: 73 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_05_2017.csv


                                                                                

Day 10_10_2016: 39 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_10_2016.csv


                                                                                

Day 24_07_2017: 162 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_07_2017.csv


                                                                                

Day 01_12_2017: 275 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_12_2017.csv


                                                                                

Day 25_02_2017: 43 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_02_2017.csv


                                                                                

Day 07_06_2017: 129 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_06_2017.csv


                                                                                

Day 17_06_2018: 152 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_06_2018.csv


                                                                                

Day 30_01_2018: 258 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_01_2018.csv


                                                                                

Day 20_08_2017: 104 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_08_2017.csv


                                                                                

Day 27_03_2017: 111 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_03_2017.csv


                                                                                

Day 21_04_2017: 68 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_04_2017.csv


                                                                                

Day 21_05_2017: 148 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_05_2017.csv


                                                                                

Day 25_03_2017: 76 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_03_2017.csv


                                                                                

Day 24_05_2018: 114 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_05_2018.csv


                                                                                

Day 11_04_2018: 274 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_04_2018.csv


                                                                                

Day 11_07_2017: 165 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_07_2017.csv


                                                                                

Day 17_08_2018: 257 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_08_2018.csv


                                                                                

Day 13_07_2018: 168 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_07_2018.csv


                                                                                

Day 12_07_2017: 153 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_07_2017.csv


                                                                                

Day 19_06_2017: 156 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_06_2017.csv


                                                                                

Day 20_04_2018: 197 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_04_2018.csv


                                                                                

Day 19_08_2017: 108 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_08_2017.csv


                                                                                

Day 14_12_2017: 200 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_12_2017.csv


                                                                                

Day 31_05_2017: 129 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_05_2017.csv


                                                                                

Day 14_06_2017: 131 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_06_2017.csv


                                                                                

Day 20_02_2017: 63 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_02_2017.csv


                                                                                

Day 09_05_2017: 128 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_05_2017.csv


                                                                                

Day 07_04_2017: 85 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_04_2017.csv


                                                                                

Day 25_06_2018: 241 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_06_2018.csv


                                                                                

Day 05_05_2018: 197 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_05_2018.csv


                                                                                

Day 30_06_2017: 95 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_06_2017.csv


                                                                                

Day 02_05_2018: 296 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_05_2018.csv


                                                                                

Day 28_11_2017: 380 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_11_2017.csv


                                                                                

Day 21_09_2017: 150 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_09_2017.csv


                                                                                

Day 15_03_2018: 290 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_03_2018.csv


                                                                                

Day 10_08_2018: 256 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_08_2018.csv


                                                                                

Day 09_06_2018: 172 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_06_2018.csv


                                                                                

Day 09_11_2017: 191 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_11_2017.csv


                                                                                

Day 12_04_2017: 63 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_04_2017.csv


                                                                                

Day 16_03_2017: 103 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_03_2017.csv


                                                                                

Day 19_12_2017: 176 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_12_2017.csv


                                                                                

Day 22_03_2017: 105 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_03_2017.csv


                                                                                

Day 02_03_2018: 266 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_03_2018.csv


                                                                                

Day 28_04_2017: 98 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_04_2017.csv


                                                                                

Day 14_10_2017: 116 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_10_2017.csv


                                                                                

Day 18_03_2017: 73 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_03_2017.csv


                                                                                

Day 16_04_2018: 280 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_04_2018.csv


                                                                                

Day 03_12_2017: 234 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_12_2017.csv


                                                                                

Day 06_02_2017: 85 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_02_2017.csv


                                                                                

Day 12_11_2017: 174 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_11_2017.csv


                                                                                

Day 07_04_2018: 164 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_04_2018.csv


                                                                                

Day 04_08_2018: 245 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_08_2018.csv


                                                                                

Day 26_05_2017: 80 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_05_2017.csv


                                                                                

Day 20_10_2017: 124 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_10_2017.csv


                                                                                

Day 01_03_2018: 277 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_03_2018.csv


                                                                                

Day 16_05_2018: 357 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_05_2018.csv


                                                                                

Day 23_10_2017: 161 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_10_2017.csv


                                                                                

Day 08_08_2018: 316 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_08_2018.csv


                                                                                

Day 12_10_2017: 143 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_10_2017.csv


                                                                                

Day 24_03_2018: 165 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_03_2018.csv


                                                                                

Day 07_08_2017: 157 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_08_2017.csv


                                                                                

Day 05_08_2017: 115 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_08_2017.csv


                                                                                

Day 20_11_2017: 230 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_11_2017.csv


                                                                                

Day 25_02_2018: 237 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_02_2018.csv


                                                                                

Day 10_04_2018: 201 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_04_2018.csv


                                                                                

Day 07_01_2018: 196 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_01_2018.csv


                                                                                

Day 03_09_2017: 125 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_09_2017.csv


                                                                                

Day 15_04_2018: 223 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_04_2018.csv


                                                                                

Day 11_02_2018: 172 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_02_2018.csv


                                                                                

Day 01_04_2018: 207 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_04_2018.csv


                                                                                

Day 01_05_2018: 256 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_05_2018.csv


                                                                                

Day 23_05_2017: 125 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_05_2017.csv


                                                                                

Day 05_09_2017: 157 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_09_2017.csv


                                                                                

Day 18_06_2017: 90 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_06_2017.csv


                                                                                

Day 30_07_2018: 288 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_07_2018.csv


                                                                                

Day 07_05_2018: 372 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_05_2018.csv


                                                                                

Day 04_07_2017: 125 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_07_2017.csv


                                                                                

Day 30_07_2017: 117 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_07_2017.csv


                                                                                

Day 26_01_2017: 86 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_01_2017.csv


                                                                                

Day 10_10_2017: 185 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_10_2017.csv


                                                                                

Day 17_08_2017: 158 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_08_2017.csv


                                                                                

Day 05_04_2018: 266 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_04_2018.csv


                                                                                

Day 01_05_2017: 117 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_05_2017.csv


                                                                                

Day 12_09_2017: 206 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_09_2017.csv


                                                                                

Day 05_12_2017: 282 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_12_2017.csv


                                                                                

Day 19_04_2018: 293 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_04_2018.csv


                                                                                

Day 05_03_2018: 264 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_03_2018.csv


                                                                                

Day 21_07_2017: 115 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_07_2017.csv


                                                                                

Day 03_10_2017: 198 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_10_2017.csv


                                                                                

Day 11_01_2018: 266 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_01_2018.csv


                                                                                

Day 17_09_2017: 126 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_09_2017.csv


                                                                                

Day 26_02_2017: 46 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_02_2017.csv


                                                                                

Day 29_06_2018: 174 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_06_2018.csv


                                                                                

Day 12_05_2017: 115 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_05_2017.csv


                                                                                

Day 10_11_2017: 165 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_11_2017.csv


                                                                                

Day 08_07_2018: 122 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_07_2018.csv


                                                                                

Day 10_05_2018: 279 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_05_2018.csv


                                                                                

Day 01_07_2017: 80 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_07_2017.csv


                                                                                

Day 23_08_2017: 112 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_08_2017.csv


                                                                                

Day 19_03_2017: 56 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_03_2017.csv


                                                                                

Day 06_06_2017: 134 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_06_2017.csv


                                                                                

Day 04_04_2018: 257 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_04_2018.csv


                                                                                

Day 19_06_2018: 231 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_06_2018.csv


                                                                                

Day 20_01_2018: 184 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_01_2018.csv


                                                                                

Day 30_08_2018: 4 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_08_2018.csv


                                                                                

Day 06_12_2017: 280 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_12_2017.csv


                                                                                

Day 12_03_2017: 64 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_03_2017.csv


                                                                                

Day 22_05_2017: 148 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_05_2017.csv


                                                                                

Day 18_09_2017: 183 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_09_2017.csv


                                                                                

Day 03_02_2018: 193 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_02_2018.csv


                                                                                

Day 15_12_2017: 192 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_12_2017.csv


                                                                                

Day 18_06_2018: 246 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_06_2018.csv


                                                                                

Day 13_11_2017: 205 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_11_2017.csv


                                                                                

Day 25_07_2017: 153 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_07_2017.csv


                                                                                

Day 21_10_2017: 113 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_10_2017.csv


                                                                                

Day 28_08_2018: 44 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_08_2018.csv


                                                                                

Day 26_03_2017: 64 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_03_2017.csv


                                                                                

Day 04_11_2017: 111 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_11_2017.csv


                                                                                

Day 06_09_2018: 3 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_09_2018.csv


                                                                                

Day 23_01_2017: 39 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_01_2017.csv


                                                                                

Day 01_08_2018: 311 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_08_2018.csv


                                                                                

Day 23_11_2017: 283 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_11_2017.csv


                                                                                

Day 05_05_2017: 111 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_05_2017.csv


                                                                                

Day 22_04_2018: 200 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_04_2018.csv


                                                                                

Day 28_05_2017: 98 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_05_2017.csv


                                                                                

Day 26_02_2018: 299 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_02_2018.csv


                                                                                

Day 22_11_2017: 201 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_11_2017.csv


                                                                                

Day 17_12_2017: 133 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_12_2017.csv


                                                                                

Day 11_05_2018: 247 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_05_2018.csv


                                                                                

Day 16_11_2017: 226 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_11_2017.csv


                                                                                

Day 11_03_2017: 71 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_03_2017.csv


                                                                                

Day 28_07_2017: 132 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_07_2017.csv


                                                                                

Day 19_07_2018: 253 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_07_2018.csv


                                                                                

Day 03_10_2016: 8 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_10_2016.csv


                                                                                

Day 30_12_2017: 97 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_12_2017.csv


                                                                                

Day 08_10_2016: 42 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_10_2016.csv


                                                                                

Day 04_06_2018: 225 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_06_2018.csv


                                                                                

Day 15_05_2018: 352 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_05_2018.csv


                                                                                

Day 24_08_2018: 99 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_08_2018.csv


                                                                                

Day 25_04_2017: 111 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_04_2017.csv


                                                                                

Day 08_05_2018: 331 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_05_2018.csv


                                                                                

Day 28_02_2018: 313 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_02_2018.csv


                                                                                

Day 11_04_2017: 78 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_04_2017.csv


                                                                                

Day 26_03_2018: 272 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_03_2018.csv


                                                                                

Day 06_06_2018: 227 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_06_2018.csv


                                                                                

Day 20_05_2017: 72 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_05_2017.csv


                                                                                

Day 24_10_2017: 164 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_10_2017.csv


                                                                                

Day 26_01_2018: 229 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_01_2018.csv


                                                                                

Day 06_01_2018: 216 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_01_2018.csv


                                                                                

Day 21_06_2018: 234 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_06_2018.csv


                                                                                

Day 22_02_2017: 63 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_02_2017.csv


                                                                                

Day 27_04_2018: 242 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_04_2018.csv


                                                                                

Day 18_08_2017: 147 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_08_2017.csv


                                                                                

Day 29_04_2018: 171 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_04_2018.csv


                                                                                

Day 10_04_2017: 87 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_04_2017.csv


                                                                                

Day 07_07_2018: 91 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_07_2018.csv


                                                                                

Day 04_09_2016: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_09_2016.csv


                                                                                

Day 09_12_2017: 155 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_12_2017.csv


                                                                                

Day 16_08_2018: 320 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_08_2018.csv


                                                                                

Day 03_05_2018: 305 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_05_2018.csv


                                                                                

Day 01_09_2017: 158 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_09_2017.csv


                                                                                

Day 01_08_2017: 165 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_08_2017.csv


                                                                                

Day 23_05_2018: 132 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_05_2018.csv


                                                                                

Day 25_11_2017: 499 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_11_2017.csv


                                                                                

Day 15_06_2017: 108 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_06_2017.csv


                                                                                

Day 11_03_2018: 218 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_03_2018.csv


                                                                                

Day 15_11_2017: 186 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_11_2017.csv


                                                                                

Day 19_09_2017: 154 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_09_2017.csv


                                                                                

Day 21_03_2018: 286 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_03_2018.csv


                                                                                

Day 09_08_2017: 144 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_08_2017.csv


                                                                                

Day 24_09_2017: 118 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_09_2017.csv


                                                                                

Day 04_10_2017: 157 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_10_2017.csv


                                                                                

Day 27_05_2017: 83 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_05_2017.csv


                                                                                

Day 11_01_2017: 12 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_01_2017.csv


                                                                                

Day 07_03_2018: 259 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_03_2018.csv


                                                                                

Day 17_06_2017: 75 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_06_2017.csv


                                                                                

Day 09_07_2017: 94 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_07_2017.csv


                                                                                

Day 13_04_2018: 202 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_04_2018.csv


                                                                                

Day 18_03_2018: 215 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_03_2018.csv


                                                                                

Day 28_01_2018: 156 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_01_2018.csv


                                                                                

Day 30_01_2017: 53 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_01_2017.csv


                                                                                

Day 13_01_2017: 12 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_01_2017.csv


                                                                                

Day 13_06_2017: 126 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_06_2017.csv


                                                                                

Day 18_04_2017: 71 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_04_2017.csv


                                                                                

Day 17_07_2017: 168 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_07_2017.csv


                                                                                

Day 15_02_2017: 62 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_02_2017.csv


                                                                                

Day 08_07_2017: 84 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_07_2017.csv


                                                                                

Day 06_05_2017: 109 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_05_2017.csv


                                                                                

Day 12_03_2018: 232 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_03_2018.csv


                                                                                

Day 22_04_2017: 84 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_04_2017.csv


                                                                                

Day 30_08_2017: 181 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_08_2017.csv


                                                                                

Day 14_05_2017: 112 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_05_2017.csv


                                                                                

Day 22_06_2018: 179 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_06_2018.csv


                                                                                

Day 04_08_2017: 142 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_08_2017.csv


                                                                                

Day 22_01_2018: 314 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_01_2018.csv


                                                                                

Day 23_03_2017: 114 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_03_2017.csv


                                                                                

Day 07_11_2017: 160 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_11_2017.csv


                                                                                

Day 04_02_2017: 67 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_02_2017.csv


                                                                                

Day 05_06_2018: 200 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_06_2018.csv


                                                                                

Day 14_02_2018: 295 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_02_2018.csv


                                                                                

Day 12_01_2017: 13 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_01_2017.csv


                                                                                

Day 26_07_2018: 247 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_07_2018.csv


                                                                                

Day 02_05_2017: 128 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_05_2017.csv


                                                                                

Day 26_04_2018: 254 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_04_2018.csv


                                                                                

Day 26_05_2018: 99 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_05_2018.csv


                                                                                

Day 03_02_2017: 60 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_02_2017.csv


                                                                                

Day 19_11_2017: 158 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_11_2017.csv


                                                                                

Day 13_04_2017: 60 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_04_2017.csv


                                                                                

Day 22_08_2017: 138 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_08_2017.csv


                                                                                

Day 12_12_2017: 260 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_12_2017.csv


                                                                                

Day 26_11_2017: 391 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_11_2017.csv


                                                                                

Day 29_04_2017: 74 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_04_2017.csv


                                                                                

Day 06_07_2017: 154 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_07_2017.csv


                                                                                

Day 05_11_2017: 144 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_11_2017.csv


                                                                                

Day 14_05_2018: 364 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_05_2018.csv


                                                                                

Day 27_12_2017: 167 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_12_2017.csv


                                                                                

Day 27_01_2017: 62 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_01_2017.csv


                                                                                

Day 13_03_2017: 111 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_03_2017.csv


                                                                                

Day 22_07_2018: 215 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_07_2018.csv


                                                                                

Day 11_10_2017: 158 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_10_2017.csv


                                                                                

Day 14_09_2017: 166 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_09_2017.csv


                                                                                

Day 05_03_2017: 74 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_03_2017.csv


                                                                                

Day 02_07_2017: 108 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_07_2017.csv


                                                                                

Day 08_02_2017: 95 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_02_2017.csv


                                                                                

Day 06_11_2017: 193 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_11_2017.csv


                                                                                

Day 08_06_2017: 137 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_06_2017.csv


                                                                                

Day 27_08_2018: 67 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_08_2018.csv


                                                                                

Day 25_05_2017: 108 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_05_2017.csv


                                                                                

Day 24_04_2017: 115 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_04_2017.csv


                                                                                

Day 01_02_2017: 73 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_02_2017.csv


                                                                                

Day 18_01_2018: 241 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_01_2018.csv


                                                                                

Day 29_12_2017: 135 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_12_2017.csv


                                                                                

Day 22_06_2017: 88 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_06_2017.csv


                                                                                

Day 17_09_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_09_2018.csv


                                                                                

Day 13_07_2017: 137 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_07_2017.csv


                                                                                

Day 12_06_2018: 246 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_06_2018.csv


                                                                                

Day 14_08_2018: 316 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_08_2018.csv


                                                                                

Day 20_07_2017: 142 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_07_2017.csv


                                                                                

Day 31_01_2017: 70 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_01_2017.csv


                                                                                

Day 24_01_2017: 40 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_01_2017.csv


                                                                                

Day 13_08_2018: 292 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_08_2018.csv


                                                                                

Day 09_04_2017: 73 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_04_2017.csv


                                                                                

Day 25_04_2018: 284 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_04_2018.csv


                                                                                

Day 23_06_2018: 151 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_06_2018.csv


                                                                                

Day 07_09_2017: 99 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_09_2017.csv


                                                                                

Day 31_08_2017: 149 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_08_2017.csv


                                                                                

Day 12_07_2018: 124 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_07_2018.csv


                                                                                

Day 03_03_2018: 214 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_03_2018.csv


                                                                                

Day 25_01_2018: 233 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_01_2018.csv


                                                                                

Day 28_10_2017: 88 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_10_2017.csv


                                                                                

Day 05_09_2016: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_09_2016.csv


                                                                                

Day 17_05_2017: 141 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_05_2017.csv


                                                                                

Day 25_12_2017: 90 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_12_2017.csv


                                                                                

Day 04_10_2016: 63 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_10_2016.csv


                                                                                

Day 29_01_2017: 35 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_01_2017.csv


                                                                                

Day 22_03_2018: 254 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_03_2018.csv


                                                                                

Day 29_07_2017: 115 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_07_2017.csv


                                                                                

Day 14_04_2017: 54 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_04_2017.csv


                                                                                

Day 28_08_2017: 155 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_08_2017.csv


                                                                                

Day 20_03_2017: 119 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_03_2017.csv


                                                                                

Day 06_07_2018: 121 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_07_2018.csv


                                                                                

Day 17_07_2018: 221 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_07_2018.csv


                                                                                

Day 07_10_2017: 105 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_10_2017.csv


                                                                                

Day 03_03_2017: 74 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_03_2017.csv


                                                                                

Day 14_07_2018: 148 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_07_2018.csv


                                                                                

Day 18_12_2017: 209 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_12_2017.csv


                                                                                

Day 02_09_2017: 104 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_09_2017.csv


                                                                                

Day 24_06_2018: 199 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_06_2018.csv


                                                                                

Day 27_03_2018: 245 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_03_2018.csv


                                                                                

Day 23_03_2018: 221 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_03_2018.csv


                                                                                

Day 27_04_2017: 80 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_04_2017.csv


                                                                                

Day 18_01_2017: 33 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_01_2017.csv


                                                                                

Day 01_07_2018: 161 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_07_2018.csv


                                                                                

Day 14_06_2018: 212 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_06_2018.csv


                                                                                

Day 10_03_2017: 88 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_03_2017.csv


                                                                                

Day 11_07_2018: 127 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_07_2018.csv


                                                                                

Day 03_04_2017: 72 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_04_2017.csv


                                                                                

Day 13_09_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_09_2018.csv


                                                                                

Day 02_07_2018: 195 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_07_2018.csv


                                                                                

Day 30_05_2018: 140 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_05_2018.csv


                                                                                

Day 04_03_2018: 236 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_03_2018.csv


                                                                                

Day 10_07_2017: 143 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_07_2017.csv


                                                                                

Day 29_05_2017: 160 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_05_2017.csv


                                                                                

Day 28_04_2018: 169 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_04_2018.csv


                                                                                

Day 08_04_2018: 185 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_04_2018.csv


                                                                                

Day 15_07_2017: 98 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_07_2017.csv


                                                                                

Day 02_04_2017: 65 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_04_2017.csv


                                                                                

Day 28_03_2017: 103 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_03_2017.csv


                                                                                

Day 16_01_2018: 302 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_01_2018.csv


                                                                                

Day 11_08_2018: 188 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_08_2018.csv


                                                                                

Day 01_10_2017: 128 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_10_2017.csv


                                                                                

Day 25_08_2017: 123 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_08_2017.csv


                                                                                

Day 13_12_2017: 218 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_12_2017.csv


                                                                                

Day 19_05_2018: 139 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_05_2018.csv


                                                                                

Day 02_11_2017: 124 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_11_2017.csv


                                                                                

Day 17_01_2017: 32 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_01_2017.csv


                                                                                

Day 14_08_2017: 159 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_08_2017.csv


                                                                                

Day 11_08_2017: 141 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_08_2017.csv


                                                                                

Day 23_01_2018: 262 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_01_2018.csv


                                                                                

Day 05_07_2017: 145 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_07_2017.csv


                                                                                

Day 14_01_2017: 18 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_01_2017.csv


                                                                                

Day 14_07_2017: 149 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_07_2017.csv


                                                                                

Day 04_02_2018: 201 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_02_2018.csv


                                                                                

Day 20_09_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_09_2018.csv


                                                                                

Day 13_06_2018: 259 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_06_2018.csv


                                                                                

Day 10_05_2017: 116 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_05_2017.csv


                                                                                

Day 30_05_2017: 105 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_05_2017.csv


                                                                                

Day 30_03_2017: 74 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_03_2017.csv


                                                                                

Day 14_01_2018: 235 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_01_2018.csv


                                                                                

Day 06_03_2018: 271 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_03_2018.csv


                                                                                

Day 10_07_2018: 134 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_07_2018.csv


                                                                                

Day 02_10_2017: 143 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_10_2017.csv


                                                                                

Day 22_02_2018: 279 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_02_2018.csv


                                                                                

Day 08_06_2018: 198 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_06_2018.csv


                                                                                

Day 20_09_2017: 163 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_09_2017.csv


                                                                                

Day 25_06_2017: 105 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_06_2017.csv


                                                                                

Day 16_08_2017: 174 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_08_2017.csv


                                                                                

Day 29_08_2018: 14 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_08_2018.csv


                                                                                

Day 27_07_2018: 189 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_07_2018.csv


                                                                                

Day 04_09_2017: 150 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_09_2017.csv


                                                                                

Day 09_07_2018: 148 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_07_2018.csv


                                                                                

Day 26_08_2018: 73 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_08_2018.csv


                                                                                

Day 31_05_2018: 133 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_05_2018.csv


                                                                                

Day 22_10_2017: 158 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_10_2017.csv


                                                                                

Day 15_04_2017: 52 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_04_2017.csv


                                                                                

Day 30_11_2017: 267 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_11_2017.csv


                                                                                

Day 22_12_2017: 111 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_12_2017.csv


                                                                                

Day 09_05_2018: 344 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_05_2018.csv


                                                                                

Day 23_12_2017: 109 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_12_2017.csv


                                                                                

Day 20_04_2017: 98 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_04_2017.csv


                                                                                

Day 23_06_2017: 107 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_06_2017.csv


                                                                                

Day 23_09_2017: 88 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_09_2017.csv


                                                                                

Day 24_07_2018: 271 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_07_2018.csv


                                                                                

Day 17_04_2018: 266 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_04_2018.csv


                                                                                

Day 14_03_2018: 199 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_03_2018.csv


                                                                                

Day 30_10_2017: 149 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_10_2017.csv


                                                                                

Day 16_05_2017: 153 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_05_2017.csv


                                                                                

Day 01_02_2018: 232 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_02_2018.csv


                                                                                

Day 15_01_2018: 307 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_01_2018.csv


                                                                                

Day 24_02_2017: 64 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_02_2017.csv


                                                                                

Day 17_05_2018: 228 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_05_2018.csv


                                                                                

Day 29_01_2018: 246 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_01_2018.csv


                                                                                

Day 10_06_2018: 193 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_06_2018.csv


                                                                                

Day 21_03_2017: 97 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_03_2017.csv


                                                                                

Day 23_04_2017: 88 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_04_2017.csv


                                                                                

Day 13_02_2018: 224 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_02_2018.csv


                                                                                

Day 29_03_2018: 194 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_03_2018.csv


                                                                                

Day 13_02_2017: 78 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_02_2017.csv


                                                                                

Day 10_12_2017: 190 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_12_2017.csv


                                                                                

Day 21_08_2017: 177 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_08_2017.csv


                                                                                

Day 07_12_2017: 229 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_12_2017.csv


                                                                                

Day 10_03_2018: 193 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_03_2018.csv


                                                                                

Day 19_07_2017: 153 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_07_2017.csv


                                                                                

Day 20_01_2017: 29 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_01_2017.csv


                                                                                

Day 09_01_2018: 252 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_01_2018.csv


                                                                                

Day 28_06_2018: 243 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_06_2018.csv


                                                                                

Day 30_06_2018: 124 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_06_2018.csv


                                                                                

Day 30_04_2018: 240 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_04_2018.csv


                                                                                

Day 21_05_2018: 206 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_05_2018.csv


                                                                                

Day 20_03_2018: 297 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_03_2018.csv


                                                                                

Day 02_06_2018: 142 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_06_2018.csv


                                                                                

Day 20_06_2017: 94 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_06_2017.csv


                                                                                

Day 24_05_2017: 142 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_05_2017.csv


                                                                                

Day 14_02_2017: 76 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_02_2017.csv


                                                                                

Day 13_10_2017: 154 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_10_2017.csv


                                                                                

Day 15_08_2017: 194 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_08_2017.csv


                                                                                

Day 05_10_2016: 47 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_10_2016.csv


                                                                                

Day 25_09_2017: 136 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_09_2017.csv


                                                                                

Day 08_04_2017: 68 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_04_2017.csv


                                                                                

Day 19_01_2017: 29 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_01_2017.csv


                                                                                

Day 16_02_2018: 214 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_02_2018.csv


                                                                                

Day 08_08_2017: 146 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_08_2017.csv


                                                                                

Day 06_10_2016: 51 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_10_2016.csv


                                                                                

Day 28_07_2018: 176 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_07_2018.csv


                                                                                

Day 13_01_2018: 219 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_01_2018.csv


                                                                                

Day 07_02_2017: 112 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_02_2017.csv


                                                                                

Day 23_07_2017: 105 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_07_2017.csv


                                                                                

Day 12_08_2017: 98 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_08_2017.csv


                                                                                

Day 04_04_2017: 96 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_04_2017.csv


                                                                                

Day 15_10_2017: 121 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_10_2017.csv


                                                                                

Day 25_08_2018: 69 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_08_2018.csv


                                                                                

Day 24_08_2017: 128 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_08_2017.csv


                                                                                

Day 09_08_2018: 289 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_08_2018.csv


                                                                                

Day 13_03_2018: 227 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_03_2018.csv


                                                                                

Day 05_06_2017: 147 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_06_2017.csv


                                                                                

Day 12_02_2017: 64 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_02_2017.csv


                                                                                

Day 05_10_2017: 140 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_10_2017.csv


                                                                                

Day 24_04_2018: 271 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_04_2018.csv


                                                                                

Day 01_11_2017: 111 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_11_2017.csv


                                                                                

Day 09_02_2018: 216 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_02_2018.csv


                                                                                

Day 11_05_2017: 126 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_05_2017.csv


                                                                                

Day 03_11_2017: 143 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_11_2017.csv


                                                                                

Day 21_08_2018: 243 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_08_2018.csv


                                                                                

Day 16_12_2017: 131 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_12_2017.csv


                                                                                

Day 15_06_2018: 186 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_06_2018.csv


                                                                                

Day 04_03_2017: 80 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_03_2017.csv


                                                                                

Day 12_02_2018: 218 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_02_2018.csv


                                                                                

Day 25_03_2018: 190 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_03_2018.csv


                                                                                

Day 09_06_2017: 108 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_06_2017.csv


                                                                                

Day 21_06_2017: 35 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_06_2017.csv


                                                                                

Day 22_09_2017: 138 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_09_2017.csv


                                                                                

Day 17_02_2018: 204 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_02_2018.csv


                                                                                

Day 01_06_2018: 184 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_06_2018.csv


                                                                                

Day 02_04_2018: 282 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_04_2018.csv


                                                                                

Day 01_01_2018: 74 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_01_2018.csv


                                                                                

Day 15_03_2017: 108 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_03_2017.csv


                                                                                

Day 16_01_2017: 19 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_01_2017.csv


                                                                                

Day 05_01_2018: 210 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_01_2018.csv


                                                                                

Day 30_09_2017: 109 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_09_2017.csv


                                                                                

Day 10_02_2017: 66 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_02_2017.csv


                                                                                

Day 06_02_2018: 268 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_02_2018.csv


                                                                                

Day 22_07_2017: 88 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_07_2017.csv


                                                                                

Day 12_01_2018: 243 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_01_2018.csv


                                                                                

Day 08_11_2017: 175 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_11_2017.csv


                                                                                

Day 02_02_2017: 69 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_02_2017.csv


                                                                                

Day 08_01_2017: 6 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_01_2017.csv


                                                                                

Day 01_06_2017: 124 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_06_2017.csv


                                                                                

Day 15_01_2017: 14 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_01_2017.csv


                                                                                

Day 26_08_2017: 93 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_08_2017.csv


                                                                                

Day 04_05_2018: 265 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_05_2018.csv


                                                                                

Day 07_08_2018: 370 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_08_2018.csv


                                                                                

Day 03_08_2017: 148 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_08_2017.csv


                                                                                

Day 19_04_2017: 97 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_04_2017.csv


                                                                                

Day 18_02_2018: 202 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_02_2018.csv


                                                                                

Day 08_12_2017: 219 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_12_2017.csv


                                                                                

Day 08_03_2018: 234 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_03_2018.csv


                                                                                

Day 11_09_2017: 180 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_09_2017.csv


                                                                                

Day 18_04_2018: 280 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_04_2018.csv


                                                                                

Day 17_02_2017: 46 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_02_2017.csv


                                                                                

Day 28_12_2017: 146 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_12_2017.csv


                                                                                

Day 15_09_2017: 150 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_09_2017.csv


                                                                                

Day 02_08_2018: 302 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_08_2018.csv


                                                                                

Day 23_04_2018: 285 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_04_2018.csv


                                                                                

Day 26_10_2017: 144 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_10_2017.csv


                                                                                

Day 21_12_2017: 143 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_12_2017.csv


                                                                                

Day 03_04_2018: 247 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_04_2018.csv


                                                                                

Day 15_05_2017: 157 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_05_2017.csv


                                                                                

Day 05_04_2017: 96 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_04_2017.csv


                                                                                

Day 16_09_2017: 129 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_09_2017.csv


                                                                                

Day 07_02_2018: 248 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_02_2018.csv


                                                                                

Day 06_08_2018: 372 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_08_2018.csv


                                                                                

Day 06_05_2018: 211 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_05_2018.csv


                                                                                

Day 16_02_2017: 55 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_02_2017.csv


                                                                                

Day 05_01_2017: 32 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_01_2017.csv


                                                                                

Day 12_08_2018: 197 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_08_2018.csv


                                                                                

Day 24_03_2017: 79 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_03_2017.csv


                                                                                

Day 29_10_2017: 132 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_10_2017.csv


                                                                                

Day 10_08_2017: 157 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_08_2017.csv


                                                                                

Day 09_03_2017: 93 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_03_2017.csv


                                                                                

Day 10_01_2018: 277 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_01_2018.csv


                                                                                

Day 24_06_2017: 76 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_06_2017.csv


                                                                                

Day 29_08_2017: 137 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_08_2017.csv


                                                                                

Day 17_04_2017: 65 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_04_2017.csv


                                                                                

Day 09_04_2018: 253 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_04_2018.csv


                                                                                

Day 08_05_2017: 124 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_05_2017.csv


                                                                                

Day 13_09_2017: 207 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_09_2017.csv


                                                                                

Day 12_04_2018: 255 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_04_2018.csv


                                                                                

Day 06_09_2017: 140 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_09_2017.csv


                                                                                

Day 14_04_2018: 146 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_04_2018.csv


                                                                                

Day 31_03_2017: 70 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_03_2017.csv


                                                                                

Day 21_04_2018: 156 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_04_2018.csv


                                                                                

Day 12_06_2017: 126 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_06_2017.csv


                                                                                

Day 11_11_2017: 159 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_11_2017.csv


                                                                                

Day 29_05_2018: 149 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_05_2018.csv


                                                                                

Day 01_04_2017: 68 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_04_2017.csv


                                                                                

Day 26_09_2017: 180 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_09_2017.csv


                                                                                

Day 21_01_2017: 24 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_01_2017.csv


                                                                                

Day 22_01_2017: 31 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_01_2017.csv


                                                                                

Day 12_05_2018: 207 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_05_2018.csv


                                                                                

Day 14_11_2017: 192 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_14_11_2017.csv


                                                                                

Day 07_07_2017: 121 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_07_2017.csv


                                                                                

Day 08_09_2017: 116 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_09_2017.csv


                                                                                

Day 21_02_2017: 41 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_02_2017.csv


                                                                                

Day 21_11_2017: 228 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_11_2017.csv


                                                                                

Day 07_03_2017: 82 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_03_2017.csv


                                                                                

Day 01_03_2017: 99 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_03_2017.csv


                                                                                

Day 27_02_2017: 43 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_02_2017.csv


                                                                                

Day 31_07_2017: 148 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_07_2017.csv


                                                                                

Day 04_07_2018: 256 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_07_2018.csv


                                                                                

Day 03_05_2017: 103 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_05_2017.csv


                                                                                

Day 07_05_2017: 111 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_05_2017.csv


                                                                                

Day 02_12_2017: 216 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_12_2017.csv


                                                                                

Day 05_02_2018: 271 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_02_2018.csv


                                                                                

Day 29_06_2017: 114 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_06_2017.csv


                                                                                

Day 31_07_2018: 322 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_07_2018.csv


                                                                                

Day 15_08_2018: 288 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_08_2018.csv


                                                                                

Day 05_02_2017: 75 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_02_2017.csv


                                                                                

Day 19_05_2017: 144 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_05_2017.csv


                                                                                

Day 17_10_2017: 202 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_10_2017.csv


                                                                                

Day 31_10_2017: 160 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_10_2017.csv


                                                                                

Day 27_10_2017: 133 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_10_2017.csv


                                                                                

Day 19_02_2017: 40 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_02_2017.csv


                                                                                

Day 07_06_2018: 223 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_06_2018.csv


                                                                                

Day 09_10_2017: 193 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_10_2017.csv


                                                                                

Day 04_12_2017: 337 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_12_2017.csv


                                                                                

Day 19_01_2018: 235 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_01_2018.csv


                                                                                

Day 03_07_2018: 216 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_07_2018.csv


                                                                                

Day 26_06_2017: 72 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_06_2017.csv


                                                                                

Day 04_01_2018: 258 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_01_2018.csv


                                                                                

Day 02_03_2017: 72 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_03_2017.csv


                                                                                

Day 04_05_2017: 109 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_05_2017.csv


                                                                                

Day 31_01_2018: 256 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_01_2018.csv


                                                                                

Day 25_01_2017: 63 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_01_2017.csv


                                                                                

Day 17_01_2018: 282 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_01_2018.csv


                                                                                

Day 02_08_2017: 157 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_08_2017.csv


                                                                                

Day 18_07_2017: 192 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_07_2017.csv


                                                                                

Day 09_03_2018: 204 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_03_2018.csv


                                                                                

Day 31_03_2018: 168 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_03_2018.csv


                                                                                

Day 02_01_2018: 204 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_01_2018.csv


                                                                                

Day 29_11_2017: 323 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_11_2017.csv


                                                                                

Day 11_02_2017: 49 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_02_2017.csv


                                                                                

Day 03_01_2018: 225 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_01_2018.csv


                                                                                

Day 22_05_2018: 199 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_05_2018.csv


                                                                                

Day 19_08_2018: 204 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_08_2018.csv


                                                                                

Day 11_06_2018: 294 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_06_2018.csv


                                                                                

Day 16_07_2017: 114 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_07_2017.csv


                                                                                

Day 24_01_2018: 244 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_01_2018.csv


                                                                                

Day 09_02_2017: 77 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_02_2017.csv


                                                                                

Day 13_09_2016: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_09_2016.csv


                                                                                

Day 08_01_2018: 293 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_01_2018.csv


                                                                                

Day 19_10_2017: 176 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_10_2017.csv


                                                                                

Day 26_04_2017: 125 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_04_2017.csv


                                                                                

Day 16_06_2018: 165 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_06_2018.csv


                                                                                

Day 18_11_2017: 149 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_11_2017.csv


                                                                                

Day 17_11_2017: 197 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_11_2017.csv


                                                                                

Day 18_05_2018: 236 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_05_2018.csv


                                                                                

Day 20_06_2018: 217 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_06_2018.csv


                                                                                

Day 28_03_2018: 220 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_03_2018.csv


                                                                                

Day 28_09_2017: 143 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_09_2017.csv


                                                                                

Day 18_05_2017: 125 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_05_2017.csv


                                                                                

Day 04_06_2017: 102 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_04_06_2017.csv


                                                                                

Day 26_07_2017: 124 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_07_2017.csv


                                                                                

Day 06_03_2017: 105 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_03_2017.csv


                                                                                

Day 03_06_2018: 192 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_06_2018.csv


                                                                                

Day 08_03_2017: 79 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_03_2017.csv


                                                                                

Day 25_07_2018: 268 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_07_2018.csv


                                                                                

Day 21_02_2018: 266 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_02_2018.csv


                                                                                

Day 28_01_2017: 29 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_01_2017.csv


                                                                                

Day 18_07_2018: 307 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_07_2018.csv


                                                                                

Day 27_11_2017: 403 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_11_2017.csv


                                                                                

Day 02_06_2017: 127 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_06_2017.csv


                                                                                

Day 19_03_2018: 303 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_19_03_2018.csv


                                                                                

Day 13_08_2017: 96 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_08_2017.csv


                                                                                

Day 21_07_2018: 187 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_07_2018.csv


                                                                                

Day 08_10_2017: 126 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_10_2017.csv


                                                                                

Day 18_08_2018: 198 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_08_2018.csv


                                                                                

Day 15_07_2018: 152 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_07_2018.csv


                                                                                

Day 11_12_2017: 267 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_12_2017.csv


                                                                                

Day 28_06_2017: 120 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_06_2017.csv


                                                                                

Day 29_09_2017: 121 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_09_2017.csv


                                                                                

Day 25_05_2018: 104 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_05_2018.csv


                                                                                

Day 16_06_2017: 97 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_06_2017.csv


                                                                                

Day 13_05_2018: 207 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_13_05_2018.csv


                                                                                

Day 26_06_2018: 243 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_06_2018.csv


                                                                                

Day 25_10_2017: 156 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_10_2017.csv


                                                                                

Day 24_11_2017: 1176 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_11_2017.csv


                                                                                

Day 27_06_2017: 141 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_06_2017.csv


                                                                                

Day 09_09_2017: 106 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_09_2017.csv


                                                                                

Day 31_12_2017: 74 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_12_2017.csv


                                                                                

Day 05_08_2018: 276 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_08_2018.csv


                                                                                

Day 27_01_2018: 153 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_01_2018.csv


                                                                                

Day 27_06_2018: 215 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_06_2018.csv


                                                                                

Day 20_02_2018: 289 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_02_2018.csv


                                                                                

Day 05_07_2018: 195 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_05_07_2018.csv


                                                                                

Day 20_12_2017: 171 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_12_2017.csv


                                                                                

Day 24_12_2017: 59 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_12_2017.csv


                                                                                

Day 08_02_2018: 230 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_08_02_2018.csv


                                                                                

Day 23_02_2018: 235 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_02_2018.csv


                                                                                

Day 29_03_2017: 74 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_03_2017.csv


                                                                                

Day 23_08_2018: 144 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_08_2018.csv


                                                                                

Day 28_02_2017: 55 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_02_2017.csv


                                                                                

Day 23_02_2017: 59 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_02_2017.csv


                                                                                

Day 24_02_2018: 191 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_24_02_2018.csv


                                                                                

Day 03_07_2017: 118 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_07_2017.csv


                                                                                

Day 18_02_2017: 54 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_18_02_2017.csv


                                                                                

Day 03_06_2017: 89 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_06_2017.csv


                                                                                

Day 26_12_2017: 168 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_12_2017.csv


                                                                                

Day 16_10_2017: 195 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_10_2017.csv


                                                                                

Day 10_09_2017: 136 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_09_2017.csv


                                                                                

Day 21_01_2018: 199 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_21_01_2018.csv


                                                                                

Day 06_04_2018: 189 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_04_2018.csv


                                                                                

Day 16_07_2018: 245 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_07_2018.csv


                                                                                

Day 09_10_2016: 26 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_10_2016.csv


                                                                                

Day 07_01_2017: 4 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_01_2017.csv


                                                                                

Day 27_02_2018: 298 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_02_2018.csv


                                                                                

Day 23_07_2018: 307 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_07_2018.csv


                                                                                

Day 30_04_2017: 68 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_04_2017.csv


                                                                                

Day 07_10_2016: 46 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_07_10_2016.csv


                                                                                

Day 29_07_2018: 188 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_07_2018.csv


                                                                                

Day 11_09_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_09_2018.csv


                                                                                

Day 27_09_2017: 147 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_27_09_2017.csv


                                                                                

Day 30_03_2018: 165 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_30_03_2018.csv


                                                                                

Day 28_05_2018: 143 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_28_05_2018.csv


                                                                                

Day 11_06_2017: 115 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_11_06_2017.csv


                                                                                

Day 16_03_2018: 252 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_03_2018.csv


                                                                                

Day 20_07_2018: 250 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_07_2018.csv


                                                                                

Day 10_09_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_09_2018.csv


                                                                                

Day 15_09_2016: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_09_2016.csv


                                                                                

Day 03_09_2018: 4 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_09_2018.csv


                                                                                

Day 06_01_2017: 4 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_06_01_2017.csv


                                                                                

Day 10_01_2017: 6 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_10_01_2017.csv


                                                                                

Day 23_12_2016: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_23_12_2016.csv


                                                                                

Day 25_09_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_25_09_2018.csv


                                                                                

Day 03_10_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_03_10_2018.csv


                                                                                

Day 09_01_2017: 5 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_09_01_2017.csv


                                                                                

Day 26_09_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_26_09_2018.csv


                                                                                

Day 17_10_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_17_10_2018.csv


                                                                                

Day 16_10_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_16_10_2018.csv


                                                                                

Day 22_10_2016: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_22_10_2016.csv


                                                                                

Day 29_09_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_29_09_2018.csv


                                                                                

Day 01_10_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_01_10_2018.csv


                                                                                

Day 12_09_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_12_09_2018.csv


                                                                                

Day 02_10_2016: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_02_10_2016.csv


                                                                                

Day 31_08_2018: 1 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_31_08_2018.csv


                                                                                

Day 20_08_2018: 256 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_20_08_2018.csv


                                                                                

Day 15_02_2018: 283 rows
Saved to: datamart/bronze/orders/bronze_olist_orders_15_02_2018.csv


DataFrame[order_id: string, customer_id: string, order_status: string, order_purchase_timestamp: timestamp, order_approved_at: timestamp, order_delivered_carrier_date: timestamp, order_delivered_customer_date: timestamp, order_estimated_delivery_date: timestamp, snapshot_date: string]

In [4]:
# Preview the first rows of the bronze customers table written above
df_bronze = spark.read.parquet(
    "datamart/bronze/customers/bronze_olist_customers.parquet"
)
df_bronze.show(n=5)

+--------------------+--------------------+------------------------+--------------------+--------------+
|         customer_id|  customer_unique_id|customer_zip_code_prefix|       customer_city|customer_state|
+--------------------+--------------------+------------------------+--------------------+--------------+
|503840d4f2a1a7609...|ffc4233210eac4ec1...|                   14811|          araraquara|            SP|
|52e73a5d0a1d4c56b...|b43530186123fb6d9...|                   62625|               missi|            CE|
|16cb62869f9719571...|c3cc321141423ab8a...|                   55560|           barreiros|            PE|
|4979ba0e6037e4b28...|80768413a59684f1e...|                   29307|cachoeiro de itap...|            ES|
|11ec4bc0610184925...|bd836cf4fce7f808b...|                   22420|      rio de janeiro|            RJ|
+--------------------+--------------------+------------------------+--------------------+--------------+
only showing top 5 rows


## Build Silver Table

Important note: The datamart folder is created in a different location depending on whether the main.py script or this Jupyter notebook is run.

* This Jupyter notebook will create the datamart folder inside `scripts` folder and output the silver tables there.
* When you run the main.py script, the datamart folder will be created inside `app` folder (i.e. root) and output the silver tables there.

TODO: Hold a team meeting to resolve this discrepancy.

In [5]:
# Make sure the root folder of the silver layer is present before anything is written into it
silver_root = "datamart/silver"
os.makedirs(name=silver_root, exist_ok=True)
print(f"Silver root directory: {silver_root}")

Silver root directory: datamart/silver


In [6]:
# Create all required output directories

# One silver sub-directory per table. The variable names are kept because
# later cells pass them to the silver processing functions.
silver_cust_directory = "datamart/silver/customers/"                     # customer data
silver_sell_directory = "datamart/silver/sellers/"                       # seller data
silver_geo_directory = "datamart/silver/geolocation/"                    # geolocation data
silver_prod_directory = "datamart/silver/products/"                      # products data
silver_prod_cat_trans_directory = "datamart/silver/category_translation/"  # product_categories_translation data
silver_orders_directory = "datamart/silver/orders/"                      # orders data
silver_order_items_directory = "datamart/silver/order_items/"            # order_items data

# exist_ok=True makes the cell safe to re-run and avoids the race between a
# separate os.path.exists() check and the os.makedirs() call.
for _silver_dir in (
    silver_cust_directory,
    silver_sell_directory,
    silver_geo_directory,
    silver_prod_directory,
    silver_prod_cat_trans_directory,
    silver_orders_directory,
    silver_order_items_directory,
):
    os.makedirs(_silver_dir, exist_ok=True)

In [7]:
# Process all bronze tables into silver
# Only the customers, sellers and geolocation steps are active; the other
# tables are still commented out below.
print("\nProcessing bronze tables...")
silver_processing.process_silver_olist_customers(
    "datamart/bronze/customers/",
    silver_cust_directory,
    spark,
)
silver_processing.process_silver_olist_sellers(
    "datamart/bronze/sellers/",
    silver_sell_directory,
    spark,
)
silver_processing.process_silver_olist_geolocation(
    "datamart/bronze/geolocation/",
    silver_geo_directory,
    spark,
)
# silver_processing.process_silver_olist_products("datamart/bronze/products/",silver_prod_directory, spark)
# silver_processing.process_silver_olist_order_items("datamart/bronze/order_items/",silver_order_items_directory, spark)
# silver_processing.process_silver_olist_product_categories_translation("datamart/bronze/category_translation",silver_prod_cat_trans_directory, spark)
# silver_processing.process_silver_olist_orders("datamart/bronze/orders/",silver_orders_directory, spark)



Processing bronze tables...
loaded from: datamart/bronze/customers/bronze_olist_customers.parquet row count: 99441
Number of duplicated 'customer_id': 0


                                                                                

saved to: datamart/silver/customers/silver_olist_customers.parquet
loaded from: datamart/bronze/sellers/bronze_olist_sellers.parquet row count: 3095
Number of duplicated 'seller_id': 0
saved to: datamart/silver/sellers/silver_olist_sellers.parquet
loaded from: datamart/bronze/geolocation/bronze_olist_geolocation.parquet row count: 1000325


                                                                                

saved to: datamart/silver/geolocation/silver_olist_geolocation.parquet


DataFrame[geolocation_zip_code_prefix: string, geolocation_lat: double, geolocation_lng: double]

In [8]:
# Preview the first rows of the silver geolocation table written above
df_silver = spark.read.parquet(
    "datamart/silver/geolocation/silver_olist_geolocation.parquet"
)
df_silver.show(n=5)

+---------------------------+-------------------+-------------------+
|geolocation_zip_code_prefix|    geolocation_lat|    geolocation_lng|
+---------------------------+-------------------+-------------------+
|                      49290|-11.274805005391439|-37.790795516967776|
|                      49630|-10.605308055877686|-37.113027572631836|
|                      55445|   -8.5616774559021|  -35.8295783996582|
|                      57051| -9.655002400681779| -35.73440123893119|
|                      57085| -9.558634171119103| -35.73914117079515|
+---------------------------+-------------------+-------------------+
only showing top 5 rows


### Build Customer Table

In [9]:
# Create silver directory to save customer data.
# exist_ok=True keeps the cell idempotent on re-run and avoids the race between
# a separate os.path.exists() check and the os.makedirs() call.
silver_cust_directory = "datamart/silver/customers/"
os.makedirs(silver_cust_directory, exist_ok=True)

In [10]:
def process_silver_olist_customers(bronze_directory, silver_directory, spark):
    """Build the silver customers table from the bronze customers parquet.

    Reads ``bronze_olist_customers.parquet`` from ``bronze_directory``, casts
    the five customer columns to string, prints how many ``customer_id``
    values are duplicated, left-pads ``customer_zip_code_prefix`` with ``"0"``
    to a width of 5, and overwrites ``silver_olist_customers.parquet`` inside
    ``silver_directory``.

    Args:
        bronze_directory: Directory containing the bronze parquet. A trailing
            path separator is optional.
        silver_directory: Directory the silver parquet is written to. A
            trailing path separator is optional.
        spark: Object exposing ``read.parquet`` (a SparkSession in this
            notebook).

    Returns:
        The cleaned DataFrame that was written to the silver location.
    """
    # connect to bronze table
    # os.path.join keeps the path correct whether or not the caller supplied
    # a trailing slash on the directory.
    bronze_path = os.path.join(bronze_directory, "bronze_olist_customers.parquet")
    df = spark.read.parquet(bronze_path)

    # count() triggers a Spark job, so run it once and reuse the value for
    # both the load message and the duplicate check (casting below does not
    # add or remove rows).
    total_rows = df.count()
    print('loaded from:', bronze_path, 'row count:', total_rows)

    # clean data: enforce schema / data type
    # Dictionary specifying columns and their desired datatypes
    column_type_map = {
        "customer_id": StringType(),
        "customer_unique_id": StringType(),
        "customer_zip_code_prefix": StringType(),
        "customer_city": StringType(),
        "customer_state": StringType(),
    }

    for column, new_type in column_type_map.items():
        df = df.withColumn(column, col(column).cast(new_type))

    # Check customer_id duplicates (total rows - distinct ids)
    distinct_rows = df.select("customer_id").distinct().count()
    duplicates_customer_id = total_rows - distinct_rows
    print(f"Number of duplicated 'customer_id': {duplicates_customer_id}")

    # Add missing leading zero
    df = df.withColumn(
        "customer_zip_code_prefix",
        F.lpad(col("customer_zip_code_prefix"), 5, "0")
    )

    # save silver table - IRL connect to database to write
    silver_path = os.path.join(silver_directory, "silver_olist_customers.parquet")
    df.write.mode("overwrite").parquet(silver_path)
    print('saved to:', silver_path)

    return df

In [11]:
# Manual smoke test of the notebook-local customers function defined above
df = process_silver_olist_customers(
    bronze_directory="datamart/bronze/customers/",
    silver_directory=silver_cust_directory,
    spark=spark,
)

loaded from: datamart/bronze/customers/bronze_olist_customers.parquet row count: 99441
Number of duplicated 'customer_id': 0
saved to: datamart/silver/customers/silver_olist_customers.parquet


In [12]:
# Check schema enforced: the five columns cast in process_silver_olist_customers
# should all be reported as string.
df.printSchema()

root
 |-- customer_id: string (nullable = true)
 |-- customer_unique_id: string (nullable = true)
 |-- customer_zip_code_prefix: string (nullable = true)
 |-- customer_city: string (nullable = true)
 |-- customer_state: string (nullable = true)



In [13]:
# Confirm the zero-padding: count rows per zip-code-prefix length
# (after lpad to width 5, a single group of length 5 is expected)
(
    df
    .groupBy(F.length("customer_zip_code_prefix").alias("length"))
    .count()
    .show()
)

+------+-----+
|length|count|
+------+-----+
|     5|99441|
+------+-----+



### Build Seller Table

In [14]:
# Create silver directory to save seller data.
# exist_ok=True keeps the cell idempotent on re-run and avoids the race between
# a separate os.path.exists() check and the os.makedirs() call.
silver_sell_directory = "datamart/silver/sellers/"
os.makedirs(silver_sell_directory, exist_ok=True)

In [15]:
def process_silver_olist_sellers(bronze_directory, silver_directory, spark):
    """Build the silver sellers table from the bronze sellers parquet.

    Reads ``bronze_olist_sellers.parquet`` from ``bronze_directory``, casts every
    seller column to string, reports how many ``seller_id`` values are duplicated,
    left-pads the zip-code prefix to 5 characters and writes the result to
    ``silver_olist_sellers.parquet`` under ``silver_directory`` (overwrite mode).

    Args:
        bronze_directory: folder prefix of the bronze table (concatenated as-is,
            so it must end with a path separator).
        silver_directory: folder prefix of the silver table (same convention).
        spark: active SparkSession.

    Returns:
        The cleaned sellers DataFrame that was written.
    """
    # connect to bronze table
    bronze_path = bronze_directory + "bronze_olist_sellers.parquet"
    df = spark.read.parquet(bronze_path)
    print('loaded from:', bronze_path, 'row count:', df.count())

    # clean data: enforce schema / data type (all seller columns are strings)
    for name in ("seller_id", "seller_zip_code_prefix", "seller_city", "seller_state"):
        df = df.withColumn(name, col(name).cast(StringType()))

    # Check seller_id duplicates (total rows - distinct ids)
    n_rows = df.count()
    n_distinct_ids = df.select("seller_id").distinct().count()
    print(f"Number of duplicated 'seller_id': {n_rows - n_distinct_ids}")

    # Add missing leading zero
    zip_column = "seller_zip_code_prefix"
    df = df.withColumn(zip_column, F.lpad(col(zip_column), 5, "0"))

    # save silver table - IRL connect to database to write
    silver_path = silver_directory + "silver_olist_sellers.parquet"
    df.write.mode("overwrite").parquet(silver_path)
    print('saved to:', silver_path)

    return df

In [16]:
# Run function manually to test
df = process_silver_olist_sellers(
    bronze_directory="datamart/bronze/sellers/",
    silver_directory=silver_sell_directory,
    spark=spark,
)

loaded from: datamart/bronze/sellers/bronze_olist_sellers.parquet row count: 3095
Number of duplicated 'seller_id': 0
saved to: datamart/silver/sellers/silver_olist_sellers.parquet


In [17]:
# Check schema enforced: print the column types of the sellers DataFrame returned by the run above
df.printSchema()

root
 |-- seller_id: string (nullable = true)
 |-- seller_zip_code_prefix: string (nullable = true)
 |-- seller_city: string (nullable = true)
 |-- seller_state: string (nullable = true)



In [18]:
# Check missing leading zero padded: count rows per zip-prefix length
# (a single bucket of length 5 means every prefix was padded)
zip_prefix_length = F.length("seller_zip_code_prefix").alias("length")
df.groupBy(zip_prefix_length).count().show()

+------+-----+
|length|count|
+------+-----+
|     5| 3095|
+------+-----+



### Build Geolocation Table

In [19]:
# Create silver directory to save geolocation data
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race
silver_geo_directory = "datamart/silver/geolocation/"
os.makedirs(silver_geo_directory, exist_ok=True)

In [20]:
def process_silver_olist_geolocation(bronze_directory, silver_directory, spark):
    """Build the silver geolocation table: one centroid row per zip-code prefix.

    Reads ``bronze_olist_geolocation.parquet`` from ``bronze_directory``, casts the
    columns, left-pads the zip-code prefix to 5 characters, averages latitude and
    longitude per prefix, and writes that deduplicated frame to
    ``silver_olist_geolocation.parquet`` under ``silver_directory`` (overwrite mode).

    Args:
        bronze_directory: folder prefix of the bronze table (concatenated as-is,
            so it must end with a path separator).
        silver_directory: folder prefix of the silver table (same convention).
        spark: active SparkSession.

    Returns:
        DataFrame with columns ``geolocation_zip_code_prefix``, ``geolocation_lat``
        and ``geolocation_lng`` - the same frame that is written to silver.
    """
    # connect to bronze table
    partition_name = "bronze_olist_geolocation.parquet"
    filepath = bronze_directory + partition_name
    df = spark.read.parquet(filepath)
    print('loaded from:', filepath, 'row count:', df.count())

    # clean data: enforce schema / data type
    # Dictionary specifying columns and their desired datatypes
    column_type_map = {
        "geolocation_zip_code_prefix": StringType(),
        "geolocation_lat": FloatType(),
        "geolocation_lng": FloatType(),
        "geolocation_city": StringType(),
        "geolocation_state": StringType(),
    }

    for column, new_type in column_type_map.items():
        df = df.withColumn(column, col(column).cast(new_type))

    # Add missing leading zero
    df = df.withColumn(
        "geolocation_zip_code_prefix",
        F.lpad(col("geolocation_zip_code_prefix"), 5, "0")
    )

    # Deduplicate zipcodes by just taking the centroid (mean of lat,lng).
    # The aggregation keeps only the prefix and the two averaged coordinates;
    # city/state are not carried over.
    df_dedupe = df.groupBy("geolocation_zip_code_prefix").agg(
        F.avg("geolocation_lat").alias("geolocation_lat"),
        F.avg("geolocation_lng").alias("geolocation_lng")
    )

    # save silver table - IRL connect to database to write
    # Write the deduplicated frame (previously the raw `df` was written, so the
    # stored table still had repeated zip prefixes while the returned one did not).
    partition_name = "silver_olist_geolocation.parquet"
    filepath = silver_directory + partition_name
    df_dedupe.write.mode("overwrite").parquet(filepath)
    print('saved to:', filepath)

    return df_dedupe

In [21]:
# Run function manually to test
df = process_silver_olist_geolocation(
    bronze_directory="datamart/bronze/geolocation/",
    silver_directory=silver_geo_directory,
    spark=spark,
)

loaded from: datamart/bronze/geolocation/bronze_olist_geolocation.parquet row count: 1000325


                                                                                

saved to: datamart/silver/geolocation/silver_olist_geolocation.parquet


In [22]:
# Check schema enforced: print the column types of the deduplicated geolocation DataFrame
# (lat/lng are the per-prefix averages computed by the function, not the raw casted columns)
df.printSchema()

root
 |-- geolocation_zip_code_prefix: string (nullable = true)
 |-- geolocation_lat: double (nullable = true)
 |-- geolocation_lng: double (nullable = true)



In [23]:
# Check missing leading zero padded: count rows per zip-prefix length
# (a single bucket of length 5 means every prefix was padded)
zip_prefix_length = F.length("geolocation_zip_code_prefix").alias("length")
df.groupBy(zip_prefix_length).count().show()

+------+-----+
|length|count|
+------+-----+
|     5|19177|
+------+-----+



In [24]:
# Check every geolocation_zip_code_prefix only has 1 count. Group by prefix and count occurrences;
# an empty result means no prefix appears more than once
prefix_counts = df.groupBy("geolocation_zip_code_prefix").agg(F.count("*").alias("count"))
prefix_counts.filter("count > 1").show()

+---------------------------+-----+
|geolocation_zip_code_prefix|count|
+---------------------------+-----+
+---------------------------+-----+



### Build Products Table

In [None]:
# Create silver directory to save products data
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race
silver_prod_directory = "datamart/silver/products/"
os.makedirs(silver_prod_directory, exist_ok=True)

In [None]:
def process_silver_olist_products(bronze_directory, silver_directory, spark):
    """Build the silver products table from the bronze products parquet.

    Steps: read ``bronze_olist_products.parquet``, fix the two misspelled
    ``*_lenght`` column names, cast ids/category to string and every measure to
    double, replace nulls in the category and the three name/description/photo
    columns with NaN markers, report duplicated ``product_id`` values, and write
    ``silver_olist_products.parquet`` under ``silver_directory`` (overwrite mode).

    Args:
        bronze_directory: folder prefix of the bronze table (concatenated as-is,
            so it must end with a path separator).
        silver_directory: folder prefix of the silver table (same convention).
        spark: active SparkSession.

    Returns:
        The cleaned products DataFrame that was written.
    """
    # connect to bronze table
    bronze_path = bronze_directory + "bronze_olist_products.parquet"
    df = spark.read.parquet(bronze_path)
    print('loaded from:', bronze_path, 'row count:', df.count())

    # Rename columns due to spelling mistakes
    spelling_fixes = (
        ("product_name_lenght", "product_name_length"),
        ("product_description_lenght", "product_description_length"),
    )
    for misspelled, corrected in spelling_fixes:
        df = df.withColumnRenamed(misspelled, corrected)

    # clean data: enforce schema / data type
    string_columns = ("product_id", "product_category_name")
    double_columns = (
        "product_name_length",
        "product_description_length",
        "product_photos_qty",
        "product_weight_g",
        "product_length_cm",
        "product_height_cm",
        "product_width_cm",
    )
    for name in string_columns:
        df = df.withColumn(name, col(name).cast(StringType()))
    for name in double_columns:
        df = df.withColumn(name, col(name).cast(DoubleType()))

    # Inputting missing values as NaN: the literal string "NaN" for the category,
    # a floating-point NaN for the numeric columns
    df = df.fillna({"product_category_name": "NaN"})
    for name in ("product_name_length", "product_description_length", "product_photos_qty"):
        df = df.fillna({name: float('nan')})

    # Check product_id duplicates (total rows - distinct ids)
    n_rows = df.count()
    n_distinct_ids = df.select("product_id").distinct().count()
    print(f"Number of duplicated 'product_id': {n_rows - n_distinct_ids}")

    # save silver table - IRL connect to database to write
    silver_path = silver_directory + "silver_olist_products.parquet"
    df.write.mode("overwrite").parquet(silver_path)
    print('saved to:', silver_path)

    return df

In [None]:
# Run function manually to test
# The bronze directory is typed in by hand (amend after our path discrepancies are resolved)
df = process_silver_olist_products(
    bronze_directory="datamart/bronze/products/",
    silver_directory=silver_prod_directory,
    spark=spark,
)

loaded from: datamart/bronze/products/bronze_olist_products.parquet row count: 32951
Number of duplicated 'product_id': 0
saved to: datamart/silver/products/silver_olist_products.parquet


In [None]:
# Check schema enforced: print the column types of the products DataFrame returned by the run above
df.printSchema()

root
 |-- product_id: string (nullable = true)
 |-- product_category_name: string (nullable = false)
 |-- product_name_length: double (nullable = false)
 |-- product_description_length: double (nullable = false)
 |-- product_photos_qty: double (nullable = false)
 |-- product_weight_g: double (nullable = true)
 |-- product_length_cm: double (nullable = true)
 |-- product_height_cm: double (nullable = true)
 |-- product_width_cm: double (nullable = true)



In [None]:
# Inspect some output: read the saved silver products table back and show a few rows
products_silver_path = "datamart/silver/products/silver_olist_products.parquet"
df = spark.read.parquet(products_silver_path)
df.show(5)

# Can read

+--------------------+---------------------+-------------------+--------------------------+------------------+----------------+-----------------+-----------------+----------------+
|          product_id|product_category_name|product_name_length|product_description_length|product_photos_qty|product_weight_g|product_length_cm|product_height_cm|product_width_cm|
+--------------------+---------------------+-------------------+--------------------------+------------------+----------------+-----------------+-----------------+----------------+
|1e9e8ef04dbcff454...|           perfumaria|               40.0|                     287.0|               1.0|           225.0|             16.0|             10.0|            14.0|
|3aa071139cb16b67c...|                artes|               44.0|                     276.0|               1.0|          1000.0|             30.0|             18.0|            20.0|
|96bd76ec8810374ed...|        esporte_lazer|               46.0|                     250.0|    

### Build Product Categories Table??

In [None]:
# TODO: decide whether a separate silver product-categories table is needed (nothing implemented yet)

### Build Order_Items Table

In [None]:
# Create silver directory to save order_items data
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race
silver_order_items_directory = "datamart/silver/order_items/"
os.makedirs(silver_order_items_directory, exist_ok=True)

In [None]:
def process_silver_olist_order_items(
    bronze_directory,
    silver_directory,
    spark,
    sellers_filepath="datamart/silver/sellers/silver_olist_sellers.parquet",
):
    """Build the silver order_items table from the bronze order_items parquet.

    Casts every column to its target type, drops rows whose ``seller_id`` does not
    exist in the silver sellers table, and writes
    ``silver_olist_order_items.parquet`` under ``silver_directory`` (overwrite mode).

    Args:
        bronze_directory: folder prefix of the bronze table (concatenated as-is,
            so it must end with a path separator).
        silver_directory: folder prefix of the silver table (same convention).
        spark: active SparkSession.
        sellers_filepath: location of the silver sellers parquet used as the
            reference list of valid seller ids. Defaults to the path that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The cleaned order_items DataFrame that was written.
    """
    # connect to bronze table
    partition_name = "bronze_olist_order_items.parquet"
    filepath = bronze_directory + partition_name
    df = spark.read.parquet(filepath)
    print('loaded from:', filepath, 'row count:', df.count())

    # clean data: enforce schema / data type
    # Dictionary specifying columns and their desired datatypes
    column_type_map = {
        "order_id": StringType(),
        "order_item_id": LongType(),
        "product_id": StringType(),
        "seller_id": StringType(),
        "shipping_limit_date": TimestampType(),
        "price": DoubleType(),
        "freight_value": DoubleType(),
    }

    for column, new_type in column_type_map.items():
        df = df.withColumn(column, col(column).cast(new_type))

    # Checking for invalid seller IDs against the silver sellers table
    df_sellers = spark.read.parquet(sellers_filepath)

    # Get distinct valid seller IDs
    valid_seller_ids_df = df_sellers.select("seller_id").distinct()

    # Left anti join = rows of df whose seller_id has no match among the valid ids
    invalid_seller_count = df.join(valid_seller_ids_df, on="seller_id", how="left_anti").count()

    # Conditionally drop invalid rows
    if invalid_seller_count > 0:
        print("Dropping orders with invalid seller_id...")
        # left_semi keeps exactly the matching rows and, unlike an inner join on
        # "seller_id", leaves the column order untouched - so the saved schema is
        # the same whether or not anything had to be dropped.
        df = df.join(valid_seller_ids_df, on="seller_id", how="left_semi")
        # The semi join is the exact complement of the anti join above, so the
        # number of dropped rows is already known; no extra count() jobs needed.
        print(f"Dropped {invalid_seller_count} rows")

    else:
        print("All seller ids are valid — no need to drop!!")

    # save silver table - IRL connect to database to write
    partition_name = "silver_olist_order_items.parquet"
    filepath = silver_directory + partition_name
    df.write.mode("overwrite").parquet(filepath)
    print('saved to:', filepath)

    return df

In [None]:
# Run function manually to test
# The bronze directory is typed in by hand (amend after our path discrepancies are resolved)
df = process_silver_olist_order_items(
    bronze_directory="datamart/bronze/order_items/",
    silver_directory=silver_order_items_directory,
    spark=spark,
)

loaded from: datamart/bronze/order_items/bronze_olist_order_items.parquet row count: 112650
All seller ids are valid — no need to drop!!


                                                                                

saved to: datamart/silver/order_items/silver_olist_order_items.parquet


In [None]:
# Check schema enforced: print the column types of the order_items DataFrame returned by the run above
df.printSchema()

root
 |-- order_id: string (nullable = true)
 |-- order_item_id: long (nullable = true)
 |-- product_id: string (nullable = true)
 |-- seller_id: string (nullable = true)
 |-- shipping_limit_date: timestamp (nullable = true)
 |-- price: double (nullable = true)
 |-- freight_value: double (nullable = true)



In [None]:
# Inspect some output: read the saved silver order_items table back and show a few rows
order_items_silver_path = "datamart/silver/order_items/silver_olist_order_items.parquet"
df = spark.read.parquet(order_items_silver_path)
df.show(5)

# Can read

+--------------------+-------------+--------------------+--------------------+-------------------+------+-------------+
|            order_id|order_item_id|          product_id|           seller_id|shipping_limit_date| price|freight_value|
+--------------------+-------------+--------------------+--------------------+-------------------+------+-------------+
|8ac26cb701a7887cc...|            1|4ebb87ba41ca44632...|7a67c85e85bb2ce85...|2017-05-22 16:05:14|109.99|        18.02|
|8ac2728285fd4228f...|            1|8b90be4893a4277a9...|004c9cd9d87a3c30c...|2017-03-15 14:09:17|109.99|         8.27|
|8ac2728285fd4228f...|            2|fa94f25a73969e3a2...|004c9cd9d87a3c30c...|2017-03-15 14:09:17|109.99|        16.55|
|8ac2728285fd4228f...|            3|b01cedfa96d891427...|004c9cd9d87a3c30c...|2017-03-15 14:09:17|259.99|        21.01|
|8ac2728285fd4228f...|            4|fa94f25a73969e3a2...|004c9cd9d87a3c30c...|2017-03-15 14:09:17|109.99|        16.55|
+--------------------+-------------+----

### Build Orders Table

In [None]:
# Create silver directory to save orders data
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race
silver_orders_directory = "datamart/silver/orders/"
os.makedirs(silver_orders_directory, exist_ok=True)

In [None]:
def process_silver_olist_orders(
    bronze_directory,
    silver_directory,
    spark,
    partition_name,
    order_items_filepath="datamart/silver/order_items/silver_olist_order_items.parquet",
    customers_filepath="datamart/silver/customers/silver_olist_customers.parquet",
):
    """Build one silver orders partition from one bronze orders CSV partition.

    Steps:
      1. read ``partition_name`` (CSV with header) from ``bronze_directory``;
      2. cast ids/status to string and the five date columns to timestamp;
      3. drop orders that have no rows in the silver order_items table;
      4. drop orders whose ``customer_id`` is not in the silver customers table;
      5. lower-case/trim ``order_status`` and report values outside the allowed set
         (rows are reported only, not removed);
      6. write the result as parquet under ``silver_directory``; the file name is
         ``partition_name`` with "bronze" -> "silver" and ".csv" -> ".parquet".

    Columns not listed in the type map (e.g. the ``snapshot_date`` column that
    shows up as string in the printed schema) are passed through as read.

    Args:
        bronze_directory: folder holding the bronze orders CSV partitions.
        silver_directory: folder receiving the silver orders parquet partitions.
        spark: active SparkSession.
        partition_name: file name of the bronze CSV partition to process.
        order_items_filepath: silver order_items parquet used to validate order ids.
            Defaults to the previously hard-coded path.
        customers_filepath: silver customers parquet used to validate customer ids.
            Defaults to the previously hard-coded path.

    Returns:
        The cleaned orders DataFrame that was written.
    """
    # connect to bronze table
    filepath = os.path.join(bronze_directory, partition_name)
    df = spark.read.option("header", True).option("inferSchema", True).csv(filepath)
    print('loaded from:', filepath, 'row count:', df.count())

    # clean data: enforce schema / data type
    # Dictionary specifying columns and their desired datatypes
    column_type_map = {
        "order_id": StringType(),
        "customer_id": StringType(),
        "order_status": StringType(),
        "order_purchase_timestamp": TimestampType(),
        "order_approved_at": TimestampType(),
        "order_delivered_carrier_date": TimestampType(),
        "order_delivered_customer_date": TimestampType(),
        "order_estimated_delivery_date": TimestampType(),
    }

    for column, new_type in column_type_map.items():
        df = df.withColumn(column, col(column).cast(new_type))

    # Removing invalid order ids: keep only orders that have at least one order item
    df_order_items = spark.read.parquet(order_items_filepath)
    valid_order_ids_df = df_order_items.select("order_id").distinct()

    # left_semi filters df without adding or reordering columns
    df_orders_clean = df.join(valid_order_ids_df, on="order_id", how="left_semi")

    # Count how many were dropped
    dropped_orders = df.count() - df_orders_clean.count()
    print(f"Dropped {dropped_orders} orders with no items.")

    df = df_orders_clean

    # Checking for invalid customer IDs against the silver customers table
    df_customers = spark.read.parquet(customers_filepath)
    valid_customer_ids_df = df_customers.select("customer_id").distinct()

    # Left anti join = orders whose customer_id has no match among the valid ids
    invalid_customer_count = df.join(valid_customer_ids_df, on="customer_id", how="left_anti").count()

    # Conditionally drop invalid orders
    if invalid_customer_count > 0:
        print("Dropping orders with invalid customer_id...")
        # left_semi (instead of inner) keeps the column order stable, so every
        # monthly partition is written with the same schema regardless of
        # whether this branch ran.
        df = df.join(valid_customer_ids_df, on="customer_id", how="left_semi")
        # Semi join is the complement of the anti join counted above.
        print(f"Dropped {invalid_customer_count} rows")

    else:
        print("All customer ids are valid — no need to drop!!")

    # Enforcing enum for order statuses
    # Define valid statuses
    valid_statuses = {
        "created",
        "approved",
        "processing",
        "invoiced",
        "shipped",
        "delivered",
        "canceled",
        "unavailable"
    }

    # Clean and standardize the `order_status` column
    df = df.withColumn("order_status", trim(lower(col("order_status"))))

    # Identify invalid statuses (those NOT in the valid_statuses set).
    # A null status makes `~isin(...)` evaluate to NULL, which filter() treats as
    # false, so nulls must be tested explicitly or they go unreported.
    status_col = col("order_status")
    invalid_statuses_df = df.filter(status_col.isNull() | ~status_col.isin(list(valid_statuses)))

    # Print the unique invalid statuses
    invalid_statuses_list = [
        row["order_status"]
        for row in invalid_statuses_df.select("order_status").distinct().collect()
    ]

    if invalid_statuses_list:
        print(f"Invalid statuses found: {invalid_statuses_list}")
    else:
        print("No invalid status found!!")

    # save silver table - IRL connect to database to write
    parquet_name = partition_name.replace("bronze", "silver").replace(".csv", ".parquet")
    output_path = os.path.join(silver_directory, parquet_name)
    df.write.mode("overwrite").parquet(output_path)
    print("-----> saved to:", output_path)

    return df

In [None]:
# Run function manually to test

# Bronze input folder and silver output folder for the orders partitions
bronze_orders_directory = "datamart/bronze/orders/"
silver_orders_directory = "datamart/silver/orders/"

# Every CSV partition in the bronze orders folder, sorted by file name
# (the year_month suffix makes that the date order)
csv_files = sorted(name for name in os.listdir(bronze_orders_directory) if name.endswith(".csv"))

# Process the partitions one by one and print the resulting schema of each
for partition_name in csv_files:
    print(f"\n======== Processing {partition_name} ......... \n")
    df = process_silver_olist_orders(
        bronze_orders_directory,
        silver_orders_directory,
        spark,
        partition_name,
    )

    # Check schema enforced
    df.printSchema()




loaded from: datamart/bronze/orders/bronze_olist_orders_2016_09.csv row count: 4
Dropped 1 orders with no items.
All customer ids are valid — no need to drop!!
No invalid status found!!
-----> saved to: datamart/silver/orders/silver_olist_orders_2016_09.parquet
root
 |-- order_id: string (nullable = true)
 |-- customer_id: string (nullable = true)
 |-- order_status: string (nullable = true)
 |-- order_purchase_timestamp: timestamp (nullable = true)
 |-- order_approved_at: timestamp (nullable = true)
 |-- order_delivered_carrier_date: timestamp (nullable = true)
 |-- order_delivered_customer_date: timestamp (nullable = true)
 |-- order_estimated_delivery_date: timestamp (nullable = true)
 |-- snapshot_date: string (nullable = true)



loaded from: datamart/bronze/orders/bronze_olist_orders_2016_10.csv row count: 324
Dropped 16 orders with no items.
All customer ids are valid — no need to drop!!
No invalid status found!!
-----> saved to: datamart/silver/orders/silver_olist_orders_2016_

                                                                                

-----> saved to: datamart/silver/orders/silver_olist_orders_2018_10.parquet
root
 |-- order_id: string (nullable = true)
 |-- customer_id: string (nullable = true)
 |-- order_status: string (nullable = true)
 |-- order_purchase_timestamp: timestamp (nullable = true)
 |-- order_approved_at: timestamp (nullable = true)
 |-- order_delivered_carrier_date: timestamp (nullable = true)
 |-- order_delivered_customer_date: timestamp (nullable = true)
 |-- order_estimated_delivery_date: timestamp (nullable = true)
 |-- snapshot_date: string (nullable = true)



### Building Derived Table - Order logistics

In [None]:

def _most_common_category_per_order(df_items_with_cats, category_col, count_col):
    """Return one row per ``order_id`` holding its most frequent ``category_col`` value.

    Ties on the item count are broken by the category value (ascending, nulls
    last) so that repeated runs pick the same category.
    """
    category_counts = df_items_with_cats.groupBy("order_id", category_col) \
        .agg(F.count("*").alias(count_col))
    ranking_window = Window.partitionBy("order_id").orderBy(
        F.col(count_col).desc(),
        F.col(category_col).asc_nulls_last(),
    )
    return category_counts \
        .withColumn("rank", F.row_number().over(ranking_window)) \
        .filter(F.col("rank") == 1) \
        .drop("rank", count_col)


def build_order_features(spark, order_file_path):
    """Derive per-order logistics features for one silver orders partition.

    Joins the orders in ``order_file_path`` with aggregates computed from the
    silver order_items and products tables and the bronze category-translation
    table.

    Args:
        spark: active SparkSession.
        order_file_path: path of one silver orders parquet partition.

    Returns:
        DataFrame with one row per order that has order items, with columns
        ``order_id``, ``order_purchase_timestamp``, ``total_qty``, ``total_price``,
        ``total_freight_value``, ``total_weight_g``, ``total_volume_cm3``,
        ``total_density``, ``main_category`` and ``sub_category``.

    Notes:
        Every Spark SQL function is referenced through the ``F`` module alias on
        purpose: notebook cells may rebind bare names such as ``count`` to plain
        Python values, which would break this function on a later call.
    """
    # Read inputs
    df_order_items = spark.read.parquet("datamart/silver/order_items/silver_olist_order_items.parquet")
    df_products = spark.read.parquet("datamart/silver/products/silver_olist_products.parquet")
    df_categories = spark.read.parquet("datamart/bronze/category_translation/bronze_product_category_translation.parquet")
    df_orders = spark.read.parquet(order_file_path)

    # Order-level totals. total_qty is taken as the highest order_item_id
    # (presumably item ids run 1..n within an order - TODO confirm).
    order_metrics = df_order_items.groupBy("order_id").agg(
        F.max("order_item_id").alias("total_qty"),
        F.sum("price").alias("total_price"),
        F.sum("freight_value").alias("total_freight_value")
    )

    # Attach physical dimensions to every order item
    df_items_with_products = df_order_items.select("order_id", "product_id") \
        .join(
            df_products.select(
                "product_id", "product_weight_g",
                "product_length_cm", "product_height_cm", "product_width_cm"
            ),
            on="product_id", how="left"
        )

    df_items_with_products = df_items_with_products.withColumn(
        "product_volume_cm3",
        F.col("product_length_cm") * F.col("product_height_cm") * F.col("product_width_cm")
    )

    product_metrics = df_items_with_products.groupBy("order_id").agg(
        F.sum("product_weight_g").alias("total_weight_g"),
        F.sum("product_volume_cm3").alias("total_volume_cm3")
    )

    # Inner join on order_metrics: orders without any order item are left out.
    # Density stays null when the total volume is zero or unknown.
    final_df = df_orders.select("order_id", "order_purchase_timestamp") \
        .join(order_metrics, on="order_id", how="inner") \
        .join(product_metrics, on="order_id", how="left") \
        .withColumn(
            "total_density",
            F.when(F.col("total_volume_cm3") != 0,
                   F.col("total_weight_g") / F.col("total_volume_cm3")
            ).otherwise(None)
        )

    # NOTE(review): the category table must expose `main_category` and
    # `sub_category`. The recorded run of this notebook fails at this select
    # because the bronze table only had `product_category_name` and
    # `product_category_name_english` - confirm the upstream schema.
    df_items_with_cats = df_order_items.select("order_id", "product_id") \
        .join(df_products.select("product_id", "product_category_name"), on="product_id", how="left") \
        .join(df_categories.select("product_category_name", "main_category", "sub_category"), on="product_category_name", how="left")

    most_common_main = _most_common_category_per_order(df_items_with_cats, "main_category", "main_cat_count")
    most_common_sub = _most_common_category_per_order(df_items_with_cats, "sub_category", "sub_cat_count")

    order_categories = most_common_main.join(most_common_sub, on="order_id", how="outer")
    final_df_with_cats = final_df.join(order_categories, on="order_id", how="left")

    return final_df_with_cats


In [None]:
# Build one order-logistics parquet per silver orders partition.
# Keep track of failures
failed_files = []
processed_files = []

order_files = sorted(glob.glob("datamart/silver/orders/silver_olist_orders_*.parquet"))

# Create output directory if it doesn't exist
os.makedirs("datamart/silver/order_logistics", exist_ok=True)

# Loop over files
for idx, file_path in enumerate(order_files, 1):

    basename = os.path.basename(file_path)
    year_month = basename.replace("silver_olist_orders_", "").replace(".parquet", "")

    output_path = f"datamart/silver/order_logistics/silver_olist_order_logistics_{year_month}.parquet"

    print(f"\n[{idx}/{len(order_files)}]  Processing {year_month} ({basename})...")

    # Skip if already exists
    if os.path.exists(output_path):
        print(f" Skipping {year_month} (already exists)")
        continue

    try:
        start_time = time.time()

        # Run feature engineering
        final_df = build_order_features(spark, file_path)

        # Save to parquet
        final_df.write.mode("overwrite").parquet(output_path)

        # Verify row count.
        # The variable is deliberately NOT called `count`: that name is imported
        # from pyspark.sql.functions at the top of the notebook, and rebinding it
        # to an int here would break any later code that calls count(...).
        row_count = final_df.count()
        duration = round(time.time() - start_time, 2)

        print(f"---> Saved: {output_path} → {row_count} rows in {duration}s")

        processed_files.append((year_month, row_count, duration))

    except Exception as e:
        # Record the failure and carry on with the remaining partitions
        print(f" Failed on {year_month}: {e}")
        failed_files.append((year_month, str(e)))

# Summary
print("\n===== Processing Summary =====")
print(f" Successfully processed: {len(processed_files)} files")
for ym, row_count, duration in processed_files:
    print(f"  - {ym}: {row_count} rows in {duration}s")

if failed_files:
    print(f"\n Failed files: {len(failed_files)}")
    for ym, err in failed_files:
        print(f"  - {ym}: {err}")
else:
    print("\n All files processed successfully")



[1/25]  Processing 2016_09 (silver_olist_orders_2016_09.parquet)...
 Failed on 2016_09: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `main_category` cannot be resolved. Did you mean one of the following? [`product_category_name`, `product_category_name_english`].;
'Project [product_category_name#8333, 'main_category, 'sub_category]
+- Relation [product_category_name#8333,product_category_name_english#8334] parquet


[2/25]  Processing 2016_10 (silver_olist_orders_2016_10.parquet)...
 Failed on 2016_10: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `main_category` cannot be resolved. Did you mean one of the following? [`product_category_name`, `product_category_name_english`].;
'Project [product_category_name#8481, 'main_category, 'sub_category]
+- Relation [product_category_name#8481,product_category_name_english#8482] parquet


[3/25]  Processing 2016_12 (silver_olist_orders_2016_12.parquet)...
 Failed on 2016_12: [UNRESO

In [None]:
# Inspect some output (the 2018_01 order-logistics partition)
# NOTE: this read raises PATH_NOT_FOUND when the build loop above did not write that partition
df_orders_logistics = spark.read.parquet("datamart/silver/order_logistics/silver_olist_order_logistics_2018_01.parquet")
df_orders_logistics.show(10)

AnalysisException: [PATH_NOT_FOUND] Path does not exist: file:/app/scripts/datamart/silver/order_logistics/silver_olist_order_logistics_2018_01.parquet.

## Build Gold Table (Features)

In [None]:
# snapshot_date_str = "2023-01-01"

# start_date_str = "2023-01-01"
# end_date_str = "2024-12-01"

In [None]:
# Create the gold root and feature-store folders.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
gold_root = "datamart/gold"
os.makedirs(gold_root, exist_ok=True)
print(f"Gold root directory: {gold_root}")

gold_feature_directory = "datamart/gold/feature_store/"
os.makedirs(gold_feature_directory, exist_ok=True)

Gold root directory: datamart/gold


In [None]:
# Root folders of the silver (input) and gold (output) layers
silver_directory = "datamart/silver"
gold_directory = "datamart/gold"

def read_silver_table(table, silver_db, spark):
    """
    Read every partition found under ``<silver_db>/<table>`` into one DataFrame.

    Each entry matched by ``<silver_db>/<table>/*`` is passed to a single
    ``spark.read.parquet`` call.

    NOTE: this helper is defined more than once in this notebook; the last
    definition that is executed wins.
    """
    table_dir = os.path.join(silver_db, table)

    partition_paths = []
    for entry in glob.glob(os.path.join(table_dir, '*')):
        partition_paths.append(os.path.join(table_dir, os.path.basename(entry)))

    reader = spark.read.option("header", "true")
    return reader.parquet(*partition_paths)

# Only the orders table is loaded for now; the other silver tables below are kept
# commented out until the feature builder accepts them.
# cust_df = read_silver_table('customers', silver_directory, spark)
# geo_df = read_silver_table('geolocation', silver_directory, spark)
# items_df = read_silver_table('order_items', silver_directory, spark)
# # to be changed
# logistic_df = read_silver_table('order_items', silver_directory, spark)
# prod_df = read_silver_table('products', silver_directory, spark)
# sellers_df = read_silver_table('sellers', silver_directory, spark)
orders_df = read_silver_table('orders', silver_directory, spark)

# print(f"Snapshot date for this run: {snapshot_date_str}")
# Snapshot date of this test run (hard-coded)
snapshot_date_str = "2016-09-05"

# Build the gold feature table for that snapshot date from the orders table
gold_processing_feature.process_feature_gold_table(snapshot_date_str, silver_directory, gold_directory, orders_df, spark)

# Planned call once all silver tables are passed in:
# gold_processing_feature.process_feature_gold_table(snapshot_date_str, silver_directory, gold_directory,
#                           cust_df, geo_df, items_df, logistic_df, orders_df, prod_df, sellers_df, spark)

                                                                                

saved to: datamart/gold/gold_feature_store_2016_09_05.parquet
Feature gold table processing completed for snapshot date: 2016-09-05


In [None]:
# silver_directory = "datamart/silver"
# gold_directory = "datamart/gold/features"

# cust_path = silver_directory + "/customers/silver_olist_customers.parquet"
# geo_path = silver_directory + "/geolocation/silver_olist_geolocation.parquet"
# items_path = silver_directory + "/items/silver_olist_order_items.parquet"
# logistics_path = silver_directory + "/logistics/silver_olist_order_payments.parquet"
# products_path = silver_directory + "/products/silver_olist_products.parquet"
# sellers_path = silver_directory + "/sellers/silver_olist_sellers.parquet"

# gold_processing_feature.process_feature_gold_table(snapshot_date_str, silver_directory, gold_directory, 
#                           cust_path, geo_path, items_path, logistics_path,
#                           products_path, sellers_path, spark)

In [None]:
# Inspect the gold feature-store partition for snapshot date 2016-09-05
# NOTE(review): despite its name, df_silver holds gold-layer data
df_silver = spark.read.parquet("datamart/gold/gold_feature_store_2016_09_05.parquet")
df_silver.show(5)

+--------------------+------------------------+
|         customer_id|order_purchase_timestamp|
+--------------------+------------------------+
|683c54fc24d40ee9f...|              2016-09-05|
+--------------------+------------------------+



## Inspect Feature Store

## Build Gold Table (Label)

In [None]:
# Root folders of the silver (input) and gold (output) layers; nothing is created on disk here
silver_directory = "datamart/silver"
gold_directory = "datamart/gold"

In [None]:
# Build the label store for the listed snapshot partition(s) and convert the returned frame to pandas
partitions_list = ['2017-10-04']
y= gold_label_processing.process_gold_label(silver_directory, gold_directory, partitions_list, spark)
orders = y.toPandas()

# Row count of the returned frame
print(f"Number of rows in label store: {orders.shape[0]}")
#orders.groupby('snapshot_date').size()

Building label store...


Saving labels: 100%|██████████| 1/1 [00:00<00:00,  6.13it/s]


Label store Completed
Number of rows in label store: 96478


In [None]:
def read_silver_table(table, silver_directory, spark):
    """
    Read every partition found under ``<silver_directory>/<table>`` into one DataFrame.

    Each entry matched by ``<silver_directory>/<table>/*`` is passed to a single
    ``spark.read.parquet`` call.

    NOTE: this helper is defined more than once in this notebook; the last
    definition that is executed wins.
    """
    table_dir = os.path.join(silver_directory, table)
    matched_entries = glob.glob(os.path.join(table_dir, '*'))
    partition_paths = list(
        map(lambda entry: os.path.join(table_dir, os.path.basename(entry)), matched_entries)
    )
    return spark.read.option("header", "true").parquet(*partition_paths)

# Read back every partition under datamart/gold/label_store and display it as a pandas frame
# (read_silver_table only joins the folder and table name, so it is reused here for the gold folder)
gold_directory = "datamart/gold"
order_df = read_silver_table('label_store', gold_directory, spark)
order_df = order_df.toPandas()
order_df

Unnamed: 0,order_id,miss_delivery_sla,snapshot_date
0,95bfa2a85ef50d3192609d8f29b92cf9,0,2017-10-04
1,c3fd670b03599718895218d479f660b6,1,2017-10-04
2,b6c70f4b37438a78c820423809997c20,1,2017-10-04
3,ab53b19e9f59776c6556ebf49e85a52c,0,2017-10-04
4,9913ce9487d390ef37cd3b6cc3883f0e,1,2017-10-04
...,...,...,...
151,d36b13fdc087b62c490a9db5c0e0a913,0,2017-10-04
152,d4304f4104fca54e2a93b03e5b04962b,0,2017-10-04
153,d9e98b1f6961932f22bf340d0153bbad,0,2017-10-04
154,f495e955026183e7f6bbb3dac79b88e6,0,2017-10-04


In [None]:
# Earliest purchase timestamp in order_df
# NOTE(review): the order_df displayed by the previous cell only shows order_id,
# miss_delivery_sla and snapshot_date, so this lookup appears to depend on an
# order_df built elsewhere - confirm before re-running from a fresh kernel.
order_df['order_purchase_timestamp'].min()
#max_date = '2018-09-03'
#min_date = '2016-09-04'


Timestamp('2016-09-04 21:15:19')

In [None]:
def read_silver_table(table, silver_directory, spark):
    """
    Helper function to read all partitions of a silver table
    """
    folder_path = os.path.join(silver_directory, table)
    files_list = [os.path.join(folder_path, os.path.basename(f)) for f in glob.glob(os.path.join(folder_path, '*'))]
    df = spark.read.option("header", "true").parquet(*files_list)
    return df

############################
# Label Store
############################
def build_label_store(sla, df):
    """
    Derive the delivery-SLA label for every delivered order.

    Args:
        sla: Number of days added to the purchase date to get the delivery deadline.
        df: Orders DataFrame with columns order_id, order_status,
            order_purchase_timestamp and order_delivered_customer_date.

    Returns:
        DataFrame with columns order_id, miss_delivery_sla (1 when the customer
        delivery date is after the deadline, otherwise 0) and snapshot_date
        (the purchase date).
    """
    # Labels are only defined for orders that reached the customer
    delivered_orders = df.filter(col("order_status") == 'delivered')

    # The snapshot date of an order is the calendar date it was purchased on
    purchase_date = to_date(col("order_purchase_timestamp"))

    # NOTE(review): if order_delivered_customer_date is a timestamp, a delivery at any
    # time after midnight on the deadline day is flagged as a miss - confirm intended.
    deadline = date_add(col("snapshot_date"), sla)
    missed_sla = when(col("order_delivered_customer_date") > deadline, 1).otherwise(0)

    return (
        delivered_orders
        .withColumn("snapshot_date", purchase_date)
        .withColumn("miss_delivery_sla", missed_sla)
        .select("order_id", "miss_delivery_sla", "snapshot_date")
    )

############################
# Pipeline
############################

def process_gold_label(silver_directory, gold_directory, partitions_list, spark, sla_days=14):
    """
    Build the delivery-SLA label store and save one parquet partition per snapshot date.

    Args:
        silver_directory: Root folder of the silver layer; the 'orders' table is read from it.
        gold_directory: Root folder of the gold layer; each partition is written to
            <gold_directory>/label_store/<date with '-' replaced by '_'>.parquet.
        partitions_list: Snapshot dates as strings (e.g. '2017-10-04'); one partition
            is written per entry.
        spark: Active SparkSession.
        sla_days: Delivery deadline in days, passed to build_label_store. Defaults
            to 14, the value that used to be hard-coded.

    Returns:
        The label DataFrame for ALL snapshot dates, not only those in
        partitions_list, so its row count can exceed what was written to disk.
    """
    # Read silver tables
    orders_df = read_silver_table('orders', silver_directory, spark)

    # Build label store
    print("Building label store...")
    df_label = build_label_store(sla_days, orders_df)

    label_store_directory = os.path.join(gold_directory, 'label_store')
    for date_str in tqdm(partitions_list, desc="Saving labels"):
        partition_name = date_str.replace('-', '_') + '.parquet'
        label_filepath = os.path.join(label_store_directory, partition_name)
        # Write only this snapshot's rows; 'overwrite' keeps re-runs idempotent
        df_label.filter(col('snapshot_date') == date_str).write.mode('overwrite').parquet(label_filepath)

    print("Label store Completed")

    return df_label

In [None]:
def process_gold_label(silver_directory, gold_directory, partitions_list, spark, sla_days=14):
    """
    Build the delivery-SLA label store and save one parquet partition per snapshot date.

    Args:
        silver_directory: Root folder of the silver layer; the 'orders' table is read from it.
        gold_directory: Root folder of the gold layer; each partition is written to
            <gold_directory>/label_store/<date with '-' replaced by '_'>.parquet.
        partitions_list: Snapshot dates as strings (e.g. '2017-10-04'); one partition
            is written per entry.
        spark: Active SparkSession.
        sla_days: Delivery deadline in days, passed to build_label_store. Defaults
            to 14, the value that used to be hard-coded.

    Returns:
        The label DataFrame for ALL snapshot dates, not only those in
        partitions_list, so its row count can exceed what was written to disk.
    """
    # Read silver tables
    orders_df = read_silver_table('orders', silver_directory, spark)

    # Build label store
    print("Building label store...")
    df_label = build_label_store(sla_days, orders_df)

    label_store_directory = os.path.join(gold_directory, 'label_store')
    for date_str in tqdm(partitions_list, desc="Saving labels"):
        partition_name = date_str.replace('-', '_') + '.parquet'
        label_filepath = os.path.join(label_store_directory, partition_name)
        # Write only this snapshot's rows; 'overwrite' keeps re-runs idempotent
        df_label.filter(col('snapshot_date') == date_str).write.mode('overwrite').parquet(label_filepath)

    print("Label store Completed")

    return df_label

In [None]:
# Snapshot dates (YYYY-MM-DD) whose label partitions should be written.
start_date_str = ['2017-01-01']

print("Building gold feature tables...")
# Paths of the silver (input) and gold (output) layers
silver_directory = "datamart/silver"
gold_directory = "datamart/gold"

# exist_ok avoids the check-then-create race and keeps the cell safe to re-run
os.makedirs(gold_directory, exist_ok=True)

# Build gold tables
y = process_gold_label(silver_directory, gold_directory, start_date_str, spark)

# Check for the rows ingested: count in Spark rather than pulling every row into pandas
y_count = y.count()
print(f"Number of rows in label store: {y_count}")

print(f"Gold feature tables built successfully from start date: {start_date_str}")

Building gold feature tables...
Building label store...


Saving labels: 100%|██████████| 1/1 [00:00<00:00,  2.19it/s]


Label store Completed
Number of rows in label store: 96478
Gold feature tables built successfully from start date: ['2017-01-01']


## Inspect Label Store

## Stop Spark Session

In [None]:
# End spark session: stops the `spark` session used throughout the notebook.
# Any cell that uses `spark` must run before this one.
spark.stop()

print('\n\n---completed job---\n\n')