<a href="https://colab.research.google.com/github/ruan-narici/brazilian-ecommerce-analytics/blob/main/brazilian_ecommerce_analytics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Desafio de Projeto: Análise de Vendas e Entregas no E-commerce Brasileiro

### Imports

In [211]:
import pandas as pd

### Load datasets

In [212]:
df_customers = pd.read_csv("/content/drive/MyDrive/Documentos/Dataset/Brazilian E-Commerce Public Dataset by Olist/olist_customers_dataset.csv") #OK
# df_geolocation = pd.read_csv("/content/drive/MyDrive/Documentos/Dataset/Brazilian E-Commerce Public Dataset by Olist/olist_geolocation_dataset.csv") #EXCLUDE
df_order_items = pd.read_csv("/content/drive/MyDrive/Documentos/Dataset/Brazilian E-Commerce Public Dataset by Olist/olist_order_items_dataset.csv") #OK
df_order_payments = pd.read_csv("/content/drive/MyDrive/Documentos/Dataset/Brazilian E-Commerce Public Dataset by Olist/olist_order_payments_dataset.csv") #OK
df_order_reviews = pd.read_csv("/content/drive/MyDrive/Documentos/Dataset/Brazilian E-Commerce Public Dataset by Olist/olist_order_reviews_dataset.csv") #OK
df_orders = pd.read_csv("/content/drive/MyDrive/Documentos/Dataset/Brazilian E-Commerce Public Dataset by Olist/olist_orders_dataset.csv") #OK
df_products = pd.read_csv("/content/drive/MyDrive/Documentos/Dataset/Brazilian E-Commerce Public Dataset by Olist/olist_products_dataset.csv") #OK
df_sellers = pd.read_csv("/content/drive/MyDrive/Documentos/Dataset/Brazilian E-Commerce Public Dataset by Olist/olist_sellers_dataset.csv") #OK
df_product_category_name_translation = pd.read_csv("/content/drive/MyDrive/Documentos/Dataset/Brazilian E-Commerce Public Dataset by Olist/product_category_name_translation.csv") #OK

### Define Types

In [213]:
# DateTimeType
df_order_items["shipping_limit_date"] = pd.to_datetime(df_order_items["shipping_limit_date"])
df_order_reviews["review_creation_date"] = pd.to_datetime(df_order_reviews["review_creation_date"])
df_order_reviews["review_answer_timestamp"] = pd.to_datetime(df_order_reviews["review_answer_timestamp"])
df_orders["order_purchase_timestamp"] = pd.to_datetime(df_orders["order_purchase_timestamp"])
df_orders["order_approved_at"] = pd.to_datetime(df_orders["order_approved_at"])
df_orders["order_delivered_carrier_date"] = pd.to_datetime(df_orders["order_delivered_carrier_date"])
df_orders["order_delivered_customer_date"] = pd.to_datetime(df_orders["order_delivered_customer_date"])
df_orders["order_estimated_delivery_date"] = pd.to_datetime(df_orders["order_estimated_delivery_date"])

### Merge datasets

In [214]:
df_merged = df_orders.merge(df_order_items, on="order_id")
df_merged = df_merged.merge(df_customers, on="customer_id")
df_merged = df_merged.merge(df_sellers, on="seller_id")
df_merged = df_merged.merge(df_products, on="product_id")
df_merged = df_merged.merge(df_order_payments, on="order_id")
df_merged = df_merged.merge(df_product_category_name_translation, on="product_category_name")
df_merged = df_merged.merge(df_order_reviews, on="order_id")

df_merged.columns

Index(['order_id', 'customer_id', 'order_status', 'order_purchase_timestamp',
       'order_approved_at', 'order_delivered_carrier_date',
       'order_delivered_customer_date', 'order_estimated_delivery_date',
       'order_item_id', 'product_id', 'seller_id', 'shipping_limit_date',
       'price', 'freight_value', 'customer_unique_id',
       'customer_zip_code_prefix', 'customer_city', 'customer_state',
       'seller_zip_code_prefix', 'seller_city', 'seller_state',
       'product_category_name', 'product_name_lenght',
       'product_description_lenght', 'product_photos_qty', 'product_weight_g',
       'product_length_cm', 'product_height_cm', 'product_width_cm',
       'payment_sequential', 'payment_type', 'payment_installments',
       'payment_value', 'product_category_name_english', 'review_id',
       'review_score', 'review_comment_title', 'review_comment_message',
       'review_creation_date', 'review_answer_timestamp'],
      dtype='object')

### Handling null values

In [215]:
# DateTimeType
df_merged["order_approved_at"].notna()
df_merged["order_delivered_carrier_date"].notna()
df_merged["order_delivered_customer_date"].notna()

# FloatType
df_merged["product_weight_g"] = df_merged["product_weight_g"].fillna('-2')
df_merged["product_length_cm"] = df_merged["product_length_cm"].fillna('-2')
df_merged["product_height_cm"] = df_merged["product_height_cm"].fillna('-2')
df_merged["product_width_cm"] = df_merged["product_width_cm"].fillna('-2')
df_merged["review_comment_message"] = df_merged["product_width_cm"].fillna('-2')

# StrType
df_merged["review_comment_title"] = df_merged["review_comment_title"].fillna('-2')

df_merged.isna().sum()

Unnamed: 0,0
order_id,0
customer_id,0
order_status,0
order_purchase_timestamp,0
order_approved_at,14
order_delivered_carrier_date,1195
order_delivered_customer_date,2400
order_estimated_delivery_date,0
order_item_id,0
product_id,0


### Selecting and Creating some columns

In [226]:
# Selecting some Columns
df_analysis = df_merged[
    [
        "order_id",
        "seller_id",
        "customer_id",
        "order_purchase_timestamp",
        "order_approved_at",
        "shipping_limit_date",
        "order_estimated_delivery_date",
        "order_delivered_carrier_date",
        "order_delivered_customer_date",
        "order_status",
        "product_id",
        "seller_city",
        "seller_state",
        "customer_city",
        "customer_state",
        "product_category_name_english",
        "payment_type",
        "price",
        "freight_value",
        "payment_value",
        "review_score",
        "review_creation_date",
        "review_answer_timestamp",
        ]
    ].copy()


# Creating some columns
df_analysis["order_delivered_averaga_days"] = (df_analysis["order_delivered_customer_date"] - df_analysis["order_delivered_carrier_date"]).dt.days
df_analysis["order_estimated_delivery_days"] = (df_analysis["order_estimated_delivery_date"] - df_analysis["order_delivered_carrier_date"]).dt.days

# Preview
df_analysis.sample(5)

Unnamed: 0,order_id,seller_id,customer_id,order_purchase_timestamp,order_approved_at,shipping_limit_date,order_estimated_delivery_date,order_delivered_carrier_date,order_delivered_customer_date,order_status,...,product_category_name_english,payment_type,price,freight_value,payment_value,review_score,review_creation_date,review_answer_timestamp,order_delivered_averaga_days,order_estimated_delivery_days
88399,9e05a5c7ef1a7074746243745a430219,2a261b5b644fa05f4f2700eb93544f2c,b6a347b55c80817052b0d04c1299daf3,2018-01-11 22:54:35,2018-01-11 23:07:27,2018-01-17 23:07:27,2018-02-08,2018-01-15 14:34:12,2018-02-16 22:46:51,delivered,...,furniture_decor,credit_card,55.0,16.15,142.3,1,2018-02-10,2018-02-15 20:23:42,32.0,23.0
65897,0576822a11b3068cce2e3c8551e4e327,4869f7a5dfa277a7dca6462dcf3b52b2,d92a0146dec6700c8843c392da07a41e,2017-11-12 00:07:59,2017-11-12 00:30:46,2017-11-17 00:30:33,2017-12-13,2017-11-16 12:28:55,2017-11-27 20:28:44,delivered,...,watches_gifts,credit_card,133.0,43.34,176.34,4,2017-11-28,2017-11-29 21:38:25,11.0,26.0
32059,06b2c7035561ef12b16045ef4bf459e2,f8db351d8c4c4c22c6835c19a46f01b0,8bb9547aec97f493e91a078f66fa7ac5,2017-07-05 07:54:14,2017-07-05 17:38:29,2017-07-11 17:28:03,2017-07-27,2017-07-06 11:55:27,2017-07-12 17:53:56,delivered,...,housewares,credit_card,52.9,15.12,68.02,5,2017-07-19,2017-07-27 16:35:51,6.0,20.0
41215,458661c70cd0af0cb182a2afb5033c8a,8160255418d5aaa7dbdc9f4c64ebda44,7cbcf7f49823f300c2309bb4344c0e61,2018-05-10 12:03:12,2018-05-11 03:37:47,2018-05-17 03:37:47,2018-06-05,2018-05-11 16:28:00,2018-05-18 20:58:43,delivered,...,bed_bath_table,boleto,79.9,13.28,93.18,4,2018-05-19,2018-05-30 04:26:31,7.0,24.0
74408,7fb9240a1a18bc95290744b4277a0244,11d4c477d09821164bca4f70a2eae031,c1b41aa8cc67484698faea4ce1674f30,2017-07-18 20:52:33,2017-07-18 21:05:14,2017-07-24 21:05:14,2017-08-07,2017-07-19 14:47:56,2017-07-24 17:56:30,delivered,...,costruction_tools_garden,credit_card,8.82,11.85,41.34,5,2017-07-25,2017-07-28 00:53:12,5.0,18.0


### Exploratory Analysis

Top 7 of most sellers by seller state

In [227]:
# Top 7 of most sellers by seller state
df_most_sellers_by_seller_state = df_analysis.groupby(["seller_state"]).agg({"payment_value": "sum", "seller_state": "count"}).rename(columns={"seller_state": "total_sales", "payment_value": "total_sale_value"}).sort_values(by="total_sales", ascending=False)
df_most_sellers_by_seller_state.head(7)

Unnamed: 0_level_0,total_sale_value,total_sales
seller_state,Unnamed: 1_level_1,Unnamed: 2_level_1
SP,13102725.34,82417
MG,1524405.49,9014
PR,1826605.49,8964
RJ,1075181.72,4906
SC,880990.16,4221
RS,551309.32,2224
DF,135787.01,937


Top 7 of most purchases by customer state

In [228]:
# Top 7 of most purchases by customer state
df_most_purchases_by_customer_state = df_analysis.groupby(["customer_state"]).agg({"payment_value": "sum", "customer_state": "count"}).rename(columns={"payment_value": "total_payment_value", "customer_state": "total_purchases"}).sort_values(by="total_purchases", ascending=False)
df_most_purchases_by_customer_state.head(7)

Unnamed: 0_level_0,total_payment_value,total_purchases
customer_state,Unnamed: 1_level_1,Unnamed: 2_level_1
SP,7502926.95,48797
RJ,2708839.33,14987
MG,2288949.71,13429
RS,1131899.22,6413
PR,1055747.81,5879
SC,769744.94,4218
BA,780334.54,3942


Top 7 average of most expensive freight by customer state

In [229]:
# Top 7 average of most expensive freight by customer state
df_avg_most_expensive_freight_by_customer = df_analysis.groupby(["customer_state"]).agg({"freight_value": "mean"}).rename(columns={"freight_value": "avg_freight_value"}).sort_values(by="avg_freight_value", ascending=False)
df_avg_most_expensive_freight_by_customer.head(7)

Unnamed: 0_level_0,avg_freight_value
customer_state,Unnamed: 1_level_1
RR,43.587
PB,43.43685
RO,41.077849
AC,40.232473
TO,40.007778
PI,39.205989
MA,38.293858


Top 7 average of most days to delivered by customer state

In [232]:
# Top 7 average of most days to delivered by customer state
df_avg_most_days_to_delivered_by_customer_state = df_analysis.groupby("customer_state").agg({"order_estimated_delivery_days": "mean", "order_delivered_averaga_days": "mean", "review_score": "mean"}).rename(columns={"order_delivered_averaga_days": "avg_days_to_delivery", "order_estimated_delivery_days": "avg_order_estimated_delivery_days", "review_score": "avg_review_score"}).sort_values(by="avg_days_to_delivery", ascending=False)
df_avg_most_days_to_delivered_by_customer_state.head(7)

Unnamed: 0_level_0,avg_order_estimated_delivery_days,avg_days_to_delivery,avg_review_score
customer_state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RR,40.795918,24.090909,3.56
AP,42.182927,24.04878,4.240964
AM,42.315476,23.191617,4.095238
AL,28.447427,20.356164,3.716484
PA,33.526952,19.59375,3.79371
SE,26.637755,17.327225,3.821883
MA,26.660218,17.292079,3.686298


Top 7 most sales product

In [231]:
# Top 7 most sales product
def_most_sales_products = df_analysis.groupby("product_id").agg({
        "product_category_name_english": "last",
        "product_id": "count",
        "price": "mean",
        "order_estimated_delivery_days": "mean",
        "order_delivered_averaga_days": "mean",
        "review_score": "mean"
        }).rename(columns={
            "product_id": "total_sales",
            "price": "avg_price",
            "order_estimated_delivery_days": "avg_order_estimated_delivery_days",
            "order_delivered_averaga_days": "avg_order_delivered_averaga_days",
            "review_score": "avg_review_score",
            "product_category_name_english": "product_category_name"
            }).sort_values(by="total_sales", ascending=False)
def_most_sales_products.head(7)

Unnamed: 0_level_0,product_category_name,total_sales,avg_price,avg_order_estimated_delivery_days,avg_order_delivered_averaga_days,avg_review_score
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
aca2eb7d00ea1a7b8ebd4e68314663af,furniture_decor,533,71.347655,18.405253,8.872624,4.020638
99a4788cb24856965c36a24e339b6058,bed_bath_table,517,88.175551,20.397287,10.455621,3.914894
422879e10f46682990de24d770e7f83d,garden_tools,507,54.82785,21.601578,11.692308,3.927022
389d119b48cf3043d311335e499d9c6b,garden_tools,405,54.635284,20.679012,11.171642,4.106173
368c6c730842d78016ad823897a372db,garden_tools,395,54.287089,21.531646,10.863291,3.908861
53759a2ecddad2bb87a079a1f1519f73,garden_tools,389,54.722108,21.426735,11.066838,3.884319
d1c427060a0f73f6b889a5c7c61f2ac4,computers_accessories,354,137.554802,23.892655,11.264535,4.096045
