In [1]:
%%time
%matplotlib inline
%load_ext autoreload
%autoreload 2

import sys
from dataclasses import dataclass
from pathlib import Path

import pandas as pd
from dotenv import load_dotenv


@dataclass
class Config:
    """Filesystem layout used by this notebook.

    Every directory is derived from its parent unless it is passed
    explicitly, so ``Config(project_dir=...)`` (or ``Config(data_dir=...)``)
    relocates the dependent directories as well instead of leaving them
    pointing at the default ``../../`` root.

    Attributes:
        project_dir: Project root; defaults to the relative path ``../../``.
        model_dir: ``project_dir / "models"`` unless overridden.
        outputs_dir: ``project_dir / "outputs"`` unless overridden.
        data_dir: ``project_dir / "data"`` unless overridden.
        raw_dir: ``data_dir / "raw"`` unless overridden.
        interim_dir: ``data_dir / "interim"`` unless overridden.
        processed_dir: ``data_dir / "processed"`` unless overridden.
    """

    project_dir: Path = Path("../../")
    # None means "not provided"; __post_init__ replaces it with the derived
    # path, so after construction every attribute below is always a Path.
    model_dir: Path = None  # type: ignore[assignment]
    outputs_dir: Path = None  # type: ignore[assignment]
    data_dir: Path = None  # type: ignore[assignment]
    raw_dir: Path = None  # type: ignore[assignment]
    interim_dir: Path = None  # type: ignore[assignment]
    processed_dir: Path = None  # type: ignore[assignment]

    def __post_init__(self) -> None:
        """Fill in every directory that was not passed explicitly."""
        # Accept plain strings as well as Path objects for the root.
        self.project_dir = Path(self.project_dir)

        if self.model_dir is None:
            self.model_dir = self.project_dir / "models"
        if self.outputs_dir is None:
            self.outputs_dir = self.project_dir / "outputs"
        if self.data_dir is None:
            self.data_dir = self.project_dir / "data"

        # data_dir must be settled first: the remaining paths hang off it.
        if self.raw_dir is None:
            self.raw_dir = self.data_dir / "raw"
        if self.interim_dir is None:
            self.interim_dir = self.data_dir / "interim"
        if self.processed_dir is None:
            self.processed_dir = self.data_dir / "processed"


config = Config()

# Put the resolved project root on the import path so project-local packages
# (e.g. the `src` package imported later in this notebook) can be found.
# Notebook cells are routinely re-run, so skip the append when the entry is
# already present instead of growing sys.path with duplicates.
project_root = str(config.project_dir.resolve())
if project_root not in sys.path:
    sys.path.append(project_root)

# Load environment variables from a .env file if one can be located. As the
# cell's last expression, the boolean it returns is echoed in the output.
load_dotenv()

CPU times: user 276 ms, sys: 80.1 ms, total: 356 ms
Wall time: 567 ms


False

In [2]:
# Load the three interim tables that are joined below.
df_reviews = pd.read_csv(
    config.interim_dir
    / "olist_order_reviews_translated_label_review_categories_merged.csv"
)
df_orders = pd.read_csv(config.interim_dir / "olist_orders_customer_merged.csv")
df_item_products = pd.read_csv(
    config.interim_dir / "olist_item_product_seller_merged.csv"
)

# Orders -> items/products/sellers. A left join keeps every order from
# df_orders, including ones without a matching item row.
df_orders_item_products_merged = df_orders.merge(
    df_item_products, how="left", on="order_id"
)
# Sanity check: the join must not change the set size of distinct order ids.
assert (
    df_orders["order_id"].nunique()
    == df_orders_item_products_merged["order_id"].nunique()
)

# Attach the reviews on the same key, again keeping every left-hand row.
df_orders_item_products_review_merged = df_orders_item_products_merged.merge(
    df_reviews, how="left", on="order_id"
)
assert (
    df_orders_item_products_merged["order_id"].nunique()
    == df_orders_item_products_review_merged["order_id"].nunique()
)

In [3]:
# Processed order-level table plus the translated, category-labelled reviews.
df_order_info_details = pd.read_csv(config.processed_dir / "order_info_details.csv")
df_reviews = pd.read_csv(
    config.interim_dir
    / "olist_order_reviews_translated_label_review_categories_merged.csv"
)

# Collapse the reviews to a single row per order_id before joining.
# NOTE(review): GroupBy.last() picks the last *non-null* value of each column
# independently, so for an order with several reviews the resulting row can
# combine fields from different reviews - confirm this is the intended
# meaning of "last review".
df_reviews_last = df_reviews.groupby("order_id").last().reset_index()

df_order_info_details_reviews_merged = df_order_info_details.merge(
    df_reviews_last, how="left", on="order_id"
)
# With one review row per order_id, the left join must keep the row count.
assert len(df_order_info_details_reviews_merged) == len(df_order_info_details)

# Keep only the rows that actually carry a translated review comment.
has_comment = df_order_info_details_reviews_merged[
    "review_comment_message_en"
].notna()
df_review_no_null = df_order_info_details_reviews_merged[has_comment]
print(f"df_review_no_null.shape: {df_review_no_null.shape}")


df_review_no_null.shape: (40818, 90)


In [4]:
# Split the commented reviews into the two comparison windows used by the
# "First Half" / "Second Half" sections below, based on order_purchase_date.
# NOTE(review): order_purchase_date is read from CSV without date parsing, so
# these bounds are compared as strings. If the column holds 'YYYY-MM-DD'
# values, `<= '2017-10'` / `<= '2018-08'` exclude every day of the end month
# (e.g. '2017-10-01' sorts after '2017-10') - confirm the column format and
# whether the end month is meant to be included.

# First-half window: bounds '2017-02' .. '2017-10'.
fh_df_reviews_no_null = df_review_no_null.query(
    "order_purchase_date >= '2017-02' & order_purchase_date <= '2017-10'"
)
# Second-half window: bounds '2017-12' .. '2018-08'.
sh_df_reviews_no_null = df_review_no_null.query(
    "order_purchase_date >= '2017-12' & order_purchase_date <= '2018-08'"
)

# Item Categories with Low Sales Growth

In [5]:
from src.analyzer.agg_utils import agg_review_category_ratio_by_category_name

## watches_gifts

### First Half

In [6]:
# Review-category breakdown for the "watches_gifts" item category over the
# first-half window; the helper's printed summary is this cell's output.
agg_review_category_ratio_by_category_name(fh_df_reviews_no_null, "watches_gifts")

Praise Review num:278
Praise Issue num:130
Issue ratio:0.28
Overall
                       category  count  percentage
0               Delivery_Praise    132        0.29
1                Delivery_Issue     44        0.10
2        Product Quality_Praise     39        0.09
3                Unclassifiable     39        0.09
4                Product_Praise     34        0.07
5         Product Quality_Issue     12        0.03
6        Product Mismatch_Issue     11        0.02
7   Product Satisfaction_Praise     10        0.02
8          Product Defect_Issue      9        0.02
9                Service_Praise      9        0.02
10       Customer Service_Issue      7        0.02
11      Product Condition_Issue      7        0.02
12      Customer Service_Praise      5        0.01
13                       Praise      4        0.01
14               Quality_Praise      4        0.01
15                Product_Issue      4        0.01
16                 Store_Praise      3        0.01
17           P

### Second Half

In [7]:
# Same breakdown for "watches_gifts" over the second-half window, for
# comparison with the first-half figures.
agg_review_category_ratio_by_category_name(sh_df_reviews_no_null, "watches_gifts")

Praise Review num:756
Praise Issue num:634
Issue ratio:0.41
Overall
                       category  count  percentage
0               Delivery_Praise    356        0.23
1                Delivery_Issue    251        0.16
2                Unclassifiable    132        0.09
3        Product Quality_Praise    106        0.07
4                Product_Praise     97        0.06
5        Product Mismatch_Issue     81        0.05
6         Product Quality_Issue     43        0.03
7               Packaging_Issue     28        0.02
8        Customer Service_Issue     27        0.02
9   Product Satisfaction_Praise     26        0.02
10               Service_Praise     20        0.01
11      Customer Service_Praise     19        0.01
12   Product Authenticity_Issue     16        0.01
13         Product Defect_Issue     14        0.01
14               Quality_Praise     11        0.01
15           Product Size_Issue     11        0.01
16                Product_Issue     11        0.01
17         Pro

## cool_stuff

In [8]:
# First-half window: review-category breakdown for the "cool_stuff" item
# category.
agg_review_category_ratio_by_category_name(fh_df_reviews_no_null, "cool_stuff")

Praise Review num:355
Praise Issue num:161
Issue ratio:0.28
Overall
                         category  count  percentage
0                 Delivery_Praise    162        0.28
1          Product Quality_Praise     64        0.11
2                  Delivery_Issue     63        0.11
3                  Unclassifiable     49        0.09
4                  Product_Praise     45        0.08
5           Product Quality_Issue     11        0.02
6         Customer Service_Praise     11        0.02
7                  Service_Praise     10        0.02
8          Product Mismatch_Issue     10        0.02
9                  Quality_Praise      7        0.01
10                  Product_Issue      6        0.01
11    Product Satisfaction_Praise      6        0.01
12                   Store_Praise      5        0.01
13            Shipping Cost_Issue      4        0.01
14                  Quality_Issue      4        0.01
15    Overall Satisfaction_Praise      4        0.01
16                         Prai

In [9]:
# Second-half window: review-category breakdown for the "cool_stuff" item
# category.
agg_review_category_ratio_by_category_name(sh_df_reviews_no_null, "cool_stuff")

Praise Review num:343
Praise Issue num:226
Issue ratio:0.36
Overall
                       category  count  percentage
0               Delivery_Praise    168        0.27
1                Delivery_Issue    103        0.16
2                Unclassifiable     55        0.09
3        Product Quality_Praise     52        0.08
4                Product_Praise     38        0.06
5         Product Quality_Issue     15        0.02
6                Quality_Praise     14        0.02
7        Product Mismatch_Issue     13        0.02
8   Product Satisfaction_Praise      8        0.01
9        Customer Service_Issue      8        0.01
10              Packaging_Issue      6        0.01
11               Service_Praise      6        0.01
12                       Praise      5        0.01
13      Customer Service_Praise      5        0.01
14                Product_Issue      5        0.01
15        Recommendation_Praise      5        0.01
16         Product Defect_Issue      5        0.01
17            

## toys

### First Half

In [10]:
# Review-category breakdown for the "toys" item category over the first-half
# window.
agg_review_category_ratio_by_category_name(fh_df_reviews_no_null, "toys")

Praise Review num:294
Praise Issue num:120
Issue ratio:0.26
Overall
                         category  count  percentage
0                 Delivery_Praise    152        0.33
1                  Unclassifiable     44        0.09
2                  Delivery_Issue     42        0.09
3                  Product_Praise     32        0.07
4          Product Quality_Praise     28        0.06
5          Product Mismatch_Issue     14        0.03
6     Product Satisfaction_Praise     10        0.02
7           Product Quality_Issue     10        0.02
8         Customer Service_Praise      9        0.02
9                  Quality_Praise      8        0.02
10           Product Damage_Issue      7        0.02
11  Product Recommendation_Praise      7        0.02
12         Customer Service_Issue      5        0.01
13                 Service_Praise      5        0.01
14                  Quality_Issue      5        0.01
15                         Praise      4        0.01
16                  Seller_Prai

### Second Half

In [11]:
# Same breakdown for "toys" over the second-half window.
agg_review_category_ratio_by_category_name(sh_df_reviews_no_null, "toys")

Praise Review num:300
Praise Issue num:252
Issue ratio:0.40
Overall
                       category  count  percentage
0               Delivery_Praise    146        0.23
1                Delivery_Issue    104        0.17
2                Unclassifiable     65        0.10
3                Product_Praise     40        0.06
4        Product Quality_Praise     37        0.06
5        Product Mismatch_Issue     27        0.04
6         Product Quality_Issue     25        0.04
7       Customer Service_Praise     12        0.02
8                Service_Praise      8        0.01
9          Product Damage_Issue      6        0.01
10      Product Condition_Issue      6        0.01
11              Packaging_Issue      6        0.01
12  Product Satisfaction_Praise      6        0.01
13               Quality_Praise      5        0.01
14                Product_Issue      5        0.01
15           Product Size_Issue      5        0.01
16                       Praise      5        0.01
17     Order C

## sports_leisure

### First Half

In [12]:
# Review-category breakdown for the "sports_leisure" item category over the
# first-half window.
agg_review_category_ratio_by_category_name(fh_df_reviews_no_null, "sports_leisure")

Praise Review num:443
Praise Issue num:242
Issue ratio:0.31
Overall
                         category  count  percentage
0                 Delivery_Praise    223        0.28
1                  Delivery_Issue    116        0.15
2                  Unclassifiable     88        0.11
3          Product Quality_Praise     57        0.07
4                  Product_Praise     36        0.05
5          Product Mismatch_Issue     26        0.03
6         Customer Service_Praise     14        0.02
7                    Store_Praise     11        0.01
8                  Service_Praise     10        0.01
9                 Packaging_Issue      8        0.01
10    Product Satisfaction_Praise      8        0.01
11        Order Fulfillment_Issue      6        0.01
12          Product Quality_Issue      6        0.01
13                         Praise      6        0.01
14          Recommendation_Praise      5        0.01
15                    Order_Issue      5        0.01
16  Product Recommendation_Prai

# Item Categories with High Sales Growth

## health_beauty

### First Half

In [13]:
# Review-category breakdown for the "health_beauty" item category over the
# first-half window.
agg_review_category_ratio_by_category_name(fh_df_reviews_no_null, "health_beauty")

Praise Review num:452
Praise Issue num:242
Issue ratio:0.29
Overall
                       category  count  percentage
0               Delivery_Praise    226        0.27
1                Unclassifiable    117        0.14
2                Delivery_Issue     95        0.12
3                Product_Praise     59        0.07
4        Product Quality_Praise     44        0.05
5        Product Mismatch_Issue     32        0.04
6         Product Quality_Issue     16        0.02
7                        Praise     13        0.02
8                Service_Praise     13        0.02
9                Quality_Praise     10        0.01
10  Product Satisfaction_Praise      9        0.01
11    Product Performance_Issue      9        0.01
12      Customer Service_Praise      7        0.01
13              Packaging_Issue      7        0.01
14        Recommendation_Praise      6        0.01
15                Product_Issue      6        0.01
16   Product Authenticity_Issue      5        0.01
17         Pro

### Second Half

In [14]:
# Same breakdown for "health_beauty" over the second-half window.
agg_review_category_ratio_by_category_name(sh_df_reviews_no_null, "health_beauty")

Praise Review num:934
Praise Issue num:675
Issue ratio:0.37
Overall
                       category  count  percentage
0               Delivery_Praise    506        0.28
1                Delivery_Issue    326        0.18
2                Unclassifiable    194        0.11
3        Product Quality_Praise     93        0.05
4                Product_Praise     72        0.04
5        Product Mismatch_Issue     68        0.04
6                Service_Praise     36        0.02
7         Product Quality_Issue     27        0.01
8   Product Satisfaction_Praise     26        0.01
9        Customer Service_Issue     21        0.01
10      Customer Service_Praise     21        0.01
11              Packaging_Issue     17        0.01
12               Quality_Praise     16        0.01
13               Shipping_Issue     16        0.01
14         Product Damage_Issue     14        0.01
15                Product_Issue     12        0.01
16                  Order_Issue     10        0.01
17      Order 