### 🔹 Step 1: Setup & Imports

In [12]:
# Imports
import pandas as pd
import numpy as np
import os

# Define folder paths
base_path = r'C:\Users\moein\anaconda3\4 - Python Fundamentals\02 Data'
prepared_path = os.path.join(base_path, 'Prepared Data')

# Load the previously merged dataframe
file_path_1 = os.path.join(prepared_path, 'orders_products_combined.pkl')
orders_products_combined = pd.read_pickle(file_path_1)
file_path_2 = os.path.join(prepared_path, 'ords_prods_merge.pkl')
ords_prods_merge = pd.read_pickle(file_path_2)

In [13]:
orders_products_combined.columns

Index(['order_id', 'user_id', 'order_number', 'orders_day_of_week',
       'order_hour_of_day', 'days_since_prior_order', 'product_id',
       'add_to_cart_order', 'reordered', '_merge'],
      dtype='object')

In [14]:
ords_prods_merge.columns

Index(['order_id', 'user_id', 'order_number', 'orders_day_of_week',
       'order_hour_of_day', 'days_since_prior_order', 'product_id',
       'add_to_cart_order', 'reordered', '_merge', 'product_name', 'aisle_id',
       'department_id', 'prices', 'merge_flag'],
      dtype='object')

### 🔹 Step 2: Create price_label Column

In [15]:
# Create price_label column based on price ranges
ords_prods_merge.loc[ords_prods_merge['prices'] <= 5, 'price_label'] = 'Low-range product'
ords_prods_merge.loc[(ords_prods_merge['prices'] > 5) & (ords_prods_merge['prices'] <= 15), 'price_label'] = 'Mid-range product'
ords_prods_merge.loc[ords_prods_merge['prices'] > 15, 'price_label'] = 'High-range product'

### 🔹 Step 3: Identify Top and Bottom Days by Order Volume

In [17]:
# Count orders by day of week
day_counts = ords_prods_merge['orders_day_of_week'].value_counts().sort_values(ascending=False)
print(day_counts)

orders_day_of_week
0    6209632
1    5665830
6    4500246
2    4217766
5    4209449
3    3844096
4    3787193
Name: count, dtype: int64


In [18]:
# Define busiest and slowest days (replace with actual days from Step 3 output)
busiest_days = [0, 1]   # Example: Saturday & Sunday
slowest_days = [4, 5]   # Example: Wednesday & Thursday

# Assign new labels
ords_prods_merge['busiest_days'] = 'Regular days'
ords_prods_merge.loc[ords_prods_merge['orders_day_of_week'].isin(busiest_days), 'busiest_days'] = 'Busiest days'
ords_prods_merge.loc[ords_prods_merge['orders_day_of_week'].isin(slowest_days), 'busiest_days'] = 'Slowest days'

# Check new column distribution
print(ords_prods_merge['busiest_days'].value_counts(dropna=False))

busiest_days
Regular days    12562108
Busiest days    11875462
Slowest days     7996642
Name: count, dtype: int64


### 🔹 Step 5: Identify Order Volume by Hour

In [19]:
# Count frequency of orders by hour
hour_counts = ords_prods_merge['order_hour_of_day'].value_counts().sort_values(ascending=False)
print(hour_counts)

order_hour_of_day
10    2764390
11    2738585
14    2691563
15    2664522
13    2663272
12    2620800
16    2537469
9     2456661
17    2089452
8     1719952
18    1637922
19    1259382
20     977017
7      891928
21     796362
22     634737
23     402612
6      290770
0      218942
1      115786
5       88057
2       69431
4       53283
3       51317
Name: count, dtype: int64


### 🔹 Step 6: Create busiest_period_of_day Column

In [20]:
# Define busiest and quietest hours (adjust based on Step 5 output)
most_orders = [10, 11, 14, 15]   # Example busiest hours
fewest_orders = [3, 4, 5, 6]     # Example quietest hours

# Assign labels
ords_prods_merge['busiest_period_of_day'] = 'Average orders'
ords_prods_merge.loc[ords_prods_merge['order_hour_of_day'].isin(most_orders), 'busiest_period_of_day'] = 'Most orders'
ords_prods_merge.loc[ords_prods_merge['order_hour_of_day'].isin(fewest_orders), 'busiest_period_of_day'] = 'Fewest orders'

# Check new column distribution
print(ords_prods_merge['busiest_period_of_day'].value_counts(dropna=False))

busiest_period_of_day
Average orders    21091725
Most orders       10859060
Fewest orders       483427
Name: count, dtype: int64


### 🔹 Step 7: Export Updated DataFrame

In [21]:
# Export the updated dataframe
output_path = os.path.join(base_path, 'ords_prods_merge.pkl')
ords_prods_merge.to_pickle(output_path)