# 4.8 Grouping Data & Aggregating Variables.ipynb — TOC
	1.	Setup (imports + paths)
	2.	Exercise
        2.1 Create + check dataframe
        2.2 Grouping with pandas
        2.3 Aggregating with agg()
        2.4 Aggregating with transform()
        2.5 Deriving columns with loc()
	3.	Task
        3.1 Run aggregations on full dataset
        3.2 Investigate price outliers
        3.3 Decision / handling for outliers (flag/exclude)
	4.	Outputs / notes

# Importing

In [249]:
# Import libraries
import pandas as pd
import numpy as np
import os

In [250]:
# Import datasets
# Project root folder. Defaults to the original local location, but can be
# redirected on another machine by setting the INSTACART_PROJECT_DIR
# environment variable before starting the kernel.
path = os.environ.get(
    'INSTACART_PROJECT_DIR',
    r'/Users/spencer/Documents/Career Foundry/Data Immersion/4 Python Fundamentals for Data Analysts/Instacart Basket Analysis',
)
# NOTE(review): unpickling can execute arbitrary code; only load pickle files produced by this project.
ords_prods_merge = pd.read_pickle(os.path.join(path, '02 Data', 'Prepared Data', 'orders_products_merged_updated.pkl'))

# Exercise

## Create and check dataframe

In [251]:
# Take the first 1,000,000 rows (by position) as a smaller working subset
df = ords_prods_merge.iloc[:1_000_000]

In [252]:
# Confirm the subset's dimensions as (rows, columns)
df.shape

(1000000, 18)

In [253]:
# Preview the first rows of the subset
df.head()

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order,first_order,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,prices,price_label,busiest_day,busiest_days,busiest_period_of_day
0,2539329,1,1,2,8,,True,196,1,0,Soda,77.0,7.0,9.0,Mid-range product,Regularly busy,Regularly busy,Average orders
1,2539329,1,1,2,8,,True,14084,2,0,Organic Unsweetened Vanilla Almond Milk,91.0,16.0,12.5,Mid-range product,Regularly busy,Regularly busy,Average orders
2,2539329,1,1,2,8,,True,12427,3,0,Original Beef Jerky,23.0,19.0,4.4,Low-range product,Regularly busy,Regularly busy,Average orders
3,2539329,1,1,2,8,,True,26088,4,0,Aged White Cheddar Popcorn,23.0,19.0,4.7,Low-range product,Regularly busy,Regularly busy,Average orders
4,2539329,1,1,2,8,,True,26405,5,0,XL Pick-A-Size Paper Towel Rolls,54.0,17.0,1.0,Low-range product,Regularly busy,Regularly busy,Average orders


## Grouping Data with pandas

You should always use the groupby() function as part of a series of steps, namely, the following:

1. Split the data into groups based on some criteria.
2. Apply a function to each group separately.
3. Combine the results into a dataframe or alternative data structure or create a new column in the current dataframe.

### Split data into groups

In [254]:
# groupby() on its own returns a DataFrameGroupBy object (see the output below),
# not a result table; an aggregation still has to be applied to it.
df.groupby('product_name')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x149d1c110>

### Aggregating Data with agg()


#### Performing a Single Aggregation

In [255]:
# Mean order_number per department, requested through agg() with a
# {column: [functions]} mapping
by_department = df.groupby('department_id')
by_department.agg({'order_number': ['mean']})

Unnamed: 0_level_0,order_number
Unnamed: 0_level_1,mean
department_id,Unnamed: 1_level_2
1.0,14.793523
2.0,17.091743
3.0,17.920595
4.0,17.893164
5.0,15.21427
6.0,15.382228
7.0,17.699986
8.0,16.485269
9.0,15.965921
10.0,20.091818


In [256]:
# Same figures without agg(): pick the column on the grouped object and
# call mean() on it directly
order_number_by_dept = df.groupby('department_id')['order_number']
order_number_by_dept.mean()

department_id
1.0     14.793523
2.0     17.091743
3.0     17.920595
4.0     17.893164
5.0     15.214270
6.0     15.382228
7.0     17.699986
8.0     16.485269
9.0     15.965921
10.0    20.091818
11.0    16.485395
12.0    15.617735
13.0    16.485052
14.0    17.523474
15.0    15.690354
16.0    18.015014
17.0    16.155822
18.0    19.606536
19.0    17.630640
20.0    17.138204
21.0    21.996844
Name: order_number, dtype: float64

#### Performing Multiple Aggregations


In [257]:
# Several statistics at once: list every aggregation to run on order_number
order_number_stats = ['mean', 'min', 'max']
df.groupby('department_id').agg({'order_number': order_number_stats})

Unnamed: 0_level_0,order_number,order_number,order_number
Unnamed: 0_level_1,mean,min,max
department_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1.0,14.793523,1,99
2.0,17.091743,1,98
3.0,17.920595,1,99
4.0,17.893164,1,99
5.0,15.21427,1,99
6.0,15.382228,1,99
7.0,17.699986,1,99
8.0,16.485269,1,91
9.0,15.965921,1,99
10.0,20.091818,1,99


### Aggregating Data with transform()


1. Split the data into groups based on the “user_id” column.
2. Apply the transform() function on the “order_number” column to generate the maximum orders for each user.
3. Create a new column, “max_order,” into which you’ll place the results of your aggregation.

In [258]:
# All three steps: split by user, take each user's highest order_number,
# and write that value back onto every row of the same user
order_numbers_by_user = ords_prods_merge.groupby(['user_id'])['order_number']
ords_prods_merge['max_order'] = order_numbers_by_user.transform("max")

In [259]:
# Inspect the first 100 rows to check the new max_order column across more than one user
ords_prods_merge.head(100)

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order,first_order,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,prices,price_label,busiest_day,busiest_days,busiest_period_of_day,max_order
0,2539329,1,1,2,8,,True,196,1,0,Soda,77.0,7.0,9.0,Mid-range product,Regularly busy,Regularly busy,Average orders,10
1,2539329,1,1,2,8,,True,14084,2,0,Organic Unsweetened Vanilla Almond Milk,91.0,16.0,12.5,Mid-range product,Regularly busy,Regularly busy,Average orders,10
2,2539329,1,1,2,8,,True,12427,3,0,Original Beef Jerky,23.0,19.0,4.4,Low-range product,Regularly busy,Regularly busy,Average orders,10
3,2539329,1,1,2,8,,True,26088,4,0,Aged White Cheddar Popcorn,23.0,19.0,4.7,Low-range product,Regularly busy,Regularly busy,Average orders,10
4,2539329,1,1,2,8,,True,26405,5,0,XL Pick-A-Size Paper Towel Rolls,54.0,17.0,1.0,Low-range product,Regularly busy,Regularly busy,Average orders,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,738281,2,4,2,10,8.0,False,21150,13,0,Fire Grilled Steak Bowl,38.0,1.0,5.9,Mid-range product,Regularly busy,Regularly busy,Most orders,14
96,1673511,2,5,3,11,8.0,False,47144,1,0,Unsweetened Original Almond Breeze Almond Milk,91.0,16.0,14.0,Mid-range product,Regularly busy,Least busy,Most orders,14
97,1673511,2,5,3,11,8.0,False,5322,2,0,Gluten Free Dark Chocolate Chunk Chewy with a ...,3.0,19.0,2.9,Low-range product,Regularly busy,Least busy,Most orders,14
98,1673511,2,5,3,11,8.0,False,17224,3,0,Oats & Honey Gluten Free Granola,3.0,19.0,1.6,Low-range product,Regularly busy,Least busy,Most orders,14


### Deriving Columns with loc()

In [260]:
# Create loyalty flags based on max orders:
#   > 40 -> Loyal, 11..40 -> Regular, <= 10 -> New
user_max_order = ords_prods_merge['max_order']
ords_prods_merge.loc[user_max_order > 40, 'loyalty_flag'] = 'Loyal customer'
ords_prods_merge.loc[(user_max_order > 10) & (user_max_order <= 40), 'loyalty_flag'] = 'Regular customer'
ords_prods_merge.loc[user_max_order <= 10, 'loyalty_flag'] = 'New customer'

In [261]:
# Check values: count the rows assigned to each loyalty_flag category
ords_prods_merge['loyalty_flag'].value_counts()

loyalty_flag
Regular customer    15891172
Loyal customer      10293959
New customer         6249358
Name: count, dtype: int64

In [262]:
# Preview only the columns of interest by selecting them with a list of names
loyalty_check_cols = ['user_id', 'loyalty_flag', 'order_number']
ords_prods_merge[loyalty_check_cols].head(60)

Unnamed: 0,user_id,loyalty_flag,order_number
0,1,New customer,1
1,1,New customer,1
2,1,New customer,1
3,1,New customer,1
4,1,New customer,1
5,1,New customer,2
6,1,New customer,2
7,1,New customer,2
8,1,New customer,2
9,1,New customer,2


# Task

In [263]:
# Task 1 verification: preview the full dataframe, which now carries the
# max_order and loyalty_flag columns added in the Exercise section
ords_prods_merge.head()

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order,first_order,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,prices,price_label,busiest_day,busiest_days,busiest_period_of_day,max_order,loyalty_flag
0,2539329,1,1,2,8,,True,196,1,0,Soda,77.0,7.0,9.0,Mid-range product,Regularly busy,Regularly busy,Average orders,10,New customer
1,2539329,1,1,2,8,,True,14084,2,0,Organic Unsweetened Vanilla Almond Milk,91.0,16.0,12.5,Mid-range product,Regularly busy,Regularly busy,Average orders,10,New customer
2,2539329,1,1,2,8,,True,12427,3,0,Original Beef Jerky,23.0,19.0,4.4,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer
3,2539329,1,1,2,8,,True,26088,4,0,Aged White Cheddar Popcorn,23.0,19.0,4.7,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer
4,2539329,1,1,2,8,,True,26405,5,0,XL Pick-A-Size Paper Towel Rolls,54.0,17.0,1.0,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer


In [264]:
# 2. Create aggregated mean of “order_number” column grouped by “department_id”
full_order_number_by_dept = ords_prods_merge.groupby('department_id')['order_number']
full_order_number_by_dept.mean()

department_id
1.0     15.457838
2.0     17.277920
3.0     17.170395
4.0     17.811403
5.0     15.215751
6.0     16.439806
7.0     17.225802
8.0     15.340650
9.0     15.895474
10.0    20.197148
11.0    16.170638
12.0    15.887671
13.0    16.583536
14.0    16.773669
15.0    16.165037
16.0    17.666284
17.0    15.694469
18.0    19.310514
19.0    17.177343
20.0    16.473447
21.0    22.902379
Name: order_number, dtype: float64

In [265]:
# 3. Analyze the result
# Recompute the same per-department means on the subset for comparison:
subset_order_number_by_dept = df.groupby('department_id')['order_number']
subset_order_number_by_dept.mean()

department_id
1.0     14.793523
2.0     17.091743
3.0     17.920595
4.0     17.893164
5.0     15.214270
6.0     15.382228
7.0     17.699986
8.0     16.485269
9.0     15.965921
10.0    20.091818
11.0    16.485395
12.0    15.617735
13.0    16.485052
14.0    17.523474
15.0    15.690354
16.0    18.015014
17.0    16.155822
18.0    19.606536
19.0    17.630640
20.0    17.138204
21.0    21.996844
Name: order_number, dtype: float64

## The aggregation results for the entire dataframe are similar to those of the subset. The department with the highest mean order number remains the same (dept 21).

In [266]:
# 4. Follow the instructions in the Exercise for creating a loyalty flag for existing customers using the transform() and loc() functions.
# Already completed in the Exercise section; preview the relevant columns:
flag_preview_cols = ['user_id', 'loyalty_flag', 'order_number']
ords_prods_merge[flag_preview_cols].head()

Unnamed: 0,user_id,loyalty_flag,order_number
0,1,New customer,1
1,1,New customer,1
2,1,New customer,1
3,1,New customer,1
4,1,New customer,1


In [267]:
# 5.The marketing team at Instacart wants to know whether there’s a difference between the spending habits of the three types of customers you identified.
# Compare the mean of the prices column across the loyalty_flag groups
prices_by_loyalty = ords_prods_merge.groupby('loyalty_flag')['prices']
prices_by_loyalty.mean()

loyalty_flag
Loyal customer      10.386384
New customer        13.294943
Regular customer    12.495916
Name: prices, dtype: float64

## This seems a little high on average for a normal grocery store item. Checking some stats:

In [268]:
# Summary statistics for prices, to see whether the minimum/maximum look plausible
ords_prods_merge['prices'].describe()

count    3.240429e+07
mean     1.198038e+01
std      4.956598e+02
min      1.000000e+00
25%      4.200000e+00
50%      7.400000e+00
75%      1.130000e+01
max      9.999900e+04
Name: prices, dtype: float64

## -> $99,999 is implausibly high for a grocery store item. Investigating further:

In [269]:
# List every row whose price is above 100
price_over_100 = ords_prods_merge['prices'] > 100
ords_prods_merge.loc[price_over_100]

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order,first_order,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,prices,price_label,busiest_day,busiest_days,busiest_period_of_day,max_order,loyalty_flag
1576,912404,17,12,2,14,5.0,False,21553,5,0,Lowfat 2% Milkfat Cottage Cheese,108.0,16.0,14900.0,High-range product,Regularly busy,Regularly busy,Most orders,40,Regular customer
1638,603376,17,22,6,16,4.0,False,21553,3,1,Lowfat 2% Milkfat Cottage Cheese,108.0,16.0,14900.0,High-range product,Regularly busy,Regularly busy,Most orders,40,Regular customer
16534,3264360,135,2,2,21,13.0,False,21553,6,0,Lowfat 2% Milkfat Cottage Cheese,108.0,16.0,14900.0,High-range product,Regularly busy,Regularly busy,Average orders,4,New customer
16540,892534,135,3,0,8,12.0,False,21553,3,1,Lowfat 2% Milkfat Cottage Cheese,108.0,16.0,14900.0,High-range product,Busiest day,Busiest days,Average orders,4,New customer
53711,229704,342,8,1,19,30.0,False,21553,9,0,Lowfat 2% Milkfat Cottage Cheese,108.0,16.0,14900.0,High-range product,Regularly busy,Busiest days,Average orders,16,Regular customer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32350064,3172853,205650,18,1,9,7.0,False,21553,17,1,Lowfat 2% Milkfat Cottage Cheese,108.0,16.0,14900.0,High-range product,Regularly busy,Busiest days,Most orders,25,Regular customer
32377297,2504315,205818,3,5,15,3.0,False,21553,13,0,Lowfat 2% Milkfat Cottage Cheese,108.0,16.0,14900.0,High-range product,Regularly busy,Regularly busy,Most orders,25,Regular customer
32377307,1108388,205818,5,4,5,1.0,False,21553,5,1,Lowfat 2% Milkfat Cottage Cheese,108.0,16.0,14900.0,High-range product,Least busy,Least busy,Fewest orders,25,Regular customer
32410137,1916142,206049,1,2,17,,True,21553,2,0,Lowfat 2% Milkfat Cottage Cheese,108.0,16.0,14900.0,High-range product,Regularly busy,Regularly busy,Average orders,5,New customer


In [270]:
# Narrow the search to the most extreme prices (above 90,000)
price_over_90000 = ords_prods_merge['prices'] > 90000
ords_prods_merge.loc[price_over_90000]

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order,first_order,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,prices,price_label,busiest_day,busiest_days,busiest_period_of_day,max_order,loyalty_flag
129036,183964,873,3,0,10,7.0,False,33664,11,0,2 % Reduced Fat Milk,84.0,16.0,99999.0,High-range product,Busiest day,Busiest days,Most orders,8,New customer
129049,1851256,873,4,6,12,13.0,False,33664,8,1,2 % Reduced Fat Milk,84.0,16.0,99999.0,High-range product,Regularly busy,Regularly busy,Most orders,8,New customer
284539,1915696,1893,1,5,17,,True,33664,10,0,2 % Reduced Fat Milk,84.0,16.0,99999.0,High-range product,Regularly busy,Regularly busy,Average orders,6,New customer
284550,2763293,1893,2,4,16,13.0,False,33664,6,1,2 % Reduced Fat Milk,84.0,16.0,99999.0,High-range product,Least busy,Least busy,Most orders,6,New customer
284573,2564805,1893,4,1,17,30.0,False,33664,3,1,2 % Reduced Fat Milk,84.0,16.0,99999.0,High-range product,Regularly busy,Busiest days,Average orders,6,New customer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32109300,2249946,204099,29,0,8,4.0,False,33664,1,0,2 % Reduced Fat Milk,84.0,16.0,99999.0,High-range product,Busiest day,Busiest days,Average orders,39,Regular customer
32109317,2363282,204099,31,0,9,2.0,False,33664,1,1,2 % Reduced Fat Milk,84.0,16.0,99999.0,High-range product,Busiest day,Busiest days,Most orders,39,Regular customer
32155162,3181945,204395,13,3,15,8.0,False,33664,25,0,2 % Reduced Fat Milk,84.0,16.0,99999.0,High-range product,Regularly busy,Least busy,Most orders,15,Regular customer
32284597,2486215,205227,7,3,20,4.0,False,33664,8,0,2 % Reduced Fat Milk,84.0,16.0,99999.0,High-range product,Regularly busy,Least busy,Average orders,12,Regular customer


## This is either free-range cottage cheese and milk imported from space, or there's a pricing error. Assume pricing error

In [271]:
# Treat prices above 100 as entry errors and blank them out with NaN
# NOTE(review): price_label is not recomputed here, so the affected rows keep
# their previous label — confirm whether that is intended.
is_price_outlier = ords_prods_merge['prices'] > 100
ords_prods_merge.loc[is_price_outlier, 'prices'] = np.nan

In [272]:
# Check for outliers again: an empty result means no price above 50 remains
price_over_50 = ords_prods_merge['prices'] > 50
ords_prods_merge.loc[price_over_50]

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order,first_order,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,prices,price_label,busiest_day,busiest_days,busiest_period_of_day,max_order,loyalty_flag


In [273]:
# Complete task 5 again with cleaned data (outlier prices are now NaN)
cleaned_prices_by_loyalty = ords_prods_merge.groupby('loyalty_flag')['prices']
cleaned_prices_by_loyalty.mean()

loyalty_flag
Loyal customer      7.773606
New customer        7.801332
Regular customer    7.798362
Name: prices, dtype: float64

In [274]:
# 6. Create a spending flag for each user based on the average price across all their orders
# First step: attach each user's mean of the prices column to every one of their rows
prices_by_user = ords_prods_merge.groupby(['user_id'])['prices']
ords_prods_merge['user_avg_item_price'] = prices_by_user.transform("mean")

In [275]:
# Inspect the first 100 rows to check the new user_avg_item_price column
ords_prods_merge.head(100)

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order,first_order,product_id,add_to_cart_order,reordered,...,aisle_id,department_id,prices,price_label,busiest_day,busiest_days,busiest_period_of_day,max_order,loyalty_flag,user_avg_item_price
0,2539329,1,1,2,8,,True,196,1,0,...,77.0,7.0,9.0,Mid-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797
1,2539329,1,1,2,8,,True,14084,2,0,...,91.0,16.0,12.5,Mid-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797
2,2539329,1,1,2,8,,True,12427,3,0,...,23.0,19.0,4.4,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797
3,2539329,1,1,2,8,,True,26088,4,0,...,23.0,19.0,4.7,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797
4,2539329,1,1,2,8,,True,26405,5,0,...,54.0,17.0,1.0,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,738281,2,4,2,10,8.0,False,21150,13,0,...,38.0,1.0,5.9,Mid-range product,Regularly busy,Regularly busy,Most orders,14,Regular customer,7.515897
96,1673511,2,5,3,11,8.0,False,47144,1,0,...,91.0,16.0,14.0,Mid-range product,Regularly busy,Least busy,Most orders,14,Regular customer,7.515897
97,1673511,2,5,3,11,8.0,False,5322,2,0,...,3.0,19.0,2.9,Low-range product,Regularly busy,Least busy,Most orders,14,Regular customer,7.515897
98,1673511,2,5,3,11,8.0,False,17224,3,0,...,3.0,19.0,1.6,Low-range product,Regularly busy,Least busy,Most orders,14,Regular customer,7.515897


In [276]:
# Create spending flags from each user's average item price:
#   below 10 -> Low spender, 10 or above -> High spender
user_avg_price = ords_prods_merge['user_avg_item_price']
ords_prods_merge.loc[user_avg_price < 10, 'spending_habit'] = 'Low spender'
ords_prods_merge.loc[user_avg_price >= 10, 'spending_habit'] = 'High spender'

In [277]:
# Check values: count the rows assigned to each spending_habit category
ords_prods_merge['spending_habit'].value_counts()

spending_habit
Low spender     32314611
High spender      119878
Name: count, dtype: int64

In [278]:
# 7. Create an order frequency flag that marks the regularity of a user’s ordering behavior
# First step: attach each user's median days_since_prior_order to every one of their rows
order_gaps_by_user = ords_prods_merge.groupby(['user_id'])['days_since_prior_order']
ords_prods_merge['median_days_between_orders'] = order_gaps_by_user.transform("median")

In [279]:
# Inspect the first 100 rows to check the new median_days_between_orders column
ords_prods_merge.head(100)

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order,first_order,product_id,add_to_cart_order,reordered,...,prices,price_label,busiest_day,busiest_days,busiest_period_of_day,max_order,loyalty_flag,user_avg_item_price,spending_habit,median_days_between_orders
0,2539329,1,1,2,8,,True,196,1,0,...,9.0,Mid-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797,Low spender,20.5
1,2539329,1,1,2,8,,True,14084,2,0,...,12.5,Mid-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797,Low spender,20.5
2,2539329,1,1,2,8,,True,12427,3,0,...,4.4,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797,Low spender,20.5
3,2539329,1,1,2,8,,True,26088,4,0,...,4.7,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797,Low spender,20.5
4,2539329,1,1,2,8,,True,26405,5,0,...,1.0,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797,Low spender,20.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,738281,2,4,2,10,8.0,False,21150,13,0,...,5.9,Mid-range product,Regularly busy,Regularly busy,Most orders,14,Regular customer,7.515897,Low spender,13.0
96,1673511,2,5,3,11,8.0,False,47144,1,0,...,14.0,Mid-range product,Regularly busy,Least busy,Most orders,14,Regular customer,7.515897,Low spender,13.0
97,1673511,2,5,3,11,8.0,False,5322,2,0,...,2.9,Low-range product,Regularly busy,Least busy,Most orders,14,Regular customer,7.515897,Low spender,13.0
98,1673511,2,5,3,11,8.0,False,17224,3,0,...,1.6,Low-range product,Regularly busy,Least busy,Most orders,14,Regular customer,7.515897,Low spender,13.0


In [280]:
# Create user frequency flags based on median days since prior order:
#   > 20 -> Non-frequent, above 10 up to 20 -> Regular, <= 10 -> Frequent
median_gap = ords_prods_merge['median_days_between_orders']
ords_prods_merge.loc[median_gap > 20, 'order_frequency_flag'] = 'Non-frequent customer'
ords_prods_merge.loc[(median_gap > 10) & (median_gap <= 20), 'order_frequency_flag'] = 'Regular customer'
ords_prods_merge.loc[median_gap <= 10, 'order_frequency_flag'] = 'Frequent customer'

In [281]:
# Check values: count the rows assigned to each order_frequency_flag category
ords_prods_merge['order_frequency_flag'].value_counts()

order_frequency_flag
Frequent customer        21577653
Regular customer          7217190
Non-frequent customer     3639646
Name: count, dtype: int64

In [282]:
# Final preview of the dataframe with all derived columns before exporting
ords_prods_merge.head()

Unnamed: 0,order_id,user_id,order_number,orders_day_of_week,order_hour_of_day,days_since_prior_order,first_order,product_id,add_to_cart_order,reordered,...,price_label,busiest_day,busiest_days,busiest_period_of_day,max_order,loyalty_flag,user_avg_item_price,spending_habit,median_days_between_orders,order_frequency_flag
0,2539329,1,1,2,8,,True,196,1,0,...,Mid-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797,Low spender,20.5,Non-frequent customer
1,2539329,1,1,2,8,,True,14084,2,0,...,Mid-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797,Low spender,20.5,Non-frequent customer
2,2539329,1,1,2,8,,True,12427,3,0,...,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797,Low spender,20.5,Non-frequent customer
3,2539329,1,1,2,8,,True,26088,4,0,...,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797,Low spender,20.5,Non-frequent customer
4,2539329,1,1,2,8,,True,26405,5,0,...,Low-range product,Regularly busy,Regularly busy,Average orders,10,New customer,6.367797,Low spender,20.5,Non-frequent customer


In [283]:
# 9. Export your dataframe as a pickle file and store it correctly in your “Prepared Data” folder.
aggregated_pickle_path = os.path.join(path, '02 Data', 'Prepared Data', 'orders_products_aggregated.pkl')
ords_prods_merge.to_pickle(aggregated_pickle_path)