## Table of Contents

#### 01. Importing Libraries
#### 02. Profiling Based on Age
#### 03. Profiling Based on Income
#### 04. Profiling Based on "Produce"
#### 05. Profiling Based on Number of Dependents
#### 06. Aggregating
#### 07. Export 

## 01. Importing Libraries

In [1]:
# Importing libraries
import pandas as pd
import numpy as np
import os

In [2]:
# Defining path
# Project root folder; the load and export cells join '02 Data/Prepared Data' onto it.
# Set the INSTACART_PATH environment variable to run this notebook on another
# machine; when it is unset, the original local folder is used unchanged.
path = os.environ.get('INSTACART_PATH', r'/Users/kurtson/Desktop/Instacart Basket Analysis')

In [3]:
# Read df_regions.pkl from '02 Data/Prepared Data' under the project path
df = pd.read_pickle(
    os.path.join(path, '02 Data', 'Prepared Data', 'df_regions.pkl')
)

## 02. Profiling Based on Age

In [4]:
# Age bands, part 1 of 3: rows with age below 35 are labelled 'Young Age'
df.loc[df['age'].lt(35), 'age_group'] = 'Young Age'

In [5]:
# Ages from 35 up to (but not including) 55 fall in the 'Middle Age' band
df.loc[df['age'].ge(35) & df['age'].lt(55), 'age_group'] = 'Middle Age'

In [6]:
# Ages of 55 and above are labelled 'Old Age'
df.loc[df['age'].ge(55), 'age_group'] = 'Old Age'

In [7]:
# Row count per age band; dropna=False keeps any unlabelled rows visible
df.loc[:, 'age_group'].value_counts(dropna=False)

age_group
Old Age       13604145
Middle Age    10173928
Young Age      8627968
Name: count, dtype: int64

In [8]:
#create day of week flags
# NOTE: this relabels order timing, not age, even though it sits in the age section.
# order_dow is overwritten in place, one code per cell, here and in the cells
# that follow (code 0 becomes 'Saturday' in this cell).
# Writing a string into a numeric column is deprecated in pandas >= 2.1
# (FutureWarning, slated to raise), so cast the column to object first.
# Assumes order_dow is loaded with a numeric dtype -- TODO confirm against df_regions.pkl.
df['order_dow'] = df['order_dow'].astype(object)
df.loc[df['order_dow'] == 0, 'order_dow'] = 'Saturday'

In [9]:
# Day code 1 is relabelled 'Sunday'
df.loc[df['order_dow'].eq(1), 'order_dow'] = 'Sunday'

In [10]:
# Day code 2 is relabelled 'Monday'
df.loc[df['order_dow'].eq(2), 'order_dow'] = 'Monday'

In [11]:
# Day code 3 is relabelled 'Tuesday'
df.loc[df['order_dow'].eq(3), 'order_dow'] = 'Tuesday'

In [12]:
# Day code 4 is relabelled 'Wednesday'
df.loc[df['order_dow'].eq(4), 'order_dow'] = 'Wednesday'

In [13]:
# Day code 5 is relabelled 'Thursday'
df.loc[df['order_dow'].eq(5), 'order_dow'] = 'Thursday'

In [14]:
# Day code 6 is relabelled 'Friday'
df.loc[df['order_dow'].eq(6), 'order_dow'] = 'Friday'

## 03. Profiling Based on Income

In [15]:
# Income bands, part 1 of 3: income below 75,000 is labelled 'lower class'
df.loc[df['income'].lt(75000), 'Income_Groups'] = 'lower class'

In [16]:
# Income from 75,000 up to (but not including) 150,000 is labelled 'middle class'
df.loc[df['income'].ge(75000) & df['income'].lt(150000), 'Income_Groups'] = 'middle class'

In [17]:
# Income of 150,000 and above is labelled 'upper class'
df.loc[df['income'].ge(150000), 'Income_Groups'] = 'upper class'

In [18]:
# Row count per income band; dropna=False keeps any unlabelled rows visible
df.loc[:, 'Income_Groups'].value_counts(dropna=False)

Income_Groups
middle class    17911161
lower class     10443165
upper class      4051715
Name: count, dtype: int64

## 04. Profiling Based on "Produce"

In [19]:
# Flag rows from department 4 as 'Produce Buyer'
# (presumably department_id 4 is the produce department -- verify against the departments lookup).
# Only matching rows are written; no label is assigned to the other rows here.

df.loc[df['department_id'].eq(4), 'produce_buyers'] = 'Produce Buyer'

In [20]:
# Row count per produce flag; dropna=False also counts the rows left unflagged (NaN)
df.loc[:, 'produce_buyers'].value_counts(dropna=False)

produce_buyers
NaN              22926750
Produce Buyer     9479291
Name: count, dtype: int64

## 05. Profiling Based on Number of Dependents

In [21]:
# Parent flag, part 1 of 2: at least one dependant means 'Parent'
df.loc[df['n_dependants'].gt(0), 'parent_group'] = 'Parent'

In [22]:
# Exactly zero dependants means 'No Children'
df.loc[df['n_dependants'].eq(0), 'parent_group'] = 'No Children'

In [23]:
# Row count per parent flag; dropna=False keeps any unlabelled rows visible

df.loc[:, 'parent_group'].value_counts(dropna=False)

parent_group
Parent         24308182
No Children     8097859
Name: count, dtype: int64

In [24]:
# Price bands, part 1 of 3 (a product flag, not related to dependants):
# prices above 15 are labelled 'High-range product'
df.loc[df['prices'].gt(15), 'price_range_loc'] = 'High-range product'

In [25]:
# Prices above 5 and up to 15 inclusive are labelled 'Mid-range product'
df.loc[df['prices'].gt(5) & df['prices'].le(15), 'price_range_loc'] = 'Mid-range product'

In [26]:
# Prices of 5 or less are labelled 'Low-range product'
df.loc[df['prices'].le(5), 'price_range_loc'] = 'Low-range product'

In [27]:
# Row count per price band; dropna=False keeps any unlabelled rows visible
df.loc[:, 'price_range_loc'].value_counts(dropna=False)

price_range_loc
Mid-range product     21861997
Low-range product     10126366
High-range product      417678
Name: count, dtype: int64

## 06. Aggregating

In [28]:
# Combined age x parent profiles, part 1 of 6: 'Old Age' + 'Parent' gives 'Older Parent'
df.loc[
    df['age_group'].eq('Old Age') & df['parent_group'].eq('Parent'),
    'parent_profile',
] = 'Older Parent'

In [29]:
# 'Middle Age' + 'Parent' gives 'Middle Aged Parent'
df.loc[
    df['age_group'].eq('Middle Age') & df['parent_group'].eq('Parent'),
    'parent_profile',
] = 'Middle Aged Parent'

In [30]:
# 'Young Age' + 'Parent' gives 'Younger Parent'
df.loc[
    df['age_group'].eq('Young Age') & df['parent_group'].eq('Parent'),
    'parent_profile',
] = 'Younger Parent'

In [31]:
# 'Old Age' + 'No Children' gives 'Older No Kids'
df.loc[
    df['age_group'].eq('Old Age') & df['parent_group'].eq('No Children'),
    'parent_profile',
] = 'Older No Kids'

In [32]:
# 'Middle Age' + 'No Children' gives 'Middle Aged No Kids'
df.loc[
    df['age_group'].eq('Middle Age') & df['parent_group'].eq('No Children'),
    'parent_profile',
] = 'Middle Aged No Kids'

In [33]:
# 'Young Age' + 'No Children' gives 'Younger No Kids'
df.loc[
    df['age_group'].eq('Young Age') & df['parent_group'].eq('No Children'),
    'parent_profile',
] = 'Younger No Kids'

In [34]:
# Row count per combined profile; dropna=False would expose rows matching none of the six combinations
df.loc[:, 'parent_profile'].value_counts(dropna=False)

parent_profile
Older Parent           10175407
Middle Aged Parent      7661830
Younger Parent          6470945
Older No Kids           3428738
Middle Aged No Kids     2512098
Younger No Kids         2157023
Name: count, dtype: int64

## 07. Export

In [35]:
# Write the dataframe, with the flag columns added above, to df_flags.pkl
# in '02 Data/Prepared Data' under the project path
df.to_pickle(
    os.path.join(path, '02 Data', 'Prepared Data', 'df_flags.pkl')
)