In [17]:
#import basic Libraries
import pandas as pd
import numpy as np
from tqdm import tqdm

#Importing Sklearn
from sklearn.preprocessing import StandardScaler, OneHotEncoder

#Import Market Basket Models
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Cleaning Dataset

In [18]:
opt = pd.read_csv('../data/hidem_ord.csv')
opt = opt.drop('Unnamed: 0', axis=1)

In [19]:
opt

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,department,aisle,hi_dem,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order
0,1,11109,2,1,Organic 4% Milk Fat Whole Milk Cottage Cheese,108,16,dairy eggs,other creams cheeses,True,112108,train,4,4,10,9.0
1,1,22035,8,1,Organic Whole String Cheese,21,16,dairy eggs,packaged cheese,True,112108,train,4,4,10,9.0
2,36,19660,2,1,Spring Water,115,7,beverages,water seltzer sparkling water,True,79431,train,23,6,18,30.0
3,36,48679,7,1,Organic Garnet Sweet Potato (Yam),83,4,produce,fresh vegetables,True,79431,train,23,6,18,30.0
4,36,46979,8,1,Asparagus,83,4,produce,fresh vegetables,True,79431,train,23,6,18,30.0
5,38,21616,4,1,Organic Baby Arugula,123,4,produce,packaged vegetables fruits,True,42756,train,6,6,16,24.0
6,96,40706,3,1,Organic Grape Tomatoes,123,4,produce,packaged vegetables fruits,True,17227,train,7,6,20,30.0
7,98,43654,3,1,Whole Milk Greek Blended Vanilla Bean Yogurt,120,16,dairy eggs,yogurt,True,56463,train,41,3,8,14.0
8,98,35951,8,1,Organic Unsweetened Almond Milk,91,16,dairy eggs,soy lactosefree,True,56463,train,41,3,8,14.0
9,98,9896,10,1,Uncured Applewood Smoked Bacon,106,12,meat seafood,hot dogs bacon sausage,True,56463,train,41,3,8,14.0


In [20]:
#adding food group for non food departments as 'other'
non_food_dep = ['household','pets','personal care', 'alcohol','beverages', 'babies', 'missing']
#adding food group for departments which are not ambiguous to proper food group
food_dep = {'bakery':'sugar, snacks, and bakeries','meat seafood':'proteins', 'canned goods':'processed foods'}

for (i, row) in opt.iterrows():
    if row['department'] in non_food_dep:
        opt.at[i,'foodgroup'] = 'others'
    elif row['department'] in food_dep:
        opt.at[i,'foodgroup'] = food_dep[row['department']]

In [21]:
#converting the foodgroup to string to preserve order.
opt['foodgroup'] = opt['foodgroup'].astype(str)

This project aims to implement recommender specifically for diet. Therefore, information related to diet could be different with other diet research and not percise. 

This project uses 8 food groups, such as #carbohydrates #proteins #vegetables and fruits #diary products and alternatives #fat #processed foods #sugar, sweets, and bakeries #others.

In [22]:
#adding food group for aisle
food_aisle_carbohydrates = ['bulk grains rice dried goods', 'cereal', 'doughs gelatins bake mixes', 'dry pasta', 'fresh pasta', 'frozen breads doughs', 'grains rice dried goods', 'granola','hot cereal pancake mixes']
food_aisle_protein = ['eggs','frozen meat seafood', 'lunch meat','tofu meat alternatives']
food_aisle_vegetables_and_fruits = ['bulk dried fruits vegetables', 'fresh fruits', 'fresh herbs', 'fresh vegetables', 'frozen juice', 'packaged vegetables fruits', 'pickled goods olives', 'packaged produce']
food_aisle_diary_products_and_alternatives = ['cream', 'other creams cheeses', 'packaged cheese','soy lactosefree', 'specialty cheeses', 'yogurt', 'milk']
food_aisle_fat = ['butter', 'oils vinegars', 'nuts seeds dried fruit']
food_aisle_processed_foods = ['frozen meals', 'frozen pizza', 'instant foods', 'prepared meals', 'chips pretzels', 'crackers', 'energy granola bars', 'fruit vegetable snacks', 'breakfast bars pastries', 'frozen appetizers sides', 'prepared soups salads', 'frozen produce', 'frozen vegan vegetarian']
food_aisle_sugar_sweets_and_bakeries = ['frozen dessert','honeys syrups nectars', 'ice cream ice', 'refrigerated pudding desserts', 'candy chocolate', 'cookies cakes', 'ice cream toppings', 'mint gum', 'popcorn jerky', 'trail mix snack mix', 'baking ingredients', 'frozen breakfast']

for (i, row) in opt.iterrows():
    if row['aisle'] in food_aisle_carbohydrates:
        opt.at[i,'foodgroup'] = 'carbohydrates'
    elif row['aisle'] in food_aisle_protein:
        opt.at[i,'foodgroup'] = 'proteins'
    elif row['aisle'] in food_aisle_vegetables_and_fruits:
        opt.at[i,'foodgroup'] = 'vegetables and fruits'
    elif row['aisle'] in food_aisle_diary_products_and_alternatives:
        opt.at[i,'foodgroup'] = 'diary products and alternatives'
    elif row['aisle'] in food_aisle_fat:
        opt.at[i,'foodgroup'] = 'fat'
    elif row['aisle'] in food_aisle_processed_foods:
        opt.at[i,'foodgroup'] = 'processed foods'
    elif row['aisle'] in food_aisle_sugar_sweets_and_bakeries:
        opt.at[i,'foodgroup'] = 'sugar sweets and bakeries'
    else:
        opt.at[i,'foodgroup'] = 'others'

In [23]:
unique_pair = opt[opt.foodgroup == 'nan'].groupby(['aisle','department']).size()

In [24]:
unique_pair

Series([], dtype: int64)

In [25]:
opt[opt.foodgroup == 'nan']

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,department,aisle,hi_dem,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,foodgroup


In [27]:
save_file = opt.to_csv('../data/merged_data_for_diet.csv')

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,department,aisle,hi_dem,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,foodgroup
0,1,11109,2,1,Organic 4% Milk Fat Whole Milk Cottage Cheese,108,16,dairy eggs,other creams cheeses,True,112108,train,4,4,10,9.0,diary products and alternatives
1,1,22035,8,1,Organic Whole String Cheese,21,16,dairy eggs,packaged cheese,True,112108,train,4,4,10,9.0,diary products and alternatives
2,36,19660,2,1,Spring Water,115,7,beverages,water seltzer sparkling water,True,79431,train,23,6,18,30.0,others
3,36,48679,7,1,Organic Garnet Sweet Potato (Yam),83,4,produce,fresh vegetables,True,79431,train,23,6,18,30.0,vegetables and fruits
4,36,46979,8,1,Asparagus,83,4,produce,fresh vegetables,True,79431,train,23,6,18,30.0,vegetables and fruits
5,38,21616,4,1,Organic Baby Arugula,123,4,produce,packaged vegetables fruits,True,42756,train,6,6,16,24.0,vegetables and fruits
6,96,40706,3,1,Organic Grape Tomatoes,123,4,produce,packaged vegetables fruits,True,17227,train,7,6,20,30.0,vegetables and fruits
7,98,43654,3,1,Whole Milk Greek Blended Vanilla Bean Yogurt,120,16,dairy eggs,yogurt,True,56463,train,41,3,8,14.0,diary products and alternatives
8,98,35951,8,1,Organic Unsweetened Almond Milk,91,16,dairy eggs,soy lactosefree,True,56463,train,41,3,8,14.0,diary products and alternatives
9,98,9896,10,1,Uncured Applewood Smoked Bacon,106,12,meat seafood,hot dogs bacon sausage,True,56463,train,41,3,8,14.0,others
