## Final Project

#### Recipe generator

In [512]:
import pandas as pd            # Read the csv as df
import ast                     # Change lists that are strings to actual lists

import dotenv                  # Load the data into SQL
import os                      # Load the data into SQL
import sqlalchemy as alch      # Load the data into SQL

import random as rd            # Pick the recipes
import warnings                # Makes it readable without 'errors'
warnings.filterwarnings("ignore")

In [513]:
raw_recipes = pd.read_csv('./recipes/RAW_recipes.csv')

In [514]:
len(raw_recipes)

231637

In [515]:
raw_recipes.sample()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
44663,chicken lickin good pork chops,35097,380,49611,2002-07-27,"['weeknight', 'time-to-make', 'course', 'main-...","[361.1, 39.0, 0.0, 65.0, 41.0, 41.0, 3.0]",5,"['dredge pork chops in mixture of flour , salt...",,"['lean pork chops', 'flour', 'salt', 'garlic p...",6


In [516]:
raw_recipes.tags = raw_recipes.tags.map(lambda x: ast.literal_eval(x))
raw_recipes.steps = raw_recipes.steps.map(lambda x: ast.literal_eval(x))
raw_recipes.ingredients = raw_recipes.ingredients.map(lambda x: ast.literal_eval(x))

In [148]:
# list_tags = list(filter(lambda x: 'main-dish' in x, raw_recipes.tags))
# len(list_tags)

In [517]:
raw_recipes["new"] = raw_recipes.tags.apply(lambda x: "main-dish" in x)

In [856]:
df = raw_recipes[raw_recipes.new].drop(columns='new').reset_index().drop(columns='index')

In [864]:
tag_list=[]
for row in df.tags:
    tag_list+=row
tag_list = list(set(tag_list))
tag_list.sort()

In [866]:
tag_list[:7]

['1-day-or-more',
 '15-minutes-or-less',
 '3-steps-or-less',
 '30-minutes-or-less',
 '4-hours-or-less',
 '5-ingredients-or-less',
 '60-minutes-or-less']

30 minutes or less seems the most reasonable time to add as a new column for the query

In [867]:
df["<30min"] = df.tags.apply(lambda x: "30-minutes-or-less" in x)

In [868]:
df["url"]=''
for index, row in df.iterrows():
    df["url"][index]= f"https://www.food.com/recipe/{row['name'].replace(' ','-')}-{row['id']}"

In [869]:
df.drop(columns=['contributor_id', 'submitted'], inplace=True)

In [870]:
df.shape[0]

71786

In [871]:
df.isna().sum()

name                0
id                  0
minutes             0
tags                0
nutrition           0
n_steps             0
steps               0
description      1546
ingredients         0
n_ingredients       0
<30min              0
url                 0
dtype: int64

In [872]:
ing_list=[]
for row in df.ingredients:
    ing_list+=row
ing_list = list(set(ing_list))
ing_list.sort()

In [873]:
len(ing_list)

9484

In [874]:
ing_list[:5]

['1% fat buttermilk',
 '1% fat cottage cheese',
 '1% low-fat milk',
 '10 inch low-fat flour tortillas',
 '10% cream']

In [875]:
milk = [i for i in ing_list if 'milk' in i and 'almond' not in i and 'coconut' not in i and 'non-dairy' not in i and 'oat' not in i and 'rice' not in i and 'soy' not in i]

In [876]:
milk[:5]

['1% fat buttermilk',
 '1% low-fat milk',
 '2% evaporated milk',
 '2% low-fat milk',
 '2% milk']

In [877]:
cheese = [i for i in ing_list if 'cheese' in i and 'non-dairy' not in i and 'soy' not in i and 'vegan' not in i and 'vegetarian' not in i]

In [878]:
cheese[:5]

['1% fat cottage cheese',
 '2% cheddar cheese',
 '2% fat cottage cheese',
 '2% mexican cheese blend',
 '2% mozzarella cheese']

In [879]:
cream = [i for i in ing_list if 'cream' in i and 'coconut' not in i and 'non-dairy' not in i and 'non-creamy' not in i and 'soy' not in i and 'vegan' not in i] 

In [880]:
cream[:5]

['10% cream',
 '15% cream',
 '18% table cream',
 '35% cream',
 '98% fat free condensed cream of celery soup']

In [881]:
butter = [i for i in ing_list if 'butter' in i and 'almond' not in i and 'apple' not in i and 'butterfly' not in i and 'butter substitute' not in i and 'butterball' not in i 
           and 'buttercup' not in i and 'butterfish' not in i and 'butterflied' not in i and 'butternut' not in i and "i can't believe it's not butter" not in i
            and 'butter beans' not in i and 'lettuce' not in i and 'peas' not in i and 'coconut' not in i and 'vegan' not in i]

In [882]:
butter[:5]

['1% fat buttermilk',
 'bread and butter pickles',
 'butter',
 'butter buds',
 'butter chicken paste']

In [883]:
yogurt = [i for i in ing_list if ('yogurt' in i or 'yoghurt' in i) and 'soy' not in i]

In [884]:
yogurt[:5]

['bulgarian yogurt',
 'fat free greek yogurt',
 'fat free pina colada yogurt',
 'fat-free key lime yogurt',
 'fat-free lemon yogurt']

**Dairy**
(No Fermented or Custard on the ingredient list)
- Butter
- Cheese
- Cream
- Milk
- Yogurt

In [885]:
dairy = butter + cheese + cream + milk + yogurt
dairy = list(set(dairy))
dairy.sort()

In [886]:
dairy [:5]

['1% fat buttermilk',
 '1% fat cottage cheese',
 '1% low-fat milk',
 '10% cream',
 '15% cream']

**Gluten**
- Barley
- Flour
- Lasagna
- Pasta
- Pizza
- Wheat

In [887]:
grain = [i for i in ing_list if ('wheat' in i or 'flour' in i or 'harina' in i or 'barley' in i or 'bread' in i or 'rye' in i
          or 'farro' in i or 'couscous' in i or 'kamut' in i or 'bagel' in i) 
          and 'gluten-free' not in i and 'polenta' not in i and 'rice' not in i and 'buckwheat' not in i and 'chestnut'
          not in i and 'coconut' not in i and 'corn' not in i and 'chickpea' not in i and 'oat' not in i and 'bean' not in i
          and 'almond' not in i and 'urad dal' not in i and 'tapioca' not in i and 'soy' not in i and 'amaranth' not in i
          and 'besan' not in i and 'gram' not in i and 'garbanzo' not in i and 'sorghum' not in i and 'potato' not in i
          and 'millet' not in i and 'manioc' not in i and 'sweetbreads' not in i and 'breadfruit' not in i and 'injera bread' not in i
         ]

In [888]:
grain[:5]

['10 inch low-fat flour tortillas',
 '10-inch flour tortilla',
 '10-inch flour tortillas',
 '12-inch flour tortillas',
 '6-inch flour tortillas']

In [889]:
pasta = [i for i in ing_list if ('pasta' in i or 'rigati' in i or 'campanelle' in i or 'cappelletti' in i or 'cavatappi' in i
        or 'rigatoni' in i or 'ditali' in i or 'penne' in i or 'farfalle' in i or 'fettuccine' in i or 'gemelli' in i
        or 'lasagna' in i or 'pappardelle' in i or 'tagliatelle' in i or 'pizza' in i or 'orzo' in i or 'macaroni' in i
        or 'tortellini' in i or 'spaghetti' in i or "mac n' cheese" in i or 'gnocchi' in i or 'dumplings' in i or 'pretzels' in i
        or 'cookie' in i) 
        and 'acini di pepe' not in i and 'angel hair' not in i and 'rice' not in i and 'bucatini' not in i and 'tubetti' not in i
        and 'impastata ricotta' not in i and 'gluten-free' not in i and 'quinoa' not in i]

In [890]:
pasta[:5]

['12-inch pizza crust',
 '15 inch pizza crusts',
 'alphabet pasta',
 'alphabet pasta and vegetable soup',
 'amaretti cookie']

In [891]:
gluten = grain + pasta
gluten = list(set(gluten))
gluten.sort()

In [892]:
nuts = [i for i in ing_list if ('nut' in i or 'almond' in i or 'pecan' in i or 'pistachio' in i or 'macadamia' in i
        or 'cereal' in i or 'cashew' in i or 'hickory' in i or 'filbert' in i)
       and 'minute' not in i and 'coconut' not in i and 'nutmeg' not in i and 'butternut' not in i
       ]

In [893]:
nuts[:5]

['all-bran cereal',
 'almond breeze non-dairy beverage',
 'almond butter',
 'almond extract',
 'almond flour']

In [894]:
# Vegetarian, Vegan

In [895]:
meat = [i for i in ing_list if ('meat' in i or 'pork' in i or 'lamb' in i or 'chicken' in i or 'beef' in i or 'duck' in i
        or 'buffalo' in i or 'poultry' in i or 'cod' in i or 'fish' in i or 'sushi' in i or 'tuna' in i or 'bass' in i
        or 'fillet' in i or 'ribs' in i or 'trout' in i or 'anchovy' in i or 'barramundi' in i or 'steak' in i or 'basa' in i
        or 'tenderloin' in i or 'bison' in i or 'angus' in i or 'salmon' in i or 'snapper' in i or 'liver' in i or 'pollo' in i
        or 'plaice' in i or 'pickerel' in i or 'ostrich' in i or 'roughy' in i or 'opah' in i or 'perch' in i or 'pike' in i
        or 'ahi' in i or 'marlin' in i or 'crab' in i or 'mackerel' in i or 'loin' in i or 'lobster' in i or 'leg' in i
        or 'sausage' in i or 'frankfurt' in i or 'lean' in i or 'shark' in i or 'whale' in i or 'doplhin' in i or 'clam' in i
        or 'kidney' in i or 'heart' in i or 'brain' in i or 'tongue' in i or 'arm roast' in i or 'bear' in i or 'rump' in i
        or 'round roast' in i or 'broil' in i or 'caribou' in i or 'deer' in i or 'blade' in i
        or 'chuck' in i or 'elk' in i or 'ham' in i or 'turkey' in i or 'mortadella' in i or 'kangaroo' in i or 'kobe' in i
        or 'mahi' in i or 'alligator' in i or 'crocodile' in i or 'llama' in i or 'bangus' in i or 'bacon' in i
        or 'antelope' in i)
       and 'veggie' not in i and 'vegetarian' not in i and 'vegan' not in i and 'meatless' not in i and 'artichoke heart' not in i 
       and 'bechamel' not in i and 'champagne' not in i and 'graham' not in i and 'chamomile' not in i and 'mace blades' not in i]

In [896]:
meat[:5]

['70% lean ground beef',
 '80% lean ground beef',
 '85% lean ground beef',
 '90% lean ground beef',
 '92% lean ground beef']

In [897]:
egg = [i for i in ing_list if 'egg' in i
            and 'eggplant' not in i and 'veggie' not in i and 'egg-free' not in i]

In [898]:
non_vegan = meat + dairy + egg

In [899]:
non_vegan = list(set(non_vegan))
non_vegan.sort()

Create a restrictions column to easily filter recipes afterwards

In [900]:
#df.drop(columns='restrictions', inplace=True) 
df['restrictions'] = np.empty((len(df), 0)).tolist()

In [901]:
for index, row in df.iterrows():
    for i in row['ingredients']:
        if i in dairy:
            df.restrictions[index].append('dairy')
            break
for index, row in df.iterrows():
    for i in row['ingredients']:
        if i in gluten:
            df.restrictions[index].append('gluten')
            break
for index, row in df.iterrows():
    for i in row['ingredients']:
        if i in nuts:
            df.restrictions[index].append('nuts')
            break
for index, row in df.iterrows():
    for i in row['ingredients']:
        if i in meat:
            df.restrictions[index].append('non-veggie')
            break
for index, row in df.iterrows():
    for i in row['ingredients']:
        if i in non_vegan:
            df.restrictions[index].append('non-vegan')
            break

In [903]:
df["dairy"] = df.restrictions.apply(lambda x: "dairy" in x)
df["gluten"] = df.restrictions.apply(lambda x: "gluten" in x)
df["nuts"] = df.restrictions.apply(lambda x: "nuts" in x)
df["non-veggie"] = df.restrictions.apply(lambda x: "non-veggie" in x)
df["non-vegan"] = df.restrictions.apply(lambda x: "non-vegan" in x)

In [905]:
df.sample()

Unnamed: 0,name,id,minutes,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,<30min,url,restrictions,dairy,gluten,nuts,non-veggie,non-vegan
34344,jackie kennedy onasis roast rack of lamb,158518,40,"[celebrity, 60-minutes-or-less, time-to-make, ...","[95.0, 8.0, 3.0, 4.0, 3.0, 3.0, 3.0]",8,"[preheat the oven to 450 degrees f, in a small...","from: cooking for madam, by marta sgubin.","[dry breadcrumbs, parsley, garlic clove, fresh...",8,False,https://www.food.com/recipe/jackie-kennedy-ona...,"[gluten, non-veggie, non-vegan]",False,True,False,True,True


In [None]:
# calories (#)
# total fat (PDV)
# sugar (PDV)
# sodium (PDV) , 
# protein (PDV) , 
# saturated fat (PDV)
# carbohydrates (PDV)
# PDV stands for Percent Daily Value.
# The percent Daily Value (%DV) shows how much a nutrient in a serving of food contributes to a total daily diet.

### Export the df to SQL after selecting only main dishes

In [907]:
df.to_csv('recipes.csv')

dotenv.load_dotenv()
password = os.getenv("sql_password") #pulls password to access SQL
dbName = "recipes" # finds the database that will create the conection
connectionData = f"mysql+pymysql://root:{password}@localhost/{dbName}" # establishes the conection
engine = alch.create_engine(connectionData) # creates the engine to run the config and conect

df.to_sql('recipes', con=engine, if_exists='append') # export to SQL.

In [908]:
def restrict(df, *args):
    '''
    This function applies your food restrictions and returns a filtered df,
    based on up to the 5 restrictions that are created: dairy, gluten, nuts, vegan, vegetarian.
    '''
    
    if len(args)==0:
        return df
    if len(args)==1:
        return df[ df[ args[0] ] == False]
    elif len(args)==2:
        return df[(df[ args[0] ] == False) & (df[ args[1] ] == False)]
    elif len(args)==3:
        return df[(df[ args[0] ] == False) & (df[ args[1] ] == False) & (df[ args[2] ] == False)]
    elif len(args)==4:
        return df[(df[ args[0] ] == False) & (df[ args[1] ] == False) & (df[ args[2] ] == False) & (df[ args[3] ] == False)]
    elif len(args)==5:
        return df[(df[ args[0] ] == False) & (df[ args[1] ] == False) & (df[ args[2] ] == False) & (df[ args[3] ] == False) & (df[ args[4] ] == False)]

In [843]:
def realfooder(*args):
    '''
    This function selects your favourite ingredients and returns a filtered df by those ingredients only
    '''
    new_list=[i for ing in args for i in ing_list if ing in i]
    new_list = set(new_list)
    
    ok=[]
    for index, row in df.iterrows():
        for i in row['ingredients']:
            if i in new_list:
                ok.append(row.id)
                break
    return df[df.id.isin(ok)]

In [842]:
realfooder('chicken', 'pork').sample()

Unnamed: 0,name,id,minutes,tags,n_steps,steps,description,ingredients,n_ingredients,<30min,restrictions,dairy,gluten,nut,meat,url
8037,braised to be praised paprika chicken,438035,85,"[time-to-make, course, main-ingredient, prepar...",16,[use a paper towel to pat the chicken pieces d...,very delicious.,"[chicken, salt, ground pepper, extra virgin ol...",15,False,"[dairy, gluten, meat]",True,True,False,True,https://www.food.com/recipe/braised-to-be-prai...


In [853]:
def picky_eater(*args):
    '''
    This function excludes your loathed ingredients and returns a filtered df excluding those ingredients
    '''
    new_list=[i for ing in args for i in ing_list if ing in i]
    new_list = set(new_list)
    
    yuck=[]
    for index, row in df.iterrows():
        for i in row['ingredients']:
            if i in new_list:
                yuck.append(row.id)
                break
    return df[~df.id.isin(yuck)]

In [855]:
picky_eater('meat', 'chicken', 'pork', 'beef', 'cheese').sample()

Unnamed: 0,name,id,minutes,tags,n_steps,steps,description,ingredients,n_ingredients,<30min,restrictions,dairy,gluten,nut,meat,url
56755,shrimp in garlic spices,20701,19,"[30-minutes-or-less, time-to-make, course, mai...",9,"[cut each shrimp in half lengthwise, combine t...",,"[large shrimp, dry white wine, olive oil, garl...",10,True,[dairy],True,False,False,False,https://www.food.com/recipe/shrimp-in-garlic-s...


**Watch out for k>n**

In [906]:
def n_of_recipes (n, k):
    '''
    This function recieves a total number of desired recipes and returns a random sample of indexes.
    These indexes will be used to determine the lucky df recipe indexes that are picked.
    '''

    if type(n)==int and type(k)==int and n>0 and k>0:
        if k<n:
            return rd.sample(range(n), k=10)
        else:
            print(f'There are only {n} recipes in this database')
            return range(n)
    else:
        return f'Nice try, now please provide a valid number between 1 and {n}'

**WWVA Test**

In [940]:
df2 = restrict(df, 'gluten', 'dairy').reset_index(drop=True)

In [942]:
len(df2)

25112

In [943]:
pick = n_of_recipes(len(df2), 10)
pick = [n-1 for n in pick]
pick

[22166, 11340, 9695, 2754, 4930, 16333, 4976, 22849, 5332, 19005]

In [945]:
df_chosen = df2.filter(items = pick, axis=0)

In [947]:
df_chosen.head()

Unnamed: 0,name,id,minutes,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,<30min,url,restrictions,dairy,gluten,nuts,non-veggie,non-vegan
22166,sweet and sour chicken thighs,358061,50,"[60-minutes-or-less, time-to-make, course, mai...","[292.7, 10.0, 82.0, 6.0, 67.0, 8.0, 7.0]",7,[preheat oven to 350f spray pan with nonstick ...,delicious chicken thighs with a kick! prepare ...,"[boneless skinless chicken thighs, low calorie...",10,False,https://www.food.com/recipe/sweet-and-sour-chi...,"[non-veggie, non-vegan]",False,False,False,True,True
11340,hot wings in the oven,266089,60,"[60-minutes-or-less, time-to-make, course, pre...","[400.3, 36.0, 2.0, 59.0, 85.0, 30.0, 0.0]",8,"[preheat oven at 350 degrees, sprinkle drumsti...",low fat version to buffalo wings. i like to us...,"[frank's red hot sauce, margarine, seasoning s...",7,False,https://www.food.com/recipe/hot-wings-in-the-o...,"[non-veggie, non-vegan]",False,False,False,True,True
9695,grilled alaskan salmon fillets simple sweet...,358576,20,"[30-minutes-or-less, time-to-make, course, mai...","[412.4, 28.0, 125.0, 20.0, 47.0, 15.0, 11.0]",17,[wash the salmon filets and pat dry with paper...,this taste sensation gets its distinctive flav...,"[salmon fillets, kosher salt, fresh ground bla...",19,True,https://www.food.com/recipe/grilled-alaskan-sa...,"[nuts, non-veggie, non-vegan]",False,False,True,True,True
2754,bloody mary short ribs,335959,230,"[time-to-make, course, main-ingredient, prepar...","[676.0, 88.0, 22.0, 13.0, 47.0, 121.0, 4.0]",13,[preheat oven to 350f season short ribs with s...,adapted from a recipe by rcheiss at allrecipes...,"[beef short ribs, kosher salt, ground black pe...",13,False,https://www.food.com/recipe/bloody-mary-short-...,"[non-veggie, non-vegan]",False,False,False,True,True
4930,chile shrimp,78584,20,"[30-minutes-or-less, time-to-make, course, mai...","[265.7, 19.0, 17.0, 8.0, 49.0, 9.0, 3.0]",8,"[heat the oil in a pan , then add the shallots...",mmmmmm. spicy.,"[olive oil, shallots, garlic, red chili pepper...",11,True,https://www.food.com/recipe/chile-shrimp-78584,[],False,False,False,False,False


In [948]:
shop_list=[]
for row in df_chosen.ingredients:
    shop_list+=row
shop_list = list(set(shop_list))
shop_list.sort()

In [949]:
shop_list[:5]

['apple juice',
 'apples',
 'balsamic vinegar',
 'bay leaf',
 'beef bouillon cubes']