## Final Project

#### Recipe generator

In [158]:

import pandas as pd            # Read the csv as df
import ast                     # Change lists that are strings to actual lists

import dotenv                  # Load the data into SQL
import os                      # Load the data into SQL
import sqlalchemy as alch      # Load the data into SQL

import random as rd            # Pick the recipes

In [144]:
raw_recipes = pd.read_csv('./recipes/RAW_recipes.csv')

In [145]:
len(raw_recipes)

231637

In [146]:
raw_recipes.sample()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
55934,coconut rice with beef stir fry,316610,25,283251,2008-07-30,"['30-minutes-or-less', 'time-to-make', 'course...","[404.9, 17.0, 16.0, 16.0, 57.0, 20.0, 15.0]",16,"['to prepare rice , bring first 4 ingredients ...",cooking light,"['water', 'salt', 'ground red pepper', 'light ...",14


In [147]:
# These columns arrive from the CSV as strings that look like Python lists;
# turn each one back into an actual list.
for list_col in ("tags", "steps", "ingredients"):
    raw_recipes[list_col] = raw_recipes[list_col].map(ast.literal_eval)

In [148]:
# list_tags = list(filter(lambda x: 'main-dish' in x, raw_recipes.tags))
# len(list_tags)

In [149]:
# Boolean helper column: True for recipes whose tag list contains "main-dish".
raw_recipes["new"] = raw_recipes.tags.apply(lambda tag_list: "main-dish" in tag_list)

In [155]:
# Keep only the flagged main dishes, discard the helper flag and renumber rows from 0.
df = (
    raw_recipes.loc[raw_recipes["new"]]
    .drop(columns="new")
    .reset_index(drop=True)
)

In [165]:
df.sample()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
59795,spaghetti with bacon and peas,17345,20,27643,2002-01-17,"[30-minutes-or-less, time-to-make, course, mai...","[530.6, 22.0, 43.0, 29.0, 36.0, 22.0, 27.0]",8,"[in med, skillet cook bacon until crisp, drain...",my kids called this the,"[bacon, garlic, onion, diced tomatoes, frozen ...",6


In [70]:
df.shape[0]

71786

In [157]:
df.isna().sum()

name                 0
id                   0
minutes              0
contributor_id       0
submitted            0
tags                 0
nutrition            0
n_steps              0
steps                0
description       1546
ingredients          0
n_ingredients        0
dtype: int64

### Export the df to SQL after selecting only main dishes

In [168]:
df.to_csv('recipes.csv')  # CSV snapshot of the main-dish subset (row index included)


dotenv.load_dotenv()  # load variables from a local .env file into the environment
password = os.getenv("sql_password")  # MySQL password, kept out of the notebook
if password is None:
    # os.getenv returns None for a missing variable; the f-string below would then
    # embed the text "None" as the password instead of failing with a clear message.
    raise RuntimeError('Environment variable "sql_password" is not set (check your .env file)')
dbName = "recipes"  # name of the target database
connectionData = f"mysql+pymysql://root:{password}@localhost/{dbName}"  # SQLAlchemy connection URL
engine = alch.create_engine(connectionData)  # engine used by pandas to talk to the database

# NOTE: if_exists='append' adds the same rows again every time this cell is re-run.
df.to_sql('recipes', con=engine, if_exists='append') # export to SQL.

In [167]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71786 entries, 0 to 71785
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   name            71786 non-null  object
 1   id              71786 non-null  int64 
 2   minutes         71786 non-null  int64 
 3   contributor_id  71786 non-null  int64 
 4   submitted       71786 non-null  object
 5   tags            71786 non-null  object
 6   nutrition       71786 non-null  object
 7   n_steps         71786 non-null  int64 
 8   steps           71786 non-null  object
 9   description     70240 non-null  object
 10  ingredients     71786 non-null  object
 11  n_ingredients   71786 non-null  int64 
dtypes: int64(5), object(7)
memory usage: 6.6+ MB


In [74]:
# Alphabetically sorted list of every distinct ingredient used across the recipes in df.
ing_list = sorted({ingredient for recipe in df.ingredients for ingredient in recipe})

In [75]:
len(ing_list)

9484

In [76]:
ing_list[:5]

['1% fat buttermilk',
 '1% fat cottage cheese',
 '1% low-fat milk',
 '10 inch low-fat flour tortillas',
 '10% cream']

In [174]:
# Dairy-milk ingredients: names containing "milk", minus the plant-based milks.
# "oat" only counts as a plant-milk marker when the name does not contain "goat",
# because a plain substring test for "oat" also matches inside "goat" and would
# wrongly exclude goat milk from the dairy list.
milk_d = [
    i for i in ing_list
    if 'milk' in i
    and not any(plant in i for plant in ('almond', 'coconut', 'non-dairy', 'rice', 'soy'))
    and ('oat' not in i or 'goat' in i)
]

In [175]:
# Cheese ingredients, leaving out names marked as dairy-free or plant-based.
cheese_d = [
    name for name in ing_list
    if 'cheese' in name
    and not any(marker in name for marker in ('non-dairy', 'soy', 'vegan', 'vegetarian'))
]

In [176]:
milk_d[:5]

['1% fat buttermilk',
 '1% low-fat milk',
 '2% evaporated milk',
 '2% low-fat milk',
 '2% milk']

In [177]:
cheese_d[:5]

['1% fat cottage cheese',
 '2% cheddar cheese',
 '2% fat cottage cheese',
 '2% mexican cheese blend',
 '2% mozzarella cheese']

In [187]:
# Cream ingredients, leaving out names that carry one of the non-dairy markers.
cream_d = [
    name for name in ing_list
    if 'cream' in name
    and not any(
        marker in name
        for marker in ('coconut', 'non-dairy', 'non-creamy', 'soy', 'vegan')
    )
]

In [189]:
cream_d[:5]

['10% cream',
 '15% cream',
 '18% table cream',
 '35% cream',
 '98% fat free condensed cream of celery soup']

In [201]:
# Butter ingredients: names containing "butter" that carry none of the markers below
# (nut/fruit butters, look-alike words such as "butternut", substitutes, vegan items).
# NOTE(review): substring matching still lets through names such as
# '1% fat buttermilk' and 'bread and butter pickles' -- confirm that is intended.
butter_d = [
    name for name in ing_list
    if 'butter' in name
    and not any(
        marker in name
        for marker in (
            'almond', 'apple', 'butterfly', 'butter substitute', 'butterball',
            'buttercup', 'butterfish', 'butterflied', 'butternut',
            "i can't believe it's not butter", 'butter beans', 'lettuce', 'peas',
            'coconut', 'vegan',
        )
    )
]

In [203]:
butter_d[:5]

['1% fat buttermilk',
 'bread and butter pickles',
 'butter',
 'butter buds',
 'butter chicken paste']

In [214]:
# Yogurt ingredients under either spelling, leaving out soy-based ones.
yogurt_d = [
    name for name in ing_list
    if 'soy' not in name
    and any(spelling in name for spelling in ('yogurt', 'yoghurt'))
]

In [222]:
yogurt_d[:5]

['bulgarian yogurt',
 'fat free greek yogurt',
 'fat free pina colada yogurt',
 'fat-free key lime yogurt',
 'fat-free lemon yogurt']

In [102]:
# Lactose-free, Gluten-free, Vegetarian, Vegan
# 1. Milk
# 2. Cheese
# 3. Cream
# 4. Butter

'''
Fermented.
Yogurt.
Custard.
Frozen.
'''

'\nCream.\nButter.\nFermented.\nYogurt.\nCustard.\nFrozen.\n'

**Watch out for k>n**

In [125]:
def length(n, k):
    """Pick ``k`` distinct row positions at random from ``range(n)``.

    Parameters
    ----------
    n : int
        Number of recipes available; must be a positive ``int``.
    k : int
        Number of recipes wanted; must be a positive ``int``.

    Returns
    -------
    list of int or str
        * ``k`` distinct values drawn from ``range(n)`` when ``k < n``;
        * every position ``0 .. n-1`` in order when ``k >= n`` (a notice is printed);
        * an error-message string when either argument is not a positive ``int``.
          Callers should check for ``str`` before iterating over the result.
    """
    # Type checks come first so that the comparisons are never reached for non-ints.
    if type(n) is not int or type(k) is not int or n <= 0 or k <= 0:
        return f'Nice try, now please provide a valid number between 1 and {n}'
    if k < n:
        # Honour the requested sample size (it was previously hard-coded to 10).
        return rd.sample(range(n), k=k)
    print(f'There are only {n} recipes in this database')
    # Return a list here as well so both successful branches have the same type.
    return list(range(n))

In [126]:
# pick = length(len(df2), 10)

**Test for intolerances**

In [115]:
# Disabled experiment: the code sits inside a bare string, so none of it runs.
# NOTE(review): if this is revived, `df2=df` only aliases df, so the in-place drop
# would remove rows from df as well, and `milk` is not defined in the visible
# cells (the dairy-milk list built earlier is named `milk_d`).
'''
df2=df
for index, row in df.iterrows():
    for i in row['ingredients']:
        if i in milk:
            df2.drop(index=index, inplace=True)
            break
'''
# df2.shape[0]

In [136]:
# Draw 10 random recipes from df.
df2=df.reset_index()  # only its length is used in this cell
pick = length(len(df2), 10)
# length() already yields valid 0-based row labels (0 .. n-1). The former "n-1"
# shift turned label 0 into -1, which filter() silently drops because it is not in
# the index, and it also made the last row impossible to select.
pick = list(pick)
df_chosen = df.filter(items = pick, axis=0)

In [137]:
pick

[9430, 32725, 55301, 29219, 19906, 19924, 20587, 63241, 54251, 62484]

In [172]:
df_chosen.sample()

Unnamed: 0,index,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
54251,179416,salsa chicken burrito filling,155336,30,148626,2006-02-09,"[30-minutes-or-less, time-to-make, course, mai...","[82.0, 2.0, 7.0, 13.0, 25.0, 1.0, 1.0]",6,[1 place chicken breasts and tomato sauce in a...,""" quick, easy burrito/taco filling that is del...","[boneless skinless chicken breast halves, toma...",8


In [140]:
# Shopping list: every distinct ingredient of the chosen recipes, in alphabetical order.
shop_list = sorted({item for recipe in df_chosen.ingredients for item in recipe})

In [173]:
shop_list[:5]

['balsamic vinegar',
 'beef tenderloin steaks',
 'black beans',
 'black pepper',
 'blackening seasoning']