In [1]:
import requests
import json
import pandas as pd
import numpy as np
from datetime import datetime
import os
import glob
import time

First, you need to create a JSON file named `apikey.json` in the same directory as this notebook with the following structure.

```
{
    "APIKey": "put your Spoonacular API key here"
}
```

In [4]:
# Load the Spoonacular API key from apikey.json.
with open("apikey.json") as f:
    content = f.read()

# The setup instructions describe an "APIKey" field, while this cell originally
# read "gmail". Accept either so both layouts of the file keep working; a file
# with neither still raises KeyError('gmail') as before.
_key_file = json.loads(content)
API_KEY = _key_file.get("APIKey") or _key_file["gmail"]

## Get recipe data from Spoonacular API

In [5]:
# Helper function
# export as csv
def export_df_as_csv(data):
    '''
    Write a DataFrame to a timestamped CSV file inside the data/ directory.

    params:
        - data: pd.DataFrame - the frame to save (its index is written too)
    returns:
        - str - path of the CSV file that was written
    '''
    # to_csv does not create missing directories, so make sure data/ exists.
    os.makedirs("data", exist_ok=True)
    # Zero-padded, colon-free timestamp: ':' is not allowed in Windows file
    # names, and fixed-width fields keep the files sorted chronologically.
    timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    path = f"data/recipedata-{timestamp}.csv"
    data.to_csv(path)
    return path
    
def get_recipes(n, request_fn):
    '''
    Collect recipes by invoking request_fn up to n times and stacking the
    batches it returns into a single DataFrame.

    request_fn takes no arguments and returns (batch, ok). Collection stops
    at the first call whose ok flag is falsy; that call's batch is discarded.

    return: pd.DataFrame built from every batch gathered before stopping
    '''
    collected = []
    for _attempt in range(n):
        batch, ok = request_fn()
        if ok:
            collected.extend(batch)
            continue
        # A failed request ends the run; keep whatever was gathered so far.
        break
    return pd.DataFrame(collected)

### Random recipes

In [62]:
# Number of batches to request. Each request asks for 100 recipes,
# so 10 batches target 1000 random recipes.

n_hundred = 10 # 10 hundred = 1000 random recipes

In [63]:
def get_100_random_recipes(timeout=30):
    '''
    This function requests 100 random recipes from Spoonacular
    100 is the highest number of recipes one could get from a single request

    params:
        - timeout: seconds to wait for the server before giving up
    return:
        - List( Dict() ) - a list of recipes (full information)
        - True if status code is 200, False otherwise (including when the
          request itself fails, e.g. connection error or timeout)
    '''
    try:
        res = requests.get(
            "https://api.spoonacular.com/recipes/random",
            params={"apiKey": API_KEY, "number": 100},
            timeout=timeout,
        )
    except requests.RequestException as err:
        # Report only the exception type: the full message can contain the
        # request URL, which includes the API key.
        print(f"Request failed: {type(err).__name__}")
        return [], False

    if res.status_code != 200:
        print(res.status_code)
        print(res.content)
        return [], False

    return res.json()["recipes"], True

In [64]:
# Download n_hundred batches of random recipes from Spoonacular into one DataFrame
recipe_data = get_recipes(n_hundred, get_100_random_recipes)

In [65]:
# (rows, columns) of the downloaded recipes
recipe_data.shape

(1000, 38)

In [66]:
# Save this batch as a timestamped CSV under data/
export_df_as_csv(recipe_data)

### Get random recipes based on complex search

One problem with the above approach is that it returns a very similar set of 1000 recipes for each request. Therefore, fewer than 1000 distinct recipes are collected even after multiple requests for 1000 recipes. 

To overcome this, we are going to use the complex search method, with the `sort` option set to `random`. 

In [71]:
def random_complex_search_100(timeout=30):
    '''
    This function gets 100 random recipes from Spoonacular with the complex search method

    params:
        - timeout: seconds to wait for the server before giving up
    returns:
        - List( Dict() ) - a list of recipes (limited information)
        - True if status code is 200, False otherwise (including when the
          request itself fails, e.g. connection error or timeout)
    '''
    try:
        res = requests.get(
            "https://api.spoonacular.com/recipes/complexSearch",
            params={"apiKey": API_KEY, "number": 100, "sort": "random"},
            timeout=timeout,
        )
    except requests.RequestException as err:
        # Report only the exception type: the full message can contain the
        # request URL, which includes the API key.
        print(f"Request failed: {type(err).__name__}")
        return [], False

    # Pause between consecutive requests.
    time.sleep(1)

    if res.status_code != 200:
        print(res.status_code)
        print(res.content)
        return [], False

    return res.json()["results"], True

In [82]:
# One batch of recipes from complexSearch with random sorting
recipe_data1 = get_recipes(1, random_complex_search_100)

However, instead of getting all data related to the recipes, we only get `id`, `title`, `image` and `imageType`. To overcome this, we will get the information for each recipe by sending another set of requests. 

In [83]:
def get_recipe_info(recipe_ids, timeout=30):
    '''
    Download the full information (including nutrition) for each recipe id,
    one request per id.

    The loop stops at the first failed request (non-200 status, connection
    error or timeout) and returns whatever was downloaded up to that point.

    params:
        - recipes_ids: List(int) - list of recipe ids
        - timeout: seconds to wait for the server on each request
    returns:
        - pd.DataFrame - one row per recipe that was downloaded successfully
    '''
    recipes = []
    for recipe_id in recipe_ids:
        try:
            res = requests.get(
                f"https://api.spoonacular.com/recipes/{recipe_id}/information",
                params={"apiKey": API_KEY, "includeNutrition": "true"},
                timeout=timeout,
            )
        except requests.RequestException as err:
            # Keep the recipes already fetched instead of losing them to an
            # unhandled exception. Only the exception type is printed because
            # the full message can contain the URL with the API key.
            print(f"Request failed: {type(err).__name__}")
            break

        # Pause between consecutive requests.
        time.sleep(1)

        if res.status_code != 200:
            print(res.status_code)
            print(res.content)
            break

        recipes.append(res.json())
    return pd.DataFrame(recipes)

In [84]:
# Fetch the full details for every id returned by the search.
# This sends one request per id, with a one-second pause after each.
recipe_ids = recipe_data1["id"].to_list()
recipe_info = get_recipe_info(recipe_ids)

402
b'{"status":"failure", "code":402,"message":"Your daily points limit of 150 has been reached. Please upgrade your plan to continue using the API."}'


In [85]:
# Preview the first rows of the detailed recipe data
recipe_info.head()

Unnamed: 0,vegetarian,vegan,glutenFree,dairyFree,veryHealthy,cheap,veryPopular,sustainable,weightWatcherSmartPoints,gaps,...,nutrition,summary,cuisines,dishTypes,diets,occasions,winePairing,instructions,analyzedInstructions,originalId
0,False,False,True,False,False,False,False,False,11,no,...,"{'nutrients': [{'name': 'Calories', 'amount': ...",Twice Baked Potatoes is a <b>gluten free</b> s...,[],"[antipasti, starter, snack, appetizer, antipas...",[gluten free],[],{},InstructionsNote: click on times in the instru...,"[{'name': '', 'steps': [{'number': 1, 'step': ...",
1,False,False,True,False,False,False,False,False,5,no,...,"{'nutrients': [{'name': 'Calories', 'amount': ...",If <b>$1.73 per serving</b> falls in your budg...,[],[side dish],"[gluten free, pescatarian]",[],"{'pairedWines': ['chardonnay', 'pinot noir', '...","Boil 2 cups water, wine, and 1 tsp. salt in a ...","[{'name': '', 'steps': [{'number': 1, 'step': ...",
2,False,False,True,False,False,False,False,False,7,no,...,"{'nutrients': [{'name': 'Calories', 'amount': ...","One serving contains <b>143 calories</b>, <b>2...",[],"[antipasti, starter, snack, appetizer, antipas...",[gluten free],[christmas],{},"Line a 8-inch baking pan with parchment paper,...","[{'name': '', 'steps': [{'number': 1, 'step': ...",
3,False,False,True,True,True,False,False,False,13,no,...,"{'nutrients': [{'name': 'Calories', 'amount': ...",Veggie Paella W/ Saffron & Orange might be jus...,"[Spanish, European]",[side dish],"[gluten free, dairy free]",[],"{'pairedWines': ['tempranillo', 'albarino', 'g...",,[],
4,False,False,True,True,False,False,False,False,4,no,...,"{'nutrients': [{'name': 'Calories', 'amount': ...",This recipe makes 15 servings with <b>116 calo...,[],"[antipasti, starter, snack, appetizer, antipas...","[gluten free, dairy free, fodmap friendly]",[],{},"In a medium bowl, mix the dry ingredients toge...","[{'name': '', 'steps': [{'number': 1, 'step': ...",


In [86]:
# Fewer rows than requested ids means get_recipe_info stopped early on a failed request
print(f"{recipe_info.shape[0]} recipes downloaded")

33 recipes downloaded


In [87]:
# Save the detailed recipes as a timestamped CSV under data/
export_df_as_csv(recipe_info)

## Grouping all csv files together

In [88]:
# Load the existing grouped file only to know how many recipes we originally had
grouped = pd.read_csv("./data/recipedata-grouped.csv")

In [89]:
# Read every recipedata*.csv under ./data (the pattern also matches
# recipedata-grouped.csv) and stack them into a single DataFrame

recipe_csv_files = glob.glob('./data/recipedata*.csv')
df = pd.concat([pd.read_csv(csv_path, index_col=0) for csv_path in recipe_csv_files])
df.head()

Unnamed: 0,vegetarian,vegan,glutenFree,dairyFree,veryHealthy,cheap,veryPopular,sustainable,weightWatcherSmartPoints,gaps,...,diets,occasions,winePairing,instructions,analyzedInstructions,originalId,spoonacularSourceUrl,preparationMinutes,cookingMinutes,author
0,True,False,True,False,False,False,False,False,15,no,...,"['gluten free', 'lacto ovo vegetarian', 'primal']",[],"{'pairedWines': [], 'pairingText': '', 'produc...",For garlic broth:\nAdd all ingredients to the ...,"[{'name': 'For garlic broth', 'steps': [{'numb...",,https://spoonacular.com/roasted-butternut-squa...,,,
1,False,False,False,False,False,False,False,False,6,no,...,[],[],"{'pairedWines': [], 'pairingText': '', 'produc...","<ol><li>If you're using ground turkey, start o...","[{'name': '', 'steps': [{'number': 1, 'step': ...",,https://spoonacular.com/turkey-goulash-by-momm...,,,
2,False,False,False,False,False,False,False,False,13,no,...,[],[],{},wash and rinse pork chops and place into the s...,"[{'name': '', 'steps': [{'number': 1, 'step': ...",,https://spoonacular.com/what-to-make-for-dinne...,5.0,30.0,
3,True,False,True,False,False,False,False,False,4,no,...,"['gluten free', 'lacto ovo vegetarian']",['super bowl'],{},Layer your bowl with refried beans.Add a layer...,"[{'name': '', 'steps': [{'number': 1, 'step': ...",,https://spoonacular.com/not-your-normal-seven-...,15.0,0.0,
4,False,False,True,False,False,False,False,False,24,no,...,"['gluten free', 'primal', 'pescatarian']",['easter'],"{'pairedWines': ['chardonnay', 'pinot noir', '...","<ol><li>In a frying pan, melt the butter over ...","[{'name': '', 'steps': [{'number': 1, 'step': ...",,https://spoonacular.com/pan-seared-salmon-with...,,,


In [90]:
# Report the frame size before and after removing duplicate rows.
print(f"Before dropping duplicates: {df.shape}")
# NOTE(review): without a subset, drop_duplicates() only removes rows that are
# identical in every column; consider keying on the recipe id instead - confirm.
df = df.drop_duplicates()
print(f"After dropping duplicates: {df.shape}")
# Compare against the previously grouped file to see how many rows are new.
print(f"{df.shape[0] - grouped.shape[0]} new recipes added.")

Before dropping duplicates: (3859, 40)
After dropping duplicates: (3859, 40)
33 new recipes added.


In [91]:
# Overwrite the grouped file with the combined, de-duplicated recipes
df.to_csv("data/recipedata-grouped.csv")