In [1]:
from dotenv import load_dotenv
import requests
import os

In [None]:
load_dotenv()

api_url = 'https://api.calorieninjas.com/v1/nutrition?query='
query = 'chicken sandwich'
response = requests.get(api_url + query, headers={'X-Api-Key': os.getenv("CALORIE_NINJAS_API_KEY")})
if response.status_code == requests.codes.ok:
    print(response.text)
else:
    print("Error:", response.status_code, response.text)

In [None]:
response

In [None]:
response.json()["items"][0]

In [2]:
import polars as pl
import ast

def categorize_meal_course(name, ingredients):
    """Assigns meal course tags based on dish name and ingredients."""
    name_lower = name.lower()
    ingredients_lower = {ing.lower() for ing in ingredients}
    meal_tags = set()

    # Breakfast-related keywords
    breakfast_keywords = {"pancake", "waffle", "oatmeal", "cereal", "omelet", "toast", "bagel", "smoothie", "frittata", "scramble", "bacon", "hashbrown", "granola", "crepe"}

    # Desserts & Snacks
    dessert_snack_keywords = {"cookie", "cake", "pie", "pudding", "muffin", "brownie", "candy", "donut", "cupcake", "sorbet", "ice cream", "cheesecake", "truffle", "chocolate"}

    # Appetizers & Side dishes
    appetizer_side_keywords = {"salad", "soup", "dip", "appetizer", "side", "fries", "bruschetta", "tapas", "spring roll", "dumpling", "chips", "guacamole", "hummus", "coleslaw", "pickle"}

    # Main courses
    main_course_keywords = {"chicken", "beef", "pasta", "rice", "stew", "curry", "fish", "pork", "lamb", "noodles", "steak", "fajita", "meatball", "biryani", "kebab", "taco", "lasagna"}

    # Sandwiches & Burgers
    sandwich_burger_keywords = {"burger", "sandwich", "wrap", "panini", "shawarma", "sub", "grilled cheese"}

    # Pizza & Flatbreads
    pizza_keywords = {"pizza", "flatbread", "calzone", "pide"}

    # Ingredient-based Classification
    high_protein = {"chicken", "beef", "pork", "lamb", "fish", "shrimp", "turkey"}
    carb_based = {"pasta", "rice", "bread", "potato", "noodle", "tortilla"}

    # Assign categories based on name
    if any(word in name_lower for word in breakfast_keywords):
        meal_tags.add("breakfast")
    if any(word in name_lower for word in dessert_snack_keywords):
        meal_tags.update(["dessert", "snack"])
    if any(word in name_lower for word in appetizer_side_keywords):
        meal_tags.update(["appetizer", "side"])
    if any(word in name_lower for word in main_course_keywords):
        meal_tags.update(["lunch", "dinner"])
    if any(word in name_lower for word in sandwich_burger_keywords):
        meal_tags.add("sandwich/burger")
    if any(word in name_lower for word in pizza_keywords):
        meal_tags.add("pizza")

    # Ingredient-based assignments
    if not meal_tags:
        if ingredients_lower.intersection(high_protein):
            meal_tags.update(["lunch", "dinner"])
        if ingredients_lower.intersection(carb_based):
            meal_tags.update(["side", "main"])
        if "egg" in ingredients_lower or "bacon" in ingredients_lower:
            meal_tags.add("breakfast")

    # Fallback Category (More specific than "general")
    if not meal_tags:
        meal_tags.add("miscellaneous")  # Less ambiguous than "general"

    return list(meal_tags)

In [3]:
def categorize_dietary_tags(ingredients):
    """Assigns dietary restriction tags based on ingredients."""
    ingredients_lower = {ing.lower() for ing in ingredients}
    tags = set()

    non_halal = {"pork", "bacon", "ham", "lard", "gelatin"}
    non_kosher = {"pork", "shellfish", "gelatin", "rabbit"}
    vegetarian = {"chicken", "beef", "pork", "fish", "lamb", "duck", "shrimp", "bacon", "ham"}
    vegan = vegetarian.union({"milk", "cheese", "butter", "honey", "eggs", "yogurt", "cream"})

    if not ingredients_lower.intersection(non_halal):
        tags.add("Halal")
    else:
        tags.add("Non-Halal")

    if not ingredients_lower.intersection(non_kosher):
        tags.add("Kosher")
    else:
        tags.add("Non-Kosher")

    if not ingredients_lower.intersection(vegetarian):
        tags.add("Vegetarian")

    if not ingredients_lower.intersection(vegan):
        tags.add("Vegan")

    return list(tags)

In [21]:
# Load CSV file
df = pl.read_csv("data/recipes.csv")
df.head()

Unnamed: 0_level_0,name,ingredients,instructions,link,source,NER
i64,str,str,str,str,str,str
0,"""No-Bake Nut Cookies""","""[""1 c. firmly packed brown sug…","""[""In a heavy 2-quart saucepan,…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""brown sugar"", ""milk"", ""vanil…"
1,"""Jewell Ball'S Chicken""","""[""1 small jar chipped beef, cu…","""[""Place chipped beef on bottom…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""beef"", ""chicken breasts"", ""c…"
2,"""Creamy Corn""","""[""2 (16 oz.) pkg. frozen corn""…","""[""In a slow cooker, combine al…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""frozen corn"", ""cream cheese""…"
3,"""Chicken Funny""","""[""1 large whole chicken"", ""2 (…","""[""Boil and debone chicken."", ""…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""chicken"", ""chicken gravy"", ""…"
4,"""Reeses Cups(Candy) ""","""[""1 c. peanut butter"", ""3/4 c.…","""[""Combine first four ingredien…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""[""peanut butter"", ""graham crac…"


In [22]:
df.dtypes

[Int64, String, String, String, String, String, String]

In [23]:
import json

def safe_literal_eval(val):
    try:
        return json.loads(val) if isinstance(val, str) else val
    except (json.JSONDecodeError, ValueError, TypeError):
        return []

# Apply categorization functions efficiently
df = df.with_columns(
    pl.col("ingredients").map_elements(
        safe_literal_eval,
        return_dtype=pl.List(pl.Utf8)).alias("ingredients"),
    pl.col("instructions").map_elements(
        safe_literal_eval,
        return_dtype=pl.List(pl.Utf8)).alias("instructions"),
    pl.col("NER").map_elements(
        safe_literal_eval,
        return_dtype=pl.List(pl.Utf8)).alias("NER")
)

df.head()

Unnamed: 0_level_0,name,ingredients,instructions,link,source,NER
i64,str,list[str],list[str],str,str,list[str]
0,"""No-Bake Nut Cookies""","[""1 c. firmly packed brown sugar"", ""1/2 c. evaporated milk"", … ""3 1/2 c. bite size shredded rice biscuits""]","[""In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine."", ""Stir over medium heat until mixture bubbles all over top."", … ""Let stand until firm, about 30 minutes.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""brown sugar"", ""milk"", … ""bite size shredded rice biscuits""]"
1,"""Jewell Ball'S Chicken""","[""1 small jar chipped beef, cut up"", ""4 boned chicken breasts"", … ""1 carton sour cream""]","[""Place chipped beef on bottom of baking dish."", ""Place chicken on top of beef."", ""Mix soup and cream together; pour over chicken. Bake, uncovered, at 275° for 3 hours.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""beef"", ""chicken breasts"", … ""sour cream""]"
2,"""Creamy Corn""","[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg. cream cheese, cubed"", … ""1/4 tsp. pepper""]","[""In a slow cooker, combine all ingredients. Cover and cook on low for 4 hours or until heated through and cheese is melted. Stir well before serving. Yields 6 servings.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""frozen corn"", ""cream cheese"", … ""pepper""]"
3,"""Chicken Funny""","[""1 large whole chicken"", ""2 (10 1/2 oz.) cans chicken gravy"", … ""4 oz. shredded cheese""]","[""Boil and debone chicken."", ""Put bite size pieces in average size square casserole dish."", … ""Sprinkle shredded cheese on top and bake at 350° for approximately 20 minutes or until golden and bubbly.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""chicken"", ""chicken gravy"", … ""shredded cheese""]"
4,"""Reeses Cups(Candy) ""","[""1 c. peanut butter"", ""3/4 c. graham cracker crumbs"", … ""1 large pkg. chocolate chips""]","[""Combine first four ingredients and press in 13 x 9-inch ungreased pan."", ""Melt chocolate chips and spread over mixture. Refrigerate for about 20 minutes and cut into pieces before chocolate gets hard."", ""Keep in refrigerator.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""peanut butter"", ""graham cracker crumbs"", … ""chocolate chips""]"


In [29]:
df.tail(20)

Unnamed: 0_level_0,name,ingredients,instructions,link,source,NER,meal_course,dietary_tags
i64,str,list[str],list[str],str,str,list[str],list[str],list[str]
2231122,"""Breakfast Enchiladas""","[""1 ENCHILADA FILLING"", ""2 lb hot ground pork sausage (1 1/2 lbs. for filling, 1/2 lb. used as topping; set aside)"", … ""1/2 pd crumbled cooked sausage, 1 package halved grape tomatoes, 4 sliced green onions, 1 tablespoon chopped fresh cilantro (top enchiladas after baked)""]","[""Preheat oven to 350."", ""Cook sausage in a large nonstick skillet over medium-high heat, stirring until sausage crumbles are cooked through."", … ""When enchiladas are done; sprinkle down the center in this order; 1/2 pd crumbled sausage, halved grape tomatoes, green onions and fresh cilantro.""]","""cookpad.com/us/recipes/349310-…","""Recipes1M""","[""ENCHILADA"", ""hot ground pork sausage"", … ""sausage""]","[""miscellaneous""]","[""Halal"", ""Kosher"", ""Vegetarian""]"
2231123,"""Latin-Spiced Rib Eye with Saut…","[""1 teaspoon ground cumin"", ""1 teaspoon dried oregano"", … ""3/4 cup cilantro leaves""]","[""In a small bowl, combine the cumin, oregano and garlic and onion powders, 1 teaspoon of salt and 1/2 teaspoon of pepper."", ""Season the steaks on both sides with the spice mixture and rub them with the garlic puree."", … ""Transfer to plates, top with the cilantro and serve.""]","""www.foodandwine.com/recipes/la…","""Recipes1M""","[""ground cumin"", ""oregano"", … ""cilantro""]","[""miscellaneous""]","[""Vegan"", ""Halal"", … ""Vegetarian""]"
2231124,"""Chocolate Crumble""","[""6 1/2 tablespoons (94g) unsalted butter, softened"", ""1/3 cup (30g) unsweetened cocoa powder (preferably Valrhona)"", … ""1/4 teaspoon (1g) coarse salt""]","[""Heat the oven to 375F or 350F on convection."", ""Line a baking sheet with a Silpat or parchment."", … ""Store in an airtight container for up to 1 week.""]","""www.epicurious.com/recipes/foo…","""Recipes1M""","[""unsalted butter"", ""cocoa"", … ""salt""]","[""dessert"", ""snack""]","[""Vegan"", ""Halal"", … ""Vegetarian""]"
2231125,"""Cran-Apple White Chocolate Pop…","[""12 ounces white chocolate chips"", ""12 teaspoon ground cinnamon"", … ""23 cup dried cranberries""]","[""Melt chocolate in large bowl set over pot of simmering water, stirring frequently until smooth."", ""Add cinnamon and pumpkin pie spice and whisk to combine."", … ""Store for up to 3 days in covered container.""]","""www.food.com/recipe/cran-apple…","""Recipes1M""","[""chocolate chips"", ""ground cinnamon"", … ""cranberries""]","[""dessert"", ""snack""]","[""Vegan"", ""Halal"", … ""Vegetarian""]"
2231126,"""Cucumber Sandwiches""","[""1 cucumber, lightly peeled"", ""1/2 tsp salt"", … ""Tiny watercress leaves, or microgreens, to garnish""]","[""Prepare ahead"", ""Step 1 can be completed 1 hour in advance, and the cucumber slices stored in the refrigerator."", … ""As an alternative to butter, spread the bread with cream cheese or soft goat cheese.""]","""www.cookstr.com/recipes/cucumb…","""Recipes1M""","[""cucumber"", ""salt"", … ""watercress leaves""]","[""sandwich/burger""]","[""Halal"", ""Kosher"", ""Vegetarian""]"
…,…,…,…,…,…,…,…,…
2231137,"""Sunny's Fake Crepes""","[""1/2 cup chocolate hazelnut spread (recommended: Nutella)"", ""4 (8-inch) round whole-wheat tortillas, room temperature"", … ""1/2 cup finely chopped toasted hazelnuts""]","[""Spread hazelnut spread on 1 side of each tortilla."", ""In a large pan over medium heat add 2 tablespoons of butter and let it melt until bubbly."", … ""Arrange them on a serving platter and serve immediately.""]","""www.foodnetwork.com/recipes/su…","""Recipes1M""","[""chocolate hazelnut spread"", ""tortillas"", … ""hazelnuts""]","[""breakfast""]","[""Halal"", ""Kosher"", ""Vegetarian""]"
2231138,"""Devil Eggs""","[""1 dozen eggs"", ""1 paprika"", … ""3 tbsp relish""]","[""Boil eggs on medium for 30mins."", ""Then cool eggs down in cold water for approximately 5 mins."", … ""finish touch sprinkle paprika on top.""]","""cookpad.com/us/recipes/355411-…","""Recipes1M""","[""eggs"", ""paprika"", … ""relish""]","[""miscellaneous""]","[""Halal"", ""Kosher"", ""Vegetarian""]"
2231139,"""Extremely Easy and Quick - Nam…","[""150 grams Daikon radish"", ""1 tbsp Sesame oil"", … ""1 tsp Soy sauce""]","[""Julienne the daikon and squeeze out the excess moisture."", ""Combine all of the ingredients."", ""Serve on a dish.""]","""cookpad.com/us/recipes/153324-…","""Recipes1M""","[""radish"", ""Sesame oil"", … ""Soy sauce""]","[""side"", ""appetizer""]","[""Vegan"", ""Halal"", … ""Vegetarian""]"
2231140,"""Pan-Roasted Pork Chops With Ap…","[""1 cup apple cider"", ""6 tablespoons sugar"", … ""1 cup neutral oil, like canola""]","[""In a large bowl, mix the apple cider with 4 cups of water, the sugar and the salt."", ""Toast the bay leaves, chilies, juniper berries, caraway seeds, mustard seeds and coriander seeds in a small pan set over medium heat until you can smell them, then add to the brine, and stir to combine."", … ""The usual accompaniment is potato-and-horseradish pierogies sauteed in brown butter, though roasted new potatoes with a topping of butter and freshly grated horseradish will answer almost as well.""]","""cooking.nytimes.com/recipes/10…","""Recipes1M""","[""apple cider"", ""sugar"", … ""neutral oil""]","[""dinner"", ""lunch""]","[""Non-Halal"", ""Non-Kosher""]"


In [25]:
df.dtypes

[Int64, String, List(String), List(String), String, String, List(String)]

In [26]:
# Apply categorization functions efficiently
df = df.with_columns(
    pl.struct(["name", "NER"]).map_elements(
        lambda row: categorize_meal_course(row["name"], row["NER"]),
        return_dtype=pl.List(pl.Utf8)).alias("meal_course"),

    pl.col("NER").map_elements(
        categorize_dietary_tags,
        return_dtype=pl.List(pl.Utf8)).alias("dietary_tags")
)
df.head()

Unnamed: 0_level_0,name,ingredients,instructions,link,source,NER,meal_course,dietary_tags
i64,str,list[str],list[str],str,str,list[str],list[str],list[str]
0,"""No-Bake Nut Cookies""","[""1 c. firmly packed brown sugar"", ""1/2 c. evaporated milk"", … ""3 1/2 c. bite size shredded rice biscuits""]","[""In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine."", ""Stir over medium heat until mixture bubbles all over top."", … ""Let stand until firm, about 30 minutes.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""brown sugar"", ""milk"", … ""bite size shredded rice biscuits""]","[""dessert"", ""snack""]","[""Halal"", ""Kosher"", ""Vegetarian""]"
1,"""Jewell Ball'S Chicken""","[""1 small jar chipped beef, cut up"", ""4 boned chicken breasts"", … ""1 carton sour cream""]","[""Place chipped beef on bottom of baking dish."", ""Place chicken on top of beef."", ""Mix soup and cream together; pour over chicken. Bake, uncovered, at 275° for 3 hours.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""beef"", ""chicken breasts"", … ""sour cream""]","[""dinner"", ""lunch""]","[""Halal"", ""Kosher""]"
2,"""Creamy Corn""","[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg. cream cheese, cubed"", … ""1/4 tsp. pepper""]","[""In a slow cooker, combine all ingredients. Cover and cook on low for 4 hours or until heated through and cheese is melted. Stir well before serving. Yields 6 servings.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""frozen corn"", ""cream cheese"", … ""pepper""]","[""miscellaneous""]","[""Halal"", ""Kosher"", ""Vegetarian""]"
3,"""Chicken Funny""","[""1 large whole chicken"", ""2 (10 1/2 oz.) cans chicken gravy"", … ""4 oz. shredded cheese""]","[""Boil and debone chicken."", ""Put bite size pieces in average size square casserole dish."", … ""Sprinkle shredded cheese on top and bake at 350° for approximately 20 minutes or until golden and bubbly.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""chicken"", ""chicken gravy"", … ""shredded cheese""]","[""dinner"", ""lunch""]","[""Halal"", ""Kosher""]"
4,"""Reeses Cups(Candy) ""","[""1 c. peanut butter"", ""3/4 c. graham cracker crumbs"", … ""1 large pkg. chocolate chips""]","[""Combine first four ingredients and press in 13 x 9-inch ungreased pan."", ""Melt chocolate chips and spread over mixture. Refrigerate for about 20 minutes and cut into pieces before chocolate gets hard."", ""Keep in refrigerator.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""peanut butter"", ""graham cracker crumbs"", … ""chocolate chips""]","[""dessert"", ""snack""]","[""Halal"", ""Kosher"", ""Vegetarian""]"


In [27]:
df["meal_course"].value_counts()

meal_course,count
list[str],u32
"[""side"", ""main""]",33764
"[""pizza"", ""side"", … ""sandwich/burger""]",5
"[""dessert"", ""sandwich/burger"", … ""pizza""]",9
"[""side"", ""dinner"", … ""main""]",425
"[""side"", ""dinner"", … ""sandwich/burger""]",27
…,…
"[""snack"", ""dessert"", … ""sandwich/burger""]",1
"[""pizza"", ""side"", … ""lunch""]",128
"[""snack"", ""dessert"", … ""sandwich/burger""]",4
"[""snack"", ""dessert"", … ""lunch""]",6


In [28]:
df["dietary_tags"].value_counts()

dietary_tags,count
list[str],u32
"[""Vegan"", ""Halal"", … ""Vegetarian""]",1038941
"[""Non-Halal"", ""Kosher""]",88375
"[""Halal"", ""Non-Kosher""]",46
"[""Non-Halal"", ""Kosher"", ""Vegetarian""]",1564
"[""Vegetarian"", ""Vegan"", … ""Non-Kosher""]",272
…,…
"[""Non-Halal"", ""Vegetarian"", ""Non-Kosher""]",1915
"[""Non-Halal"", ""Vegetarian"", … ""Non-Kosher""]",2618
"[""Vegetarian"", ""Halal"", ""Non-Kosher""]",121
"[""Halal"", ""Kosher""]",159826


In [None]:
# Save the updated dataset
df.write_csv("data/recipes_with_tags.csv")

print("Updated dataset saved as recipes_with_tags.csv")



In [30]:
from itertools import chain

# Get all unique meal course tags
all_meal_courses = set(chain.from_iterable(df["meal_course"]))

# Get all unique dietary tags
all_dietary_tags = set(chain.from_iterable(df["dietary_tags"]))

# Function to generate one-hot encoding columns
def one_hot_encode_column(df, column_name, unique_tags):
    for tag in unique_tags:
        df = df.with_columns(
            pl.col(column_name).list.contains(tag).cast(pl.Int8()).alias(f"{column_name}_{tag}")
        )
    return df

# Apply one-hot encoding
df = one_hot_encode_column(df, "meal_course", all_meal_courses)
df = one_hot_encode_column(df, "dietary_tags", all_dietary_tags)

# Drop the original list columns as they're no longer needed
df = df.drop(["meal_course", "dietary_tags"])

df.head()

Unnamed: 0_level_0,name,ingredients,instructions,link,source,NER,meal_course_snack,meal_course_pizza,meal_course_dessert,meal_course_side,meal_course_dinner,meal_course_breakfast,meal_course_appetizer,meal_course_lunch,meal_course_sandwich/burger,meal_course_main,meal_course_miscellaneous,dietary_tags_Non-Kosher,dietary_tags_Halal,dietary_tags_Vegetarian,dietary_tags_Non-Halal,dietary_tags_Kosher,dietary_tags_Vegan
i64,str,list[str],list[str],str,str,list[str],i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8
0,"""No-Bake Nut Cookies""","[""1 c. firmly packed brown sugar"", ""1/2 c. evaporated milk"", … ""3 1/2 c. bite size shredded rice biscuits""]","[""In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine."", ""Stir over medium heat until mixture bubbles all over top."", … ""Let stand until firm, about 30 minutes.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""brown sugar"", ""milk"", … ""bite size shredded rice biscuits""]",1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0
1,"""Jewell Ball'S Chicken""","[""1 small jar chipped beef, cut up"", ""4 boned chicken breasts"", … ""1 carton sour cream""]","[""Place chipped beef on bottom of baking dish."", ""Place chicken on top of beef."", ""Mix soup and cream together; pour over chicken. Bake, uncovered, at 275° for 3 hours.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""beef"", ""chicken breasts"", … ""sour cream""]",0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0
2,"""Creamy Corn""","[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg. cream cheese, cubed"", … ""1/4 tsp. pepper""]","[""In a slow cooker, combine all ingredients. Cover and cook on low for 4 hours or until heated through and cheese is melted. Stir well before serving. Yields 6 servings.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""frozen corn"", ""cream cheese"", … ""pepper""]",0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0
3,"""Chicken Funny""","[""1 large whole chicken"", ""2 (10 1/2 oz.) cans chicken gravy"", … ""4 oz. shredded cheese""]","[""Boil and debone chicken."", ""Put bite size pieces in average size square casserole dish."", … ""Sprinkle shredded cheese on top and bake at 350° for approximately 20 minutes or until golden and bubbly.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""chicken"", ""chicken gravy"", … ""shredded cheese""]",0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0
4,"""Reeses Cups(Candy) ""","[""1 c. peanut butter"", ""3/4 c. graham cracker crumbs"", … ""1 large pkg. chocolate chips""]","[""Combine first four ingredients and press in 13 x 9-inch ungreased pan."", ""Melt chocolate chips and spread over mixture. Refrigerate for about 20 minutes and cut into pieces before chocolate gets hard."", ""Keep in refrigerator.""]","""www.cookbooks.com/Recipe-Detai…","""Gathered""","[""peanut butter"", ""graham cracker crumbs"", … ""chocolate chips""]",1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0


In [31]:
df.dtypes

[Int64,
 String,
 List(String),
 List(String),
 String,
 String,
 List(String),
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8]

In [32]:
stringified_df = df.with_columns(
    pl.col("ingredients").map_elements(lambda x: "|".join(x)).alias("ingredients"),
    pl.col("instructions").map_elements(lambda x: "|".join(x)).alias("instructions"),
    pl.col("NER").map_elements(lambda x: "|".join(x)).alias("NER"),
    # pl.col("meal_course").map_elements(lambda x: "\n".join(x)).alias("meal_course"),
    # pl.col("dietary_tags").map_elements(lambda x: "\n".join(x)).alias("dietary_tags")
)
stringified_df.head()



Unnamed: 0_level_0,name,ingredients,instructions,link,source,NER,meal_course_snack,meal_course_pizza,meal_course_dessert,meal_course_side,meal_course_dinner,meal_course_breakfast,meal_course_appetizer,meal_course_lunch,meal_course_sandwich/burger,meal_course_main,meal_course_miscellaneous,dietary_tags_Non-Kosher,dietary_tags_Halal,dietary_tags_Vegetarian,dietary_tags_Non-Halal,dietary_tags_Kosher,dietary_tags_Vegan
i64,str,str,str,str,str,str,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8
0,"""No-Bake Nut Cookies""","""1 c. firmly packed brown sugar…","""In a heavy 2-quart saucepan, m…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""brown sugar|milk|vanilla|nuts|…",1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0
1,"""Jewell Ball'S Chicken""","""1 small jar chipped beef, cut …","""Place chipped beef on bottom o…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""beef|chicken breasts|cream of …",0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0
2,"""Creamy Corn""","""2 (16 oz.) pkg. frozen corn|1 …","""In a slow cooker, combine all …","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""frozen corn|cream cheese|butte…",0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0
3,"""Chicken Funny""","""1 large whole chicken|2 (10 1/…","""Boil and debone chicken.|Put b…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""chicken|chicken gravy|cream of…",0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0
4,"""Reeses Cups(Candy) ""","""1 c. peanut butter|3/4 c. grah…","""Combine first four ingredients…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""peanut butter|graham cracker c…",1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0


In [33]:
stringified_df["ingredients"][:5]

ingredients
str
"""1 c. firmly packed brown sugar…"
"""1 small jar chipped beef, cut …"
"""2 (16 oz.) pkg. frozen corn|1 …"
"""1 large whole chicken|2 (10 1/…"
"""1 c. peanut butter|3/4 c. grah…"


In [34]:
stringified_df.write_parquet("data/recipes3.parquet")

In [35]:
import polars as pl
read_df = pl.read_parquet("data/recipes3.parquet")
read_df.head()

Unnamed: 0_level_0,name,ingredients,instructions,link,source,NER,meal_course_snack,meal_course_pizza,meal_course_dessert,meal_course_side,meal_course_dinner,meal_course_breakfast,meal_course_appetizer,meal_course_lunch,meal_course_sandwich/burger,meal_course_main,meal_course_miscellaneous,dietary_tags_Non-Kosher,dietary_tags_Halal,dietary_tags_Vegetarian,dietary_tags_Non-Halal,dietary_tags_Kosher,dietary_tags_Vegan
i64,str,str,str,str,str,str,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8
0,"""No-Bake Nut Cookies""","""1 c. firmly packed brown sugar…","""In a heavy 2-quart saucepan, m…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""brown sugar|milk|vanilla|nuts|…",1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0
1,"""Jewell Ball'S Chicken""","""1 small jar chipped beef, cut …","""Place chipped beef on bottom o…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""beef|chicken breasts|cream of …",0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0
2,"""Creamy Corn""","""2 (16 oz.) pkg. frozen corn|1 …","""In a slow cooker, combine all …","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""frozen corn|cream cheese|butte…",0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0
3,"""Chicken Funny""","""1 large whole chicken|2 (10 1/…","""Boil and debone chicken.|Put b…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""chicken|chicken gravy|cream of…",0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0
4,"""Reeses Cups(Candy) ""","""1 c. peanut butter|3/4 c. grah…","""Combine first four ingredients…","""www.cookbooks.com/Recipe-Detai…","""Gathered""","""peanut butter|graham cracker c…",1,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0


In [36]:
read_df.describe()

statistic,Unnamed: 1_level_0,name,ingredients,instructions,link,source,NER,meal_course_snack,meal_course_pizza,meal_course_dessert,meal_course_side,meal_course_dinner,meal_course_breakfast,meal_course_appetizer,meal_course_lunch,meal_course_sandwich/burger,meal_course_main,meal_course_miscellaneous,dietary_tags_Non-Kosher,dietary_tags_Halal,dietary_tags_Vegetarian,dietary_tags_Non-Halal,dietary_tags_Kosher,dietary_tags_Vegan
str,f64,str,str,str,str,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",2231142.0,"""2231142""","""2231142""","""2231142""","""2231142""","""2231142""","""2231142""",2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0,2231142.0
"""null_count""",0.0,"""0""","""0""","""0""","""0""","""0""","""0""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",1115570.5,,,,,,,0.213088,0.011824,0.213088,0.160138,0.212458,0.082391,0.140472,0.212458,0.023169,0.019666,0.36568,0.008526,0.95051,0.882438,0.04949,0.991474,0.467799
"""std""",644075.361488,,,,,,,0.40949,0.108095,0.40949,0.366734,0.409047,0.274959,0.347476,0.409047,0.150441,0.13885,0.48162,0.091943,0.216889,0.322089,0.216889,0.091943,0.498962
"""min""",0.0,""" Arugula Pomegranate Salad""",""" 2 Kgs Fresh Mussels (cleaned…","""�|Cut unpeeled squash into 1/4…","""allrecipes.com/recipe/1-pea-sa…","""Gathered""","""""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""25%""",557785.0,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
"""50%""",1115571.0,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
"""75%""",1673356.0,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0
"""max""",2231141.0,"""￼Risotto Ai Funghi ""","""~|6 Cup Blueberries|1/2 Cup Su…","""°|Arrange 8 popsicle sticks on…","""www.yummly.com/recipe/tortitas…","""Recipes1M""","""||||||||""",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [37]:
read_df.dtypes

[Int64,
 String,
 String,
 String,
 String,
 String,
 String,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8,
 Int8]