In [1]:
import pandas as pd
import sqlite3

In [None]:
# Function to find all unique words in a column regaerdless of the entry being a list of words
def find_unique_vals(df, column_name):
    unique_list = set(
        word.strip()
        for courses in df[column_name]
        if courses is not None  # Skip None values
        for word in courses.split(";")
    )
    return unique_list

In [None]:
# Function to clean a column of undesired words
def clean_column(column, removal_list):
    # Handle None or NaN values
    if not isinstance(column, str):  # Skip if the column value is not a string
        return None

    # Split the cuisine into a list
    col_list = [c.strip() for c in column.split(";")]

    # If the list has only one cuisine and it is in cuisines_to_remove, return None
    if len(col_list) == 1 and col_list[0] in removal_list:
        return None  # Mark for deletion

    # Remove unwanted cuisines if there are multiple cuisines
    if len(col_list) > 1:
        col_list = [c for c in col_list if c not in removal_list]

    # Rejoin the list back into a string
    return ", ".join(col_list)

----

## Combine Databases

In [None]:
# Connect to the SQLite database and retrieve data
conn = sqlite3.connect("japanese_recipes.db")
query = "SELECT * FROM recipes"
df_japanese = pd.read_sql_query(query, conn)
conn.close()

df_japanese = df_japanese[
    [
        "title",
        "link",
        "image_url",
        "image_data",
        "description",
        "Total Time:",
        "Course:",
        "Cuisine:",
        "ingredients",
        "Calories:",
    ]
]


# Rename columns
df_japanese = df_japanese.rename(
    columns={
        "Total Time:": "total_time",
        "Course:": "course",
        "Cuisine:": "cuisine",
        "Calories:": "calories",
    }
)

df_japanese

Unnamed: 0,title,link,image_url,image_data,description,total_time,course,cuisine,ingredients,calories
0,Yuzu Cha (Citron Tea),https://www.justonecookbook.com/yuzu-cha/,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Try my easy homemade recipe for Yuzu Cha (Citr...,1 hour,How to,Japanese,yuzu; white rock sugar; shochu,
1,Japanese Milk Bread (Shokupan),https://www.justonecookbook.com/japanese-milk-...,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Japanese Milk Bread is possibly the best versi...,3 hours,Breakfast,Japanese,warm water; sugar; Diamond Crystal kosher salt...,1645 kcal
2,Chicken Chashu,https://www.justonecookbook.com/chicken-chashu/,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Juicy and tender Chicken Chashu is a lighter v...,1 hour,Main Course,Japanese,"boneless, skin-on chicken thighs; green onions...",
3,Gyudon (Japanese Beef Rice Bowl),https://www.justonecookbook.com/gyudon/,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,With thinly sliced beef and tender onions simm...,20 minutes,Main Course,Japanese,onion; green onion/scallion; thinly sliced bee...,657 kcal
4,Japanese Beef Curry,https://www.justonecookbook.com/japanese-beef-...,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"With tender chunks of beef, potatoes, carrots,...",3 hours,Main Course,Japanese,onions; unsalted butter; neutral oil; russet p...,426 kcal
...,...,...,...,...,...,...,...,...,...,...
943,Taiwanese Hot Pot with Homemade Meatballs,https://www.justonecookbook.com/taiwanese-hot-...,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Delicious Taiwanese Hot Pot and Homemade Meatb...,30 minutes,Main Course,Taiwanese,ground pork; green onion/scallion; Tokyo negi ...,974 kcal
944,Oyakodon (Chicken and Egg Rice Bowl),https://www.justonecookbook.com/oyakodon/,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Oyakodon is a classic comfort food of Japanese...,25 minutes,Main Course,Japanese,"onion; boneless, skinless chicken thighs; sake...",537 kcal
945,Hamachi (Yellowtail) Teriyaki with Yuzu Kosho,https://www.justonecookbook.com/hamachi-yellow...,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"Growing up, I had enjoyed eating Hamachi Teriy...",20 minutes,Main Course,Japanese,"yellowtail (hamachi, buri); freshly ground bla...",314 kcal
946,Teriyaki Pork Balls,https://www.justonecookbook.com/teriyaki-pork-...,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,My Teriyaki Pork Balls recipe uses thin slices...,30 minutes,Main Course,Japanese,sake; potato starch or cornstarch; thinly slic...,429 kcal


In [None]:
# Connect to the SQLite database and retrieve data
conn = sqlite3.connect("chinese_recipes.db")
query = "SELECT * FROM recipes"
df_chinese = pd.read_sql_query(query, conn)
conn.close()

df_chinese = df_chinese[
    [
        "title",
        "link",
        "image_url",
        "image_data",
        "description",
        "Total Time:",
        "Course:",
        "Cuisine:",
        "ingredients",
        "Calories:",
    ]
]


# Rename columns
df_chinese = df_chinese.rename(
    columns={
        "Total Time:": "total_time",
        "Course:": "course",
        "Cuisine:": "cuisine",
        "Calories:": "calories",
    }
)

df_chinese

Unnamed: 0,title,link,image_url,image_data,description,total_time,course,cuisine,ingredients,calories
0,Easy Oyster Mushroom Stir Fry,https://omnivorescookbook.com/easy-oyster-mush...,https://omnivorescookbook.com/wp-content/uploa...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...,A super quick and easy oyster mushroom stir fr...,15 minutes,Side Dish,Chinese,oyster mushrooms; peanut oil; garlic; sugar; s...,85 kcal
1,Honey Glazed Salmon,https://omnivorescookbook.com/honey-soy-sauce-...,https://omnivorescookbook.com/wp-content/uploa...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...,A simple yet rich tasting honey glazed salmon ...,55 minutes,Main,Chinese Fusion,salmon filets; salt; sugar; honey; Shaoxing wi...,445 kcal
2,Shrimp Toast,https://omnivorescookbook.com/shrimp-toast/,https://omnivorescookbook.com/wp-content/uploa...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...,Make these crispy savory shrimp toasts as an a...,40 minutes,Appetizer,Chinese,shrimp; egg white; ginger; garlic; light soy s...,234 kcal
3,Garlic Fried Rice,https://omnivorescookbook.com/garlic-fried-rice/,https://omnivorescookbook.com/wp-content/uploa...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...,A Chinese style garlic fried rice featuring cr...,25 minutes,Side,Chinese,of leftover cooked jasmine rice; soy sauce; oy...,239 kcal
4,Chicken with Garlic Sauce,https://omnivorescookbook.com/chicken-with-gar...,https://omnivorescookbook.com/wp-content/uploa...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...,Chicken with garlic sauce is a super easy take...,30 minutes,Main,Chinese,chicken breasts or thighs; Shaoxing wine; salt...,248 kcal
...,...,...,...,...,...,...,...,...,...,...
900,Barter-Worthy Spam Musubi,https://omnivorescookbook.com/spam-musubi/,https://omnivorescookbook.com/wp-content/uploa...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...,"The spam is grilled until crispy, perfectly ca...",45 minutes,"Appetizer, Main",hawaii,vegetable oil; low-sodium Spam (12 oz. / 340 g...,279 kcal
901,Authentic Mapo Tofu (麻婆豆腐),https://omnivorescookbook.com/authentic-mapo-t...,https://omnivorescookbook.com/wp-content/uploa...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,An easy mapo tofu recipe that creates the auth...,25 minutes,Main,Chinese,ground pork; Shaoxing wine; light soy sauce; m...,194 kcal
902,Chinese Scallion Pancakes (葱油饼),https://omnivorescookbook.com/chinese-scallion...,https://omnivorescookbook.com/wp-content/uploa...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Super crispy and flaky on the outside and slig...,1 hour,Appetizer,Chinese,all-purpose flour; salt; boiling water; cool w...,202 kcal
903,Chicken and Broccoli (Chinese Takeout Style),https://omnivorescookbook.com/chicken-and-broc...,https://omnivorescookbook.com/wp-content/uploa...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,An easy chicken and broccoli stir fry recipe t...,25 minutes,Main,Chinese,boneless skinless chicken breast; Shaoxing win...,246 kcal


In [None]:
# Connect to the SQLite database and retrieve data
conn = sqlite3.connect("indian_recipes.db")
query = "SELECT * FROM recipes"
df_indian = pd.read_sql_query(query, conn)
conn.close()

df_indian = df_indian[
    [
        "title",
        "link",
        "image_url",
        "image_data",
        "description",
        "Total Time",
        "Course",
        "Cuisine",
        "ingredients",
        "Calories:",
    ]
]


# Rename columns
df_indian = df_indian.rename(
    columns={
        "Total Time": "total_time",
        "Course": "course",
        "Cuisine": "cuisine",
        "Calories:": "calories",
    }
)

df_indian

Unnamed: 0,title,link,image_url,image_data,description,total_time,course,cuisine,ingredients,calories
0,Kala Chana Curry {Gujarati Rasawala Kala Chana...,https://ministryofcurry.com/kala-chana-curry/,https://ministryofcurry.com/wp-content/uploads...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Hearty Kala Chana Curry made with black chickp...,9 hours,dinner,Indian,dry black chana; water; oil; mustard seeds; hi...,246 kcal
1,"Chilli Tofu {Bold Flavors, Light Twist}",https://ministryofcurry.com/chilli-tofu/,https://ministryofcurry.com/wp-content/uploads...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,A light spin on Chilli Paneer by using tofu an...,30 minutes,"dinner, Lunch",Indo-Chinese,extra firm tofu; Kashmiri red chili powder; ko...,208 kcal
2,Quick & Easy Khichdi: Perfect for Cozy Evening...,https://ministryofcurry.com/khichdi/,https://ministryofcurry.com/wp-content/uploads...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,This simple khichdi recipe makes for a nourish...,30 minutes,dinner,Indian,short-grain rice; moong dal; water; kosher sal...,270 kcal
3,Pomegranate Mojito Recipe,https://ministryofcurry.com/pomegranate-mojito/,https://ministryofcurry.com/wp-content/uploads...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"Twist to the classic mojito, this Pomegranate ...",,Beverage,American,pomegrante juice; club soda; ice cubes; fresh ...,110 kcal
4,Easy Malai Laddo,https://ministryofcurry.com/malai-laddu/,https://ministryofcurry.com/wp-content/uploads...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"Easy 5-ingredient Malai Laddu for a quick, del...",35 minutes,Dessert,Indian,ricotta cheese; heavy cream; powdered sugar; c...,99 kcal
...,...,...,...,...,...,...,...,...,...,...
425,Stuffed Baby Eggplant Curry,https://ministryofcurry.com/stuffed-baby-eggpl...,https://ministryofcurry.com/wp-content/uploads...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Baby eggplants stuffed with finely diced onion...,30 minutes,Main Course,Indian,small baby eggplants; yellow onion; ginger; ga...,58 kcal
426,Palak Paneer,https://ministryofcurry.com/palak-paneer/,https://ministryofcurry.com/wp-content/uploads...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,A delicious north Indian dish with pureed spin...,25 minutes,Entree,Indian,Spinach; cubed paneer; green chilies; yellow o...,210 kcal
427,Vermicelli Sheera,https://ministryofcurry.com/vermicelli-sheera/,https://ministryofcurry.com/wp-content/uploads...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Dessert made with thin wheat vermcelli noodles...,15 minutes,Dessert,Indian,vermicelli; sugar; ghee; almonds; pistachios; ...,411 kcal
428,How to Cook Beets,https://ministryofcurry.com/roasted-beetroots/,https://ministryofcurry.com/wp-content/uploads...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Cooking Beets 2 ways - roast in the oven or st...,1 hour,Misc.,American,medium Beets; Olive oil,60 kcal


In [None]:
# Connect to the SQLite database and retrieve data
conn = sqlite3.connect("thai_recipes.db")
query = "SELECT * FROM recipes"
df_thai = pd.read_sql_query(query, conn)
conn.close()

df_thai = df_thai[
    [
        "title",
        "link",
        "image_url",
        "image_data",
        "description",
        "Total Time",
        "Course",
        "Cuisine",
        "ingredients",
        "Calories:",
    ]
]


# Rename columns
df_thai = df_thai.rename(
    columns={
        "Total Time": "total_time",
        "Course": "course",
        "Cuisine": "cuisine",
        "Calories:": "calories",
    }
)

df_thai

Unnamed: 0,title,link,image_url,image_data,description,total_time,course,cuisine,ingredients,calories
0,Authentic Thai Beef Satay Recipe With Peanut S...,https://hungryinthailand.com/thai-beef-satay-r...,https://hungryinthailand.com/wp-content/upload...,"b'RIFFd""\x02\x00WEBPVP8X\n\x00\x00\x00 \x00\x0...",Enjoy my family’s authentic Thai beef satay wi...,4 hours,"Appetizer, Main Course, Snack",Thai,beef; of garlic; lemongrass; coriander seeds; ...,83 kcal
1,Easy Thai Fish Sauce Wings Recipe,https://hungryinthailand.com/fish-sauce-wings/,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xca\xbb\x01\x00WEBPVP8X\n\x00\x00\x00 \...,Enjoy perfectly crispy chicken every time with...,35 minutes,"Appetizer, Side Dish, Snack","Asian, Thai",chicken wings; fish sauce; rosdee seasoning po...,670 kcal
2,Sweet Thai Chili Wings Recipe,https://hungryinthailand.com/sweet-thai-chili-...,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xc6\x91\x02\x00WEBPVP8X\n\x00\x00\x00 \...,"Sweet Thai chili wings recipe with a sticky, s...",40 minutes,"Appetizer, Snack",Thai,tempura flour; rosdee seasoning powder; ice-co...,239 kcal
3,Shrimp Satay Recipe With Thai Peanut Sauce,https://hungryinthailand.com/shrimp-satay-with...,https://hungryinthailand.com/wp-content/upload...,b'RIFFL!\x01\x00WEBPVP8X\n\x00\x00\x00 \x00\x0...,Enjoy this easy shrimp satay recipe with Thai ...,50 minutes,"Appetizer, Snack",Thai,shrimp; coconut milk; yellow curry powder; Ros...,392 kcal
4,Pork Gyoza Recipe (Pork Dumplings),https://hungryinthailand.com/pork-gyoza-recipe/,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xf2Z\x01\x00WEBPVP8X\n\x00\x00\x00 \x00...,"Make this pork gyoza recipe for easy, homemade...",1 hour,"Appetizer, Snack",Thai,ground pork; white pepper; sesame oil; shoyu s...,126 kcal
...,...,...,...,...,...,...,...,...,...,...
304,Thai Vegetable Soup (Tom Jued),https://hungryinthailand.com/thai-vegetable-so...,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xa4\x83\x01\x00WEBPVP8X\n\x00\x00\x00 \...,Tom jued is a healthy Thai vegetable soup reci...,40 minutes,"Main Course, Soup",Thai,garlic; coriander root; black peppercorns; bou...,63 kcal
305,Tom Kha Gai Recipe (Thai Coconut Chicken Soup),https://hungryinthailand.com/tom-kha-gai/,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xfa\x9f\x01\x00WEBPVP8X\n\x00\x00\x00 \...,Try this tom kha gai recipe—a healthy and quic...,20 minutes,"Main Course, Soup",Thai,water; coconut milk; lemongrass; galangal; kaf...,451 kcal
306,Tom Yum Kung Recipe (Spicy Thai Shrimp Soup),https://hungryinthailand.com/tom-yum-kung/,https://hungryinthailand.com/wp-content/upload...,b'RIFF$\x1f\x02\x00WEBPVP8X\n\x00\x00\x00 \x00...,This tom yum kung recipe is an authentic Thai ...,25 minutes,"Main Course, Soup",Thai,shrimp; Asian mushrooms; lemongrass; shallots;...,198 kcal
307,Thai Chicken Glass Noodle Soup Recipe,https://hungryinthailand.com/thai-chicken-glas...,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xd8+\x02\x00WEBPVP8X\n\x00\x00\x00 \x00...,Enjoy a quick and easy Thai chicken glass nood...,30 minutes,Soup,Thai,glass noodles; dried chilies; chilies; shallot...,644 kcal


In [None]:
# Connect to the SQLite database and retrieve data
conn = sqlite3.connect("korean_recipes.db")
query = "SELECT * FROM recipes"
df_korean = pd.read_sql_query(query, conn)
conn.close()

df_korean = df_korean[
    [
        "title",
        "link",
        "image_url",
        "image_data",
        "description",
        "Total Time:",
        "Course",
        "Cuisine",
        "ingredients",
        "Calories:",
    ]
]


# Rename columns
df_korean = df_korean.rename(
    columns={
        "Total Time:": "total_time",
        "Course": "course",
        "Cuisine": "cuisine",
        "Calories:": "calories",
    }
)

df_korean

Unnamed: 0,title,link,image_url,image_data,description,total_time,course,cuisine,ingredients,calories
0,Seolleongtang (Korean Beef Bone Broth),https://kimchimari.com/seolleongtang-korean-be...,https://kimchimari.com/wp-content/uploads/2024...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Seolleontang is a bone broth made from boiling...,10 hours,Soup,Korean,beef bones; water; green onions; beef brisket;...,74 kcal
1,Sujebi (Hand-Pulled Noodle Soup),https://kimchimari.com/sujebi-hand-pulled-nood...,https://kimchimari.com/wp-content/uploads/2023...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Sujebi is a fun hand-pulled or hand-torn noodl...,40 minutes,"Lunch, noodles",Korean,all purpose flour; sea salt; water; water; dri...,530 kcal
2,Instant Pot Gamjatang,https://kimchimari.com/instant-pot-gamjatang-k...,https://kimchimari.com/wp-content/uploads/2018...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Instant Pot Gamjatang recipe was a perfect rec...,50 minutes,"Main Course, Pork",Korean,pork neck bones; cooking sake; potatoes; fresh...,363 kcal
3,Instant Pot Tteok Guk (Rice Cake Soup),https://kimchimari.com/instant-pot-tteokguk-ri...,https://kimchimari.com/wp-content/uploads/2018...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Anchovy Broth Tteokguk is a very elegant versi...,28 minutes,Soup,Korean,tteokguk tteok; beef stew meat; water; sesame ...,407 kcal
4,Tteok guk (떡국) – Korean rice cake soup,https://kimchimari.com/rice-cake-soup-tteokguk...,https://kimchimari.com/wp-content/uploads/2011...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"Every New Year’s day, Koreans make Dduk Guk/T...",45 minutes,"rice, Soup",Korean,rice cake slices/ovalettes for soup; anchovy s...,264 kcal
...,...,...,...,...,...,...,...,...,...,...
257,Gluten Free Sweet Red Bean Rice Cake (Tteok Pp...,https://kimchimari.com/sweet-red-bean-rice-cake/,https://kimchimari.com/wp-content/uploads/2014...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...,This deliciously addictive Sweet Red Bean Rice...,1 hour,"Dessert, Snack","Korean, Koreanfusion",sweet rice flour; sugar; milk; salted butter; ...,370 kcal
258,Sweet Rice Punch (Sikhye 식혜),https://kimchimari.com/sweet-rice-punch/,https://kimchimari.com/wp-content/uploads/2013...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...,Sikhye is a classic Korean dessert drink that ...,8 hours,"Dessert, Drinks",Korean,yeotkireum; water; short grain rice; sugar; pi...,204 kcal
259,Crispy Zucchini Pancakes (Hobak buchimgae),https://kimchimari.com/hobak-boochimgae/,https://kimchimari.com/wp-content/uploads/2012...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...,Easy Korean Zucchini fritters that is great as...,20 minutes,"Appetizer, Side Dish",Korean,large Korean Zucchini; onion; Green Chili Pepp...,219 kcal
260,Korean Sweet Rice Dessert (Yaksik),https://kimchimari.com/yaksik-korean-sweet-ric...,https://kimchimari.com/wp-content/uploads/2011...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01...,A dessert filled with health-promoting ingredi...,2 hours,Dessert,Korean,sweet rice; water; sugar; honey; soy sauce; se...,266 kcal


In [None]:
# Combine all the separate dataframes
df_combined = pd.concat(
    [df_japanese, df_korean, df_chinese, df_indian, df_thai], ignore_index=True
)

df_combined

Unnamed: 0,title,link,image_url,image_data,description,total_time,course,cuisine,ingredients,calories
0,Yuzu Cha (Citron Tea),https://www.justonecookbook.com/yuzu-cha/,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Try my easy homemade recipe for Yuzu Cha (Citr...,1 hour,How to,Japanese,yuzu; white rock sugar; shochu,
1,Japanese Milk Bread (Shokupan),https://www.justonecookbook.com/japanese-milk-...,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Japanese Milk Bread is possibly the best versi...,3 hours,Breakfast,Japanese,warm water; sugar; Diamond Crystal kosher salt...,1645 kcal
2,Chicken Chashu,https://www.justonecookbook.com/chicken-chashu/,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Juicy and tender Chicken Chashu is a lighter v...,1 hour,Main Course,Japanese,"boneless, skin-on chicken thighs; green onions...",
3,Gyudon (Japanese Beef Rice Bowl),https://www.justonecookbook.com/gyudon/,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,With thinly sliced beef and tender onions simm...,20 minutes,Main Course,Japanese,onion; green onion/scallion; thinly sliced bee...,657 kcal
4,Japanese Beef Curry,https://www.justonecookbook.com/japanese-beef-...,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"With tender chunks of beef, potatoes, carrots,...",3 hours,Main Course,Japanese,onions; unsalted butter; neutral oil; russet p...,426 kcal
...,...,...,...,...,...,...,...,...,...,...
2849,Thai Vegetable Soup (Tom Jued),https://hungryinthailand.com/thai-vegetable-so...,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xa4\x83\x01\x00WEBPVP8X\n\x00\x00\x00 \...,Tom jued is a healthy Thai vegetable soup reci...,40 minutes,"Main Course, Soup",Thai,garlic; coriander root; black peppercorns; bou...,63 kcal
2850,Tom Kha Gai Recipe (Thai Coconut Chicken Soup),https://hungryinthailand.com/tom-kha-gai/,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xfa\x9f\x01\x00WEBPVP8X\n\x00\x00\x00 \...,Try this tom kha gai recipe—a healthy and quic...,20 minutes,"Main Course, Soup",Thai,water; coconut milk; lemongrass; galangal; kaf...,451 kcal
2851,Tom Yum Kung Recipe (Spicy Thai Shrimp Soup),https://hungryinthailand.com/tom-yum-kung/,https://hungryinthailand.com/wp-content/upload...,b'RIFF$\x1f\x02\x00WEBPVP8X\n\x00\x00\x00 \x00...,This tom yum kung recipe is an authentic Thai ...,25 minutes,"Main Course, Soup",Thai,shrimp; Asian mushrooms; lemongrass; shallots;...,198 kcal
2852,Thai Chicken Glass Noodle Soup Recipe,https://hungryinthailand.com/thai-chicken-glas...,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xd8+\x02\x00WEBPVP8X\n\x00\x00\x00 \x00...,Enjoy a quick and easy Thai chicken glass nood...,30 minutes,Soup,Thai,glass noodles; dried chilies; chilies; shallot...,644 kcal


In [None]:
### Remove all cuisines that aren't the specified asian ones that are being worled with

# Find all cuisines mentioned across all recipes
all_cuisines = find_unique_vals(df_combined, "cuisine")

# Define cuisines to keep
cuisines_to_keep = ["Japanese", "Chinese", "Thai", "Indian", "Korean"]

# Find cuisines to remove from the df
cuisines_to_remove = [item for item in all_cuisines if item not in cuisines_to_keep]

# Remove rows that dont have the key cuisines in it
df_combined["cuisine"] = df_combined["cuisine"].apply(
    lambda x: clean_column(x, cuisines_to_remove)
)
df_combined = df_combined.dropna(subset=["cuisine"]).reset_index(drop=True)

In [None]:
df_combined["cuisine"].value_counts()

cuisine
Japanese    837
Chinese     744
Thai        314
Indian      289
Korean      254
Name: count, dtype: int64

In [None]:
# Show the types of courses present in df
print(df_combined["course"].value_counts().head(50))

course
Main Course                      488
Main                             358
Side Dish                        154
Appetizer                        147
Dessert                          142
Soup                              81
Main, Side                        78
Salad                             73
Side                              71
 Entree                           49
Condiments                        39
 Dessert                          36
Drinks                            30
Appetizer, Main                   29
Appetizer, Side Dish              26
Appetizer, Snack                  23
Salad, Side Dish                  22
Main Course, Soup                 22
Appetizer, Main Course            22
Dessert, Snack                    22
Main Course, Salad                21
Breakfast                         20
Snack                             16
Main Course, Side Dish            16
 dinner, Lunch                    16
 Appetizer                        12
Bakery                         

In [None]:
# Mappings to convert similar categories into one
mapping = {
    "Appetizer": ["Appetizer", "Entree"],
    "Breakfast": ["Breakfast", "Brunch", "Porridge"],
    "Dessert": ["Dessert", "Sweets"],
    "Main Course": ["Main Course", "Main", "Main dish", "Dinner", "main dish"],
    "Side Dish": ["Side Dish", "Side"],
    "Soup": ["Soup", "Stew"],
}


# Function to replace synonyms with main category and remove duplicates
def map_to_main_category(entry, mapping):
    # Flatten the mapping for quick lookups
    reverse_mapping = {
        synonym: main_category
        for main_category, synonyms in mapping.items()
        for synonym in synonyms
    }
    # Split the string into a list
    entry_list = [item.strip() for item in entry.split(",")]
    # Replace each item in the list
    mapped_list = [
        reverse_mapping[item] for item in entry_list if item in reverse_mapping
    ]
    # Remove duplicates while preserving order
    unique_list = list(dict.fromkeys(mapped_list))
    # Return None if no matches
    if not unique_list:
        return None
    return ", ".join(unique_list)

In [None]:
# Remove rows where column 'course' has NaN
df_combined = df_combined.dropna(subset=["course"])

# Apply the mappings function to the df
df_combined["course"] = df_combined["course"].apply(
    lambda x: map_to_main_category(x, mapping)
)

# Drop rows where 'course' is None
df_combined = df_combined.dropna(subset=["course"]).reset_index(drop=True)

# Show the types of courses present in df
print(df_combined["course"].value_counts())

course
Main Course                          913
Side Dish                            289
Appetizer                            260
Dessert                              219
Soup                                 104
Main Course, Side Dish                95
Appetizer, Main Course                67
Breakfast                             56
Appetizer, Side Dish                  49
Main Course, Soup                     26
Breakfast, Main Course                11
Breakfast, Dessert                     7
Appetizer, Main Course, Side Dish      5
Breakfast, Main Course, Side Dish      4
Appetizer, Breakfast                   2
Appetizer, Soup                        2
Appetizer, Main Course, Soup           2
Breakfast, Side Dish                   2
Breakfast, Appetizer, Main Course      2
Appetizer, Side Dish, Soup             1
Dessert, Side Dish                     1
Appetizer, Dessert                     1
Side Dish, Soup                        1
Breakfast, Appetizer                   1
Dessert, 

In [15]:
df_combined

Unnamed: 0,title,link,image_url,image_data,description,total_time,course,cuisine,ingredients,calories
0,Japanese Milk Bread (Shokupan),https://www.justonecookbook.com/japanese-milk-...,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Japanese Milk Bread is possibly the best versi...,3 hours,Breakfast,Japanese,warm water; sugar; Diamond Crystal kosher salt...,1645 kcal
1,Chicken Chashu,https://www.justonecookbook.com/chicken-chashu/,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Juicy and tender Chicken Chashu is a lighter v...,1 hour,Main Course,Japanese,"boneless, skin-on chicken thighs; green onions...",
2,Gyudon (Japanese Beef Rice Bowl),https://www.justonecookbook.com/gyudon/,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,With thinly sliced beef and tender onions simm...,20 minutes,Main Course,Japanese,onion; green onion/scallion; thinly sliced bee...,657 kcal
3,Japanese Beef Curry,https://www.justonecookbook.com/japanese-beef-...,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,"With tender chunks of beef, potatoes, carrots,...",3 hours,Main Course,Japanese,onions; unsalted butter; neutral oil; russet p...,426 kcal
4,Japanese Cheesecake,https://www.justonecookbook.com/souffle-japane...,https://www.justonecookbook.com/wp-content/upl...,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,Jiggly and fluffy Japanese Cheesecake is a cro...,1 hour,Dessert,Japanese,unsalted butter; large eggs (50 g each w/o she...,3560 kcal
...,...,...,...,...,...,...,...,...,...,...
2118,Thai Vegetable Soup (Tom Jued),https://hungryinthailand.com/thai-vegetable-so...,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xa4\x83\x01\x00WEBPVP8X\n\x00\x00\x00 \...,Tom jued is a healthy Thai vegetable soup reci...,40 minutes,"Main Course, Soup",Thai,garlic; coriander root; black peppercorns; bou...,63 kcal
2119,Tom Kha Gai Recipe (Thai Coconut Chicken Soup),https://hungryinthailand.com/tom-kha-gai/,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xfa\x9f\x01\x00WEBPVP8X\n\x00\x00\x00 \...,Try this tom kha gai recipe—a healthy and quic...,20 minutes,"Main Course, Soup",Thai,water; coconut milk; lemongrass; galangal; kaf...,451 kcal
2120,Tom Yum Kung Recipe (Spicy Thai Shrimp Soup),https://hungryinthailand.com/tom-yum-kung/,https://hungryinthailand.com/wp-content/upload...,b'RIFF$\x1f\x02\x00WEBPVP8X\n\x00\x00\x00 \x00...,This tom yum kung recipe is an authentic Thai ...,25 minutes,"Main Course, Soup",Thai,shrimp; Asian mushrooms; lemongrass; shallots;...,198 kcal
2121,Thai Chicken Glass Noodle Soup Recipe,https://hungryinthailand.com/thai-chicken-glas...,https://hungryinthailand.com/wp-content/upload...,b'RIFF\xd8+\x02\x00WEBPVP8X\n\x00\x00\x00 \x00...,Enjoy a quick and easy Thai chicken glass nood...,30 minutes,Soup,Thai,glass noodles; dried chilies; chilies; shallot...,644 kcal


In [None]:
# Find all cuisines mentioned across all recipes
find_unique_vals(df_combined, "ingredients")

3617

In [None]:
# Connect to SQLite database (or create it if it doesn't exist)
conn = sqlite3.connect("all_recipes.db")

# Save DataFrame to SQL database
df_combined.to_sql("recipes", conn, if_exists="replace", index=False)

# Close the connection
conn.close()

print("Data saved to database!")

Data saved to database!


--- 

## Create Embeddings

In [None]:
import ollama

content = """The following are common ingredient variations. Normalize each ingredient variation by mapping it to the appropriate standardized name:

Salt, Kosher Salt, Sea Salt, Flaky Salt -> Salt
Olive Oil, Extra Virgin Olive Oil, Virgin Olive Oil -> Olive Oil
Tomatoes, Canned Tomatoes, Cherry Tomatoes, Roma Tomatoes -> Tomatoes
Potatoes, Russet Potatoes, Yukon Gold Potatoes -> Potatoes
Garlic, Garlic Powder, Minced Garlic -> Garlic

Create a python dictionary, where each key refers to one of the original ingredients lists below and its value is your answer. You can only give one ingredient answer. Do not include the above examples in the python dictionary to be created. Do not return anything other than the python dictionary in your response.

Normalize this ingredient:
'pineapple slices',
'coarsely chopped raw cashews',
'chopped cilantro for garnish',
'pickled garlic juice',
'nutmeg powder',
'can chickpeas',
'manila clams',
'reconstituted bracken fiddleheads',
'Ootoya’s sweet and sour chicken',
'dried hijiki seaweed',
'low fat milk or whole milk',
'brown sesame seeds',
'dried scallop',
'green onions – cut 4 green onions into 2 inch',
'chopped cilantro',
'Lamb or mutton shoulder',
'frozen tater tots',
'ume plum vinegar',
'whole cumin seeds',
'12-oz. / 340-g block extra firm tofu',"""

response = ollama.generate(model="gemma2", prompt=content)
print(response["response"])

In [None]:
# Convert the string into a Python dictionary using ast.literal_eval (safe)
import ast

ingredient_standardization = ast.literal_eval(response["response"])

# Print the dictionary to verify
print(ingredient_standardization)