In [186]:
import pandas as pd
import numpy as np
import math
from collections import Counter

n = 160

In [187]:

# Load the data
recipes = pd.read_csv("full_dataset.csv", nrows = n)

# User ingredients
user_ingredients = ['bite size shredded rice biscuits', 'brown sugar', 'milk', 'vanilla', 'nuts', 'butter', 'chicken', 'garlic', 'onion', 'pepper', 'salt', 'tomato', 'lemon']
#print (recipes)

recipe_ing = {}
recipe_dict = {}
for index, row in recipes.iterrows():
    #print (row['NER'])
    recipe_ing[index] = row['NER']
    recipe_dict[index] = row['title']

In [188]:
#standardize the format of each ingredient
def clean_ingredient(ingredient):
    ingredient = ingredient.lower()
    while ingredient[0] != "\"":
        ingredient = ingredient[1:]
    while ingredient[-1] != "\"":
        ingredient = ingredient[:-1]
    return ingredient[1:-1]

In [189]:
#get the unique ingredients and the frequency of each ingredient

ingredient_freq = Counter()

i = 0
add = True

all_ing = set()
for r in recipe_ing.values():
    for ing in r.split(','):
        ing = clean_ingredient(ing)
        ingredient_freq[ing] += 1
        all_ing.add(ing)
        if ing == '+':
            add = False
    if add:
        i += 1
all_ing = sorted(list(all_ing))
print(len(all_ing), len(ingredient_freq))
ingredient_freq = sorted(ingredient_freq.items(), key=lambda x: x[0])

print(recipe_ing[159])

# Print the sorted ingredients and their frequencies
print(i)

372 372
["white cake mix", "+", "pistachio instant pudding", "ginger ale", "eggs", "pecans", "cool whip", "milk", "pistachio instant pudding"]
159


In [190]:
#calculate wieghts for each ingredient

ingredient_weights = {}
for ing, freq in ingredient_freq:
    ingredient_weights[ing] = math.log(n/(freq + 1))
print(ingredient_weights)

{'+': 4.382026634673881, 'alfalfa honey': 4.382026634673881, 'allspice': 4.382026634673881, 'almond extract': 3.9765615265657175, 'almonds': 3.9765615265657175, 'american cheese': 4.382026634673881, 'angel': 4.382026634673881, 'apple': 3.9765615265657175, 'apple juice': 4.382026634673881, 'apple vinegar': 4.382026634673881, 'apples': 3.4657359027997265, 'applesauce': 4.382026634673881, 'apricot jello': 4.382026634673881, 'apricot nectar': 4.382026634673881, 'arborio': 4.382026634673881, 'armour dried beef': 4.382026634673881, 'artichokes': 4.382026634673881, 'bacon': 3.6888794541139363, 'baking potatoes': 4.382026634673881, 'baking powder': 2.8779492378976075, 'baking soda': 3.9765615265657175, 'bananas': 3.4657359027997265, 'barbecue sauce': 4.382026634673881, 'basil': 3.6888794541139363, 'bay leaf': 3.6888794541139363, 'beans': 3.283414346005772, 'beef': 3.6888794541139363, 'beef bouillon': 4.382026634673881, 'beef consomme': 4.382026634673881, 'beef stock': 3.9765615265657175, 'beer

In [191]:
def create_vector(recipe):
    vector = []
    for ingredient in all_ing:
        if(ingredient in recipe):
            vector.append(ingredient_weights[ingredient])
        else:
            vector.append(0)
    return vector

In [192]:
# Encode the recipes
recipe_vectors = {}
i = 0
for ingredients in recipe_ing.items():
    recipe_vectors[recipe_dict[i]] = create_vector(ingredients[1])
    i += 1
user_vector = create_vector(user_ingredients)

In [193]:
def cos_sim(a, b):
    #dot product
    dot_product = sum([i * j for i, j in zip(a, b)])

    #magnitude of the vectors
    magnitude_vec1 = math.sqrt(sum(i ** 2 for i in a))
    magnitude_vec2 = math.sqrt(sum(j ** 2 for j in b))
    if magnitude_vec1 == 0 or magnitude_vec2 == 0:  # Handle zero vectors
        return 0
    return dot_product / (magnitude_vec1 * magnitude_vec2)


In [194]:
def weighted_cos_sim(a, b):
    # Compute weighted dot product
    a = [float(ai) for ai in a]
    b = [float(bi) for bi in b]
    weights = [float(ingredient_weights[w]) for w in ingredient_weights]

    dot_product = sum([w * ai * bi for ai, bi, w in zip(a, b, weights)])

    # Compute weighted magnitudes
    magnitude_a = math.sqrt(sum([w * ai**2 for ai, w in zip(a, weights)]))
    magnitude_b = math.sqrt(sum([w * bi**2 for bi, w in zip(b, weights)]))

    # Handle the case where either magnitude is zero (no common ingredients)
    if magnitude_a == 0 or magnitude_b == 0:
        return 0.0
    
    # Return the weighted cosine similarity
    return dot_product / (magnitude_a * magnitude_b)

In [195]:
similarities = {}
for recipe, vector in recipe_vectors.items():
    similarity = weighted_cos_sim(user_vector, vector)
    similarities[recipe] = similarity

recipe_scores = sorted(similarities.items(), key=lambda x: x[1], reverse=True)

print("Recommendations:")
for recipe, score in recipe_scores:
    print(f"{recipe}: {score:.4f}")

Recommendations:
No-Bake Nut Cookies: 0.4560
Cheese Dip: 0.3501
Prize-Winning Meat Loaf: 0.2856
Spaghetti Sauce To Can: 0.2812
Nolan'S Pepper Steak: 0.2769
Zucchini-Artichoke Continental: 0.2407
Chicken Stew: 0.2394
Corral Barbecued Beef Steak Strips: 0.2301
Quick Coffee Cake(6 Servings)  : 0.2140
French Onion Soup: 0.2098
Eggplant Spaghetti Sauce: 0.2040
Creole Flounder: 0.1971
Taco Salad Chip Dip: 0.1969
Supreme Beef Casserole: 0.1879
Casserole Italiano: 0.1877
Jan'S Winter Soup: 0.1789
Vegetable Soup: 0.1724
Vegetable-Burger Soup: 0.1697
Spanish Hamburgers: 0.1645
Smoked Turkey Risotto: 0.1608
Pecan Pralines: 0.1582
Chicken Spaghetti: 0.1568
Creamy Corn: 0.1540
Egg Casserole: 0.1442
Sweet-N-Sour Chicken: 0.1362
Chicken Salad: 0.1355
Baked Beans: 0.1348
Cuddy Farms Marinated Turkey: 0.1285
Monkey Bread: 0.1257
Easy Fudge: 0.1237
Fast Real Good Fudge: 0.1214
Ranch Beef And Beans: 0.1209
Consomme Chicken: 0.1184
Creamy Coleslaw(Better Homes And Gardens)  : 0.1176
Summer Chicken: 0.1160