#### This notebook generates interactions between users and recipes based on a cosine similarity and stores them in `data/user_recipe_interactions.csv`

In [33]:
'''
for recipes.json with 42 recipes

'''

import pandas as pd
from collections import Counter
import json

# Load the updated recipe file with tags
# with open("data/recipes.json") as f:
#     recipes = json.load(f)

with open("data/recipes.json") as f:
    recipes = json.load(f)

# Extract tags and ingredients
all_tags = []
all_ingredients = []
all_cuisines = []

for recipeID, recipe in recipes.items() :
    # Collect tags
    all_tags.extend(recipe.get("tags", []))
    
    # Collect ingredient names
    non_dairy_ings = recipe.get("Non-dairy ingredients", {})
    dairy_ings = recipe.get("dairy ingredients", {})
    all_cuisines.append(recipe['Cuisine'])
    all_ingredients.extend(non_dairy_ings.keys())
    all_ingredients.extend(dairy_ings.keys())

# Count frequencies
tag_counts = Counter(all_tags)
ingredient_counts = Counter(all_ingredients)
cuisine_counts = Counter(all_cuisines)

In [34]:
# Convert to DataFrames for better display
tag_df = pd.DataFrame(tag_counts.items(), columns=["Tag", "Count"]).sort_values(by="Count", ascending=False)
ingredient_df = pd.DataFrame(ingredient_counts.items(), columns=["Ingredient", "Count"]).sort_values(by="Count", ascending=False)
cuisine_df = pd.DataFrame(cuisine_counts.items(), columns=["Cuisine", "Count"]).sort_values(by="Count", ascending=False)

In [35]:
tag_df

Unnamed: 0,Tag,Count
0,vegetarian,39
8,gluten-free,14
2,quick,9
1,vegan,6
7,gourmet,5
3,non-vegetarian,5
9,processed,4
4,spicy,3
5,comfort,1
6,creative,1


In [36]:
ingredient_df

Unnamed: 0,Ingredient,Count
1,onion,26
0,garlic,16
6,potato,16
2,carrot,14
5,green chilli,14
20,milk,13
4,ginger,11
8,capsicum,9
14,apple,7
10,cabbage,6


In [37]:
cuisine_df

Unnamed: 0,Cuisine,Count
3,Indian,20
1,Continental,12
0,Asian,9
4,Fusion,2
2,Chinese,1


In [38]:
# Vectorizing user preferences
# Cuisines(5), preferred tags(10), top ingredients(10)
# Mapping them to indices in vector
ingInd = {ing:index for index, ing in enumerate(ingredient_df['Ingredient'], start=0)}
cuiInd = {cuisine:index for index, cuisine in enumerate(cuisine_df['Cuisine'])}
tagInd = {tag:index for index, tag in enumerate(tag_df['Tag'], start=0)}

In [39]:
ingInd

{'onion': 0,
 'garlic': 1,
 'potato': 2,
 'carrot': 3,
 'green chilli': 4,
 'milk': 5,
 'ginger': 6,
 'capsicum': 7,
 'apple': 8,
 'cabbage': 9,
 'mushrooms': 10,
 'beet': 11,
 'flour': 12,
 'eggs': 13,
 'sugar': 14,
 'broccoli': 15,
 'brinjal': 16,
 'bell_pepper': 17,
 'chicken': 18,
 'banana': 19,
 'chocolate': 20,
 'blueberries': 21,
 'eggplant': 22}

In [40]:
tagInd

{'vegetarian': 0,
 'gluten-free': 1,
 'quick': 2,
 'vegan': 3,
 'gourmet': 4,
 'non-vegetarian': 5,
 'processed': 6,
 'spicy': 7,
 'comfort': 8,
 'creative': 9}

In [41]:
userPref = json.loads(open('data/user_preferences.json').read())
userPrefDict = {}

for user in userPref :
    cuisineVec = [0] * len(cuiInd)
    tagsVec = [0] * len(tagInd)
    ingVec = [0] * len(ingInd)

    # Cuisine mapping - order of listing = order of preference
    cuisineScore = 1
    for uc in userPref[user]['preferred_cuisines'] :
        cuisineVec[cuiInd[uc]] = cuisineScore
        cuisineScore += 1
    
    # Ingredients mapping
    ingBias = userPref[user]['ingredient_bias']
    # Sorting ingredients preferred in descending order of bias - 1 highest
    ingBias = sorted(ingBias.items(), key = lambda item : item[1], reverse=True)
    ingBiasScore = {ingPref[0] : score for score, ingPref in enumerate(ingBias, start = 1)}
    for ing, score in ingBiasScore.items() :
        ingVec[ingInd[ing]] = score
    
    # Tag mapping - 1 if preferred else 0
    for ut in userPref[user]['preferred_tags'] :
        tagsVec[tagInd[ut]] = 1

    userVec = cuisineVec + tagsVec + ingVec
    userPref[user] = userVec

with open("data/userVectors.json", 'w') as f :
    json.dump(userPref, f, separators=(',', ':')) # -> Prints all in one line tho



In [42]:
# Creating recipe vectors - static for each
recipes = json.loads(open('data/recipes.json').read())
recipesDict = {}

for recipeID, rec in recipes.items() :

    recCuiVec = [0] * len(cuiInd)
    recTagsVec  = [0] * len(tagInd)
    recIngVec = [0] * len(ingInd)

    # Cuisine mapping
    recCuiVec[cuiInd[rec['Cuisine']]] = 1
    
    # Ingredients mapping 
    ingreds = rec['Non-dairy ingredients'] | rec['dairy ingredients']
    # Ingredients used more in the recipe will have higher score (later relates to user liking one ingredient more)
    ingreds = sorted(ingreds.items(), key = lambda item : item[1], reverse = True)
    for score, ing in enumerate(ingreds, start = 1) :
        ingredient, count = ing
        recIngVec[ingInd[ingredient]] = score
    
    # Tag mapping
    for t in rec['tags'] :
        recTagsVec[tagInd[t]] = 1

    
    recVec = recCuiVec + recTagsVec + ingVec
    recipesDict[recipeID] = recVec

with open("data/recVectors.json", 'w') as f :
    json.dump(recipesDict, f, separators=(',', ':')) # -> Prints all in one line tho


### Finding top K recipes the user has tried and likes
This simulates users having interacted with recipes with a 'like' or 'dislike' button and is a precursor for recommendation

In [43]:
import numpy as np

with open("data/userVectors.json") as f:
    user_vectors = json.load(f)

with open("data/recVectors.json") as f:
    recipe_vectors = json.load(f)

In [44]:
from sklearn.metrics.pairwise import cosine_similarity

user_likes = {}

for user_id, user_vec in user_vectors.items():
    uvec = np.array(user_vec).reshape(1, -1)
    scores = {
        recipe: cosine_similarity(uvec, np.array(rvec).reshape(1, -1))[0][0]
        for recipe, rvec in recipe_vectors.items()
    }
    top_n_recipes = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:5]
    user_likes[user_id] = top_n_recipes


In [45]:
user_likes

{'suparna@123': [('R_2', 0.2823912473624526),
  ('R_3', 0.2541521226262073),
  ('R_9', 0.2541521226262073),
  ('R_11', 0.2541521226262073),
  ('R_7', 0.25038669783359574)],
 'aditi@456': [('R_9', 0.2823912473624526),
  ('R_13', 0.2823912473624526),
  ('R_19', 0.2823912473624526),
  ('R_3qXgYDV8zRo9TUF5aH39vk', 0.2823912473624526),
  ('R_12', 0.258092700604382)],
 'chefMaster': [('R_2', 0.2541521226262073),
  ('R_3', 0.22591299788996205),
  ('R_5', 0.22591299788996205),
  ('R_9', 0.22591299788996205),
  ('R_11', 0.22591299788996205)],
 'gordon@ramsay': [('R_9', 0.457891045625714),
  ('R_12', 0.43592864528932224),
  ('R_21', 0.43592864528932224),
  ('R_28', 0.43592864528932224),
  ('R_3', 0.42927285527410686)],
 'tArLaDaLaL': [('R_2', 0.1796644508187709),
  ('R_1', 0.15399810070180361),
  ('R_4', 0.15399810070180361),
  ('R_14', 0.15399810070180361),
  ('R_33', 0.13032150878567172)],
 'yumCook123': [('R_1', 0.16943474841747155),
  ('R_4', 0.16943474841747153),
  ('R_14', 0.16943474841747

In [46]:
# Generating (userID, recipeID, interaction 0/1)
# Simulating users liking it more (since data similarity is giving 0.2 and all)
interacted = []

for user_id, topLiked in user_likes.items() :
    for rID, recScore in topLiked :
        interacted.append((user_id, rID, round(min(recScore * 3, 1), 2)))

In [47]:
interacted

[('suparna@123', 'R_2', 0.85),
 ('suparna@123', 'R_3', 0.76),
 ('suparna@123', 'R_9', 0.76),
 ('suparna@123', 'R_11', 0.76),
 ('suparna@123', 'R_7', 0.75),
 ('aditi@456', 'R_9', 0.85),
 ('aditi@456', 'R_13', 0.85),
 ('aditi@456', 'R_19', 0.85),
 ('aditi@456', 'R_3qXgYDV8zRo9TUF5aH39vk', 0.85),
 ('aditi@456', 'R_12', 0.77),
 ('chefMaster', 'R_2', 0.76),
 ('chefMaster', 'R_3', 0.68),
 ('chefMaster', 'R_5', 0.68),
 ('chefMaster', 'R_9', 0.68),
 ('chefMaster', 'R_11', 0.68),
 ('gordon@ramsay', 'R_9', 1),
 ('gordon@ramsay', 'R_12', 1),
 ('gordon@ramsay', 'R_21', 1),
 ('gordon@ramsay', 'R_28', 1),
 ('gordon@ramsay', 'R_3', 1),
 ('tArLaDaLaL', 'R_2', 0.54),
 ('tArLaDaLaL', 'R_1', 0.46),
 ('tArLaDaLaL', 'R_4', 0.46),
 ('tArLaDaLaL', 'R_14', 0.46),
 ('tArLaDaLaL', 'R_33', 0.39),
 ('yumCook123', 'R_1', 0.51),
 ('yumCook123', 'R_4', 0.51),
 ('yumCook123', 'R_14', 0.51),
 ('yumCook123', 'R_8', 0.5),
 ('yumCook123', 'R_33', 0.43),
 ('food4lyf', 'R_1', 0.54),
 ('food4lyf', 'R_2', 0.54),
 ('food4lyf'

In [48]:
df = pd.DataFrame(interacted, columns=["user_id", "recipe_id", "rating"])
df.to_csv("data/user_recipe_interactions.csv", index=False)