In [1]:
from pyspark.sql import SparkSession
from pyspark.ml.feature import CountVectorizer, IDF
from pyspark.sql.functions import col, udf
from pyspark.sql import functions as F
from pyspark.sql.types import StringType, ArrayType, DoubleType

from IPython.display import HTML

import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
import re
import json

SEED = 42  # Set your desired seed for reproducibility

# Initialize Spark session
spark = SparkSession.builder.appName("Recipe Recommender System").getOrCreate()

24/10/14 21:42:18 WARN Utils: Your hostname, Maximes-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 192.168.0.97 instead (on interface en0)
24/10/14 21:42:18 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/10/14 21:42:18 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [2]:


def display_spark_dataframe(df, num_rows=100):
    # Convert the Spark DataFrame to Pandas for display purposes
    pandas_df = df.limit(num_rows).toPandas()

    # Generate HTML table
    html = pandas_df.to_html(classes="table table-striped table-bordered", index=False)

    # Display the HTML in the notebook
    display(HTML(html))

In [3]:
# Load dataset
file_path = "../data/processed/users_interactions.parquet"
raw_df = spark.read.parquet(file_path)


# Inspect dataset
raw_df.printSchema()
display_spark_dataframe(raw_df, 3)

root
 |-- user_id: long (nullable = true)
 |-- recipe_id: long (nullable = true)
 |-- title: string (nullable = true)
 |-- ingredients: string (nullable = true)
 |-- link: string (nullable = true)
 |-- ratings: long (nullable = true)
 |-- import_date: double (nullable = true)



user_id,recipe_id,title,ingredients,link,ratings,import_date
1,3236072851089661531,Sauteed Bananas,"[""bananas"", ""confectioners sugar"", ""flour"", ""butter""]",http://www.cookbooks.com/Recipe-Details.aspx?id=913543,3,1727300000.0
1,9145863324980688501,Fruit Pizza,"[""cream cheese"", ""Marshmallow Fluff"", ""blueberries""]",http://www.cookbooks.com/Recipe-Details.aspx?id=1056162,2,1726177000.0
1,6646370876398660190,Raw Gluten,"[""whole wheat flour"", ""water""]",http://www.cookbooks.com/Recipe-Details.aspx?id=1075892,4,1726609000.0


Here the issue is that the ingredients column is a string and not a list of strings. We need to convert it to a list of strings.

In [4]:
# clean the ingredients column as a list of strings
def clean_ingredients(ingredients):
    ingredients = json.loads(ingredients)
    return [re.sub(r"\d+", "", ingredient).strip() for ingredient in ingredients]


clean_ingredients_udf = udf(clean_ingredients, ArrayType(StringType()))
df = raw_df.withColumn("ingredients", clean_ingredients_udf("ingredients"))

In [5]:
# Inspect dataset
df.printSchema()
display_spark_dataframe(df, 5)

root
 |-- user_id: long (nullable = true)
 |-- recipe_id: long (nullable = true)
 |-- title: string (nullable = true)
 |-- ingredients: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- link: string (nullable = true)
 |-- ratings: long (nullable = true)
 |-- import_date: double (nullable = true)



user_id,recipe_id,title,ingredients,link,ratings,import_date
1,3236072851089661531,Sauteed Bananas,"[bananas, confectioners sugar, flour, butter]",http://www.cookbooks.com/Recipe-Details.aspx?id=913543,3,1727300000.0
1,9145863324980688501,Fruit Pizza,"[cream cheese, Marshmallow Fluff, blueberries]",http://www.cookbooks.com/Recipe-Details.aspx?id=1056162,2,1726177000.0
1,6646370876398660190,Raw Gluten,"[whole wheat flour, water]",http://www.cookbooks.com/Recipe-Details.aspx?id=1075892,4,1726609000.0
1,-3751444487082958385,Buckeye Candy,"[powdered sugar, butter, peanut butter, paraffin, chocolate chips]",http://www.cookbooks.com/Recipe-Details.aspx?id=886785,5,1728510000.0
1,4663243117944940535,Beer Bread,"[flour, beer, sugar]",http://www.cookbooks.com/Recipe-Details.aspx?id=26648,1,1726782000.0


In [6]:
# Text Cleaning Function: Lemmatisation, Lowercasing, Removing Special Characters
nltk.download("wordnet")
nltk.download("omw-1.4")
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/maximebonnesoeur/nltk_data...
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/maximebonnesoeur/nltk_data...


## Cleaning and lemmatizing the ingredients

In [7]:
def clean_ingredients(ingredient_list):
    cleaned_set = []
    for ingredient in ingredient_list:
        cleaned_set.append(ingredient)
        # Remove special characters, convert to lowercase, and lemmatize each word
        ingredient = "".join(re.sub(r"[^a-zA-Z\s]", "", ingredient))
        ingredient = ingredient.lower()
        # lemmatized_ingredient = ingredient#[lemmatizer.lemmatize(word) for word in ingredient.split()]
        cleaned_set.append(ingredient)

    return list(cleaned_set)


clean_ingredients_udf = udf(clean_ingredients, ArrayType(StringType()))

# Apply text cleaning to ingredients
df = df.withColumn("cleaned_ingredients", clean_ingredients_udf(col("ingredients")))
df.select("title", "ingredients", "cleaned_ingredients").show(5, truncate=False)

+---------------+------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------+
|title          |ingredients                                                       |cleaned_ingredients                                                                                                                 |
+---------------+------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------+
|Sauteed Bananas|[bananas, confectioners sugar, flour, butter]                     |[bananas, bananas, confectioners sugar, confectioners sugar, flour, flour, butter, butter]                                          |
|Fruit Pizza    |[cream cheese, Marshmallow Fluff, blueberries]                    |[cream cheese, cream cheese, Marshmallow Flu

## TF IDF vectorization

Here we have little information about the recipe in itself. So we will use the similarity between the ingredients to recommend recipes.

In [8]:
SIMILARITY_THRESHOLD = 0.5
# Recommendations for Users
RECOMMENDATION_LIMIT = 5

In [9]:
# Feature Extraction for Content-Based Filtering
# Use CountVectorizer to convert cleaned ingredients into feature vectors
# AKA, we will count the number of times each ingredient appears in the dataset
# Example: ["apple", "banana", "apple"] -> ({"apple": 2, "banana": 1})
cv = CountVectorizer(inputCol="cleaned_ingredients", outputCol="features")
cv_model = cv.fit(df)
vectorized_df = cv_model.transform(df)

However, we need to normalize the feature vectors to ensure that each ingredient has equal weight
Use IDF to normalize the feature vectors from CountVectorizer and "remove" common ingredients common to all recipes

In [10]:
# Compute TF-IDF for better weighting
idf = IDF(inputCol="features", outputCol="tfidf_features")
idf_model = idf.fit(vectorized_df)
tf_idf_df = idf_model.transform(vectorized_df)

                                                                                

In [11]:
# Generate Recommendations using Cosine Similarity
# Convert TF-IDF vectors to DenseVectors for similarity computation
vector_to_array_udf = udf(lambda v: v.toArray().tolist(), ArrayType(DoubleType()))
tf_idf_df = tf_idf_df.withColumn(
    "tfidf_array", vector_to_array_udf(col("tfidf_features"))
)


# Define a function to compute cosine similarity between two vectors
def cosine_similarity(v1, v2):
    v1 = np.array(v1)
    v2 = np.array(v2)
    dot_product = np.dot(v1, v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)
    if norm_v1 == 0 or norm_v2 == 0:
        return 0.0
    return float(dot_product / (norm_v1 * norm_v2))


cosine_similarity_udf = udf(cosine_similarity, DoubleType())

# Self-join the DataFrame to calculate similarity between all recipes
cross_joined_df = tf_idf_df.alias("df1").crossJoin(
    tf_idf_df.select(["recipe_id", "tfidf_array", "title", "user_id"]).alias("df2")
)

# Calculate cosine similarity for each pair of recipes
cross_joined_df = cross_joined_df.withColumn(
    "similarity", cosine_similarity_udf(col("df1.tfidf_array"), col("df2.tfidf_array"))
)
cross_joined_df.printSchema()


# Select the necessary columns and alias them to avoid ambiguity
recommendations_df = cross_joined_df.filter(
    (col("df1.recipe_id") != col("df2.recipe_id"))
    & (  # Never recommend the same recipe
        col("df1.user_id") != col("df2.user_id")
    )  # Never recommend the a recipe from the same user
)

# Explicitly select and alias the columns
recommendations_df = (
    recommendations_df.select(
        col("df1.recipe_id").alias("recipe_id"),
        col("df1.user_id").alias("user_id"),
        col("df2.user_id").alias("recommended_user_id"),
        col("df1.title").alias("recipe_title"),
        col("df2.recipe_id").alias("recommended_recipe_id"),
        col("df2.title").alias("recommended_recipe_title"),
        col("similarity"),
        # Add any other columns you may need
    )
    .filter(col("similarity") > SIMILARITY_THRESHOLD)
    .orderBy(col("recipe_id"), col("similarity").desc())
)

display_spark_dataframe(recommendations_df, 5)

root
 |-- user_id: long (nullable = true)
 |-- recipe_id: long (nullable = true)
 |-- title: string (nullable = true)
 |-- ingredients: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- link: string (nullable = true)
 |-- ratings: long (nullable = true)
 |-- import_date: double (nullable = true)
 |-- cleaned_ingredients: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- features: vector (nullable = true)
 |-- tfidf_features: vector (nullable = true)
 |-- tfidf_array: array (nullable = true)
 |    |-- element: double (containsNull = true)
 |-- recipe_id: long (nullable = true)
 |-- tfidf_array: array (nullable = true)
 |    |-- element: double (containsNull = true)
 |-- title: string (nullable = true)
 |-- user_id: long (nullable = true)
 |-- similarity: double (nullable = true)



                                                                                

recipe_id,user_id,recommended_user_id,recipe_title,recommended_recipe_id,recommended_recipe_title,similarity
-8962283335644396817,1,3,Microwave Fudge,4543509988065438196,Easy Fudge,0.56471
-8962283335644396817,1,2,Microwave Fudge,6393498659283924984,One Bowl Chocolate Fudge(Microwave),0.518839
-8486918150038815122,13,3,Persimmon Cookies,3366988585576868145,Persimmon Cake,0.626147
-8005079721966657344,3,51,Lemon Pudding Pound Cake,-1875286469985801954,Bonnie'S Lemon Jello Cake,0.833649
-8005079721966657344,3,53,Lemon Pudding Pound Cake,3423791347401780918,Cream Cheese Squares,0.567421


In [12]:
# Get top recommendations by collecting the recommended_recipe_id into a list for each recipe_id
top_recommendations = recommendations_df.groupby(["recipe_id", "recipe_title"]).agg(
    F.collect_list("recommended_recipe_id").alias("recommended_recipes"),
    F.collect_list("recommended_recipe_title").alias("recommended_recipe_titles"),
    F.collect_list("similarity").alias("similarity_scores"),
)
# Show Top Recommendations
display_spark_dataframe(top_recommendations, 5)

                                                                                

recipe_id,recipe_title,recommended_recipes,recommended_recipe_titles,similarity_scores
5583050311044080495,Chocolate Icing,"[-556339092044172322, 7869757872690146822, -847568789911499598]","[Golf Balls, Chocolate Oatmeal Cookies, No Bake Cookies]","[0.6811784473412361, 0.5826389327799872, 0.5560344839915443]"
-629699203677729697,Rice-Broccoli Casserole,"[6082401801560777728, 6082401801560777728, 6082401801560777728, -7387520298671766024, -7387520298671766024, -7387520298671766024]","[Broccoli Rice Casserole, Broccoli Rice Casserole, Broccoli Rice Casserole, Broccoli Casserole, Broccoli Casserole, Broccoli Casserole]","[0.7505796541242438, 0.7505796541242438, 0.7505796541242438, 0.5262668534924548, 0.5262668534924548, 0.5262668534924548]"
-535900969057078244,Peach Cobbler,[2323039842900641889],"[""Peachy"" Cake]",[0.520846300239213]
-2681701082806432578,Strawberry Salad,[-3014238091247932718],[Phylis' Pineapple-Banana Salad],[0.5208854388953843]
-556339092044172322,Golf Balls,"[5583050311044080495, 7869757872690146822, -847568789911499598, -3751444487082958385, 5921757842749730267]","[Chocolate Icing, Chocolate Oatmeal Cookies, No Bake Cookies, Buckeye Candy, Annie'S Diabetic Candy]","[0.6811784473412361, 0.5971053116517856, 0.569840299323566, 0.5113858574913516, 0.5041645727694347]"


So far, for such a basic model, we have been able to generate recommendations based on the similarity of the ingredients of the recipes. Based on our limited set, the recipes are indeed similar to the first ones based on the ingredients.

## Content-based Filtering

The approach here is to recommend a recipe to a user based on the similarity of the ingredients present in other recipes.

Let's have a look

In [13]:
vectorized_df.columns

['user_id',
 'recipe_id',
 'title',
 'ingredients',
 'link',
 'ratings',
 'import_date',
 'cleaned_ingredients',
 'features']

In [14]:
subset = (
    recommendations_df.alias("df1")
    .join(
        vectorized_df.alias("df2"),
        recommendations_df.recommended_recipe_id == vectorized_df.recipe_id,
    )
    .select(
        col("df1.user_id"),
        col("df2.recipe_id"),
        col("df2.title"),
        col("df2.link"),
        col("df2.ingredients"),
        col("df1.similarity"),
    )
)


recipe_content_based_recommendations = (
    subset.orderBy(["user_id", "similarity"])
    .groupby(["user_id"])
    .agg(
        F.slice(
            F.collect_list(
                F.struct(
                    F.col("recipe_id"),
                    F.col("title"),
                    F.col("link"),
                    F.col("ingredients"),
                )
            ),
            1,
            RECOMMENDATION_LIMIT,
        ).alias("recommended_recipes"),
        F.slice(F.collect_list("similarity"), 1, RECOMMENDATION_LIMIT).alias(
            "similarity_scores"
        ),
    )
)
display_spark_dataframe(recipe_content_based_recommendations, 5)

                                                                                

user_id,recommended_recipes,similarity_scores
29,"[(7869757872690146822, Chocolate Oatmeal Cookies, http://www.cookbooks.com/Recipe-Details.aspx?id=619294, [margarine, milk, cocoa, sugar, oats, peanut butter, vanilla, nuts]), (5583050311044080495, Chocolate Icing, http://www.cookbooks.com/Recipe-Details.aspx?id=377636, [sugar, milk, cocoa, butter, vanilla]), (-556339092044172322, Golf Balls, http://www.cookbooks.com/Recipe-Details.aspx?id=627006, [cocoa, butter, powdered sugar, milk, vanilla, peanut butter])]","[0.5317740317025577, 0.5560344839915443, 0.569840299323566]"
7,"[(-1145622998892862328, Corn Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=1023122, [margarine, red, onion, corn, corn, eggs, corn muffin, Cheddar cheese])]",[0.7658884124740973]
34,"[(-4444908957552929730, Heavenly Hash, http://www.cookbooks.com/Recipe-Details.aspx?id=269997, [whipping cream, marshmallows, vanilla, sugar, pineapple])]",[0.6140633678782105]
32,"[(-3014238091247932718, Phylis' Pineapple-Banana Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=682439, [pineapple, bananas])]",[0.5208854388953843]
43,"[(92133411895449221, Almond Roco, http://www.cookbooks.com/Recipe-Details.aspx?id=588117, [butter, sugar, water, almonds, chocolate, almonds])]",[0.5783165794872545]


With this first approach, each user gets some recommendations based on the recipe similar to theirs.

However, this approach breeds little diversity in the recommendations. Let's thus observe another approach to recommend recipes to users.

## Collaborative Filtering

In this approach, we will get for each users the users that have similar taste to them, meaning users that have used similar ingredients in their recipe.

Then, we will sample randomly a recipe from the users that have similar taste to the user and that the user has not already tried.

In [15]:
# Recommendations for Users
RECOMMENDATION_LIMIT = 5
SIMILARITY_THRESHOLD_USERS = 0.7  # We want really similar users


# Get top recommendations by collecting the recommended_recipe_id into a list for each recipe_id
top_user_similarity_recommendations = (
    recommendations_df.orderBy(["user_id"])
    .groupby(["user_id", "recommended_user_id"])
    .agg(
        F.avg("similarity").alias("average_similarity"),
    )
    .orderBy(["user_id", "average_similarity"], ascending=[True, False])
    .withColumnRenamed("user_id", "user_id")
)

top_user_similarity_recommendations = (
    top_user_similarity_recommendations.filter(
        col("average_similarity") > SIMILARITY_THRESHOLD_USERS
    )
    .groupBy("user_id")
    .agg(
        F.slice(F.collect_list("recommended_user_id"), 1, RECOMMENDATION_LIMIT).alias(
            "recommended_users"
        ),
        F.slice(F.collect_list("average_similarity"), 1, RECOMMENDATION_LIMIT).alias(
            "average_similarity_scores"
        ),
    )
)

display_spark_dataframe(top_user_similarity_recommendations, 5)

                                                                                

user_id,recommended_users,average_similarity_scores
2,"[17, 86, 95, 46]","[0.7505796541242438, 0.7505796541242438, 0.7505796541242438, 0.7011472408034947]"
3,"[51, 90]","[0.833649287232778, 0.7519934280218968]"
4,[7],[0.7658884124740973]
7,[4],[0.7658884124740973]
17,[2],[0.7505796541242438]


In [16]:
subset.show()

[Stage 35:>                                                         (0 + 1) / 1]

+-------+--------------------+--------------------+--------------------+--------------------+------------------+
|user_id|           recipe_id|               title|                link|         ingredients|        similarity|
+-------+--------------------+--------------------+--------------------+--------------------+------------------+
|      1|-3014238091247932718|Phylis' Pineapple...|http://www.cookbo...|[pineapple, bananas]|0.5579071672606242|
|      1| -556339092044172322|          Golf Balls|http://www.cookbo...|[cocoa, butter, p...|0.5113858574913516|
|      1| 6393498659283924984|One Bowl Chocolat...|http://www.cookbo...|[semi-sweet choco...|0.5188390299314342|
|      1| 4543509988065438196|          Easy Fudge|http://www.cookbo...|[condensed milk, ...|0.5647104191283978|
|      1| 5921757842749730267|Annie'S Diabetic ...|http://www.cookbo...|[vanilla, cream c...|0.5285055578590108|
|      2|-8962283335644396817|     Microwave Fudge|http://www.cookbo...|[semi-sweet choco...|0.5

                                                                                

In [17]:
# Get top recommendations by collecting the recommended_recipe_id into a list for each recipe_id

subset = (
    top_user_similarity_recommendations.alias("df1")
    .join(
        vectorized_df.alias("df2"),
        F.array_contains(
            top_user_similarity_recommendations.recommended_users, vectorized_df.user_id
        ),
    )
    .select(
        "df1.user_id",
        "df1.recommended_users",
        "df2.recipe_id",
        "df2.title",
        "df2.link",
        "df2.ingredients",
    )
)


# Random recipe sampling for each reommended user
recipe_collaborative_recommendations = subset.groupBy("user_id").agg(
    F.slice(
        F.shuffle(
            F.collect_list(
                F.struct(
                    F.col("recipe_id"),
                    F.col("title"),
                    F.col("link"),
                    F.col("ingredients"),
                )
            )
        ),
        1,
        RECOMMENDATION_LIMIT,
    ).alias("recommended_recipes"),
)

display_spark_dataframe(recipe_collaborative_recommendations, 5)

                                                                                

user_id,recommended_recipes
2,"[(-3014238091247932718, Phylis' Pineapple-Banana Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=682439, [pineapple, bananas]), (-629699203677729697, Rice-Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=184755, [onion, celery, margarine, rice, broccoli, frozen broccoli florets, cream of chicken soup, cream of mushroom soup]), (-3899424102060393968, Salsa, http://www.cookbooks.com/Recipe-Details.aspx?id=444964, [tomatoes, green chilies, green onions, celery, yellow onions, cilantro, tomato sauce, cumin, garlic, salt]), (884358801733425943, Sweet Potato Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=699621, [cooked sweet potatoes, sugar, salt, eggs, margarine, milk, vanilla]), (-629699203677729697, Rice-Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=184755, [onion, celery, margarine, rice, broccoli, frozen broccoli florets, cream of chicken soup, cream of mushroom soup])]"
3,"[(-1794192433332893734, Coconut Chews, http://www.cookbooks.com/Recipe-Details.aspx?id=1002375, [shortening, confectioners sugar, Gold Medal flour, eggs, brown sugar, flour, baking powder, salt, vanilla, walnuts, flaked coconut, Orange-Lemon Icing]), (-3541657427999271516, Pink Stuff(Frozen Dessert) , http://www.cookbooks.com/Recipe-Details.aspx?id=982483, [pie filling, pineapple, condensed milk, lemon juice]), (-1875286469985801954, Bonnie'S Lemon Jello Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=744855, [yellow cake mix, lemon jello, water, eggs, oil]), (5213548222747985524, Chicken Over Rice, http://www.cookbooks.com/Recipe-Details.aspx?id=229737, [onion, bell peppers, celery, chicken broth, potatoes, soy sauce]), (-1512451306646192815, Cherry Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=492199, [cherry pie filling, pineapple, pecans, marshmallows, milk])]"
4,"[(-3995740312499116500, Corn Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=899479, [corn, corn, eggs, oleo, sour cream, corn muffin])]"
7,"[(-9025251382900008244, Marinated Cucumber And Sweet Onion Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=294971, [sweet onions, cucumbers, salt, dark vinegar, water, sugar, pepper]), (-7815179659791359312, Prize-Winning Meat Loaf, http://www.cookbooks.com/Recipe-Details.aspx?id=923674, [ground beef, tomato juice, oats, egg, onion, pepper, salt]), (-7976200725112237420, Cold Bread Pudding, http://www.cookbooks.com/Recipe-Details.aspx?id=91723, [cold, sweet milk, butter, eggs, sugar]), (-703446942390088871, Rhubarb Crumble, http://www.cookbooks.com/Recipe-Details.aspx?id=458858, [brown sugar, rolled oats, flour, butter, cinnamon, rhubarb, sugar, cornstarch, water, vanilla]), (5654531468223763620, Warm Apple Crisp, http://www.cookbooks.com/Recipe-Details.aspx?id=319988, [peeled apples, sugar, cinnamon, salt, butter, sugar, flour, butter, topping])]"
17,"[(-3496996154711083177, Potato And Cheese Pie, http://www.cookbooks.com/Recipe-Details.aspx?id=784386, [eggs, salt, pepper, potatoes, Cheddar cheese, green onions]), (6393498659283924984, One Bowl Chocolate Fudge(Microwave) , http://www.cookbooks.com/Recipe-Details.aspx?id=502592, [semi-sweet chocolate, condensed milk, vanilla, salt, nuts]), (-5250039407390896068, Quick Coffee Cake(6 Servings) , http://www.cookbooks.com/Recipe-Details.aspx?id=532166, [butter, sugar, egg, flour, baking powder, salt, milk, vanilla, brown sugar, cinnamon, butter]), (6070472271589565164, Corn ""Oysters"", http://www.cookbooks.com/Recipe-Details.aspx?id=1059690, [cream-style corn, eggs, onion, crackers, salt]), (6233456789766403456, Cheese Ball, http://www.cookbooks.com/Recipe-Details.aspx?id=515966, [cream cheese, sharp cheese, mayonnaise, Worcestershire sauce, garlic salt, celery salt, onion salt, pimento, olives, parsley flakes, beef])]"


## Merging the two recommendations

In [18]:
final_recommendations = (
    recipe_content_based_recommendations.alias("df1")
    .join(
        recipe_collaborative_recommendations.alias("df2"),
        recipe_content_based_recommendations.user_id
        == recipe_collaborative_recommendations.user_id,
    )
    .select(
        "df1.user_id",
        col("df1.recommended_recipes").alias("content_based_recipes"),
        col("df1.similarity_scores").alias("content_based_similarity_scores"),
        col("df2.recommended_recipes").alias("collaborative_recipes"),
    )
)

In [19]:
final_recommendations.show()

[Stage 48:>                                                         (0 + 1) / 1]

+-------+---------------------+-------------------------------+---------------------+
|user_id|content_based_recipes|content_based_similarity_scores|collaborative_recipes|
+-------+---------------------+-------------------------------+---------------------+
|      2| [{-34436589996190...|           [0.50700628912870...| [{-30142380912479...|
|      3| [{-52500394073908...|           [0.50700628912870...| [{-17941924333328...|
|      4| [{-39957403124991...|           [0.7658884124740973]| [{-39957403124991...|
|      7| [{-11456229988928...|           [0.7658884124740973]| [{-90252513829000...|
|     17| [{-26817010828064...|           [0.52088543889538...| [{-34969961547110...|
|     39| [{-61916414789517...|           [0.64256565682217...| [{521354822274798...|
|     46| [{-62969920367772...|           [0.52626685349245...| [{-41892034656694...|
|     51| [{-80050797219666...|            [0.833649287232778]| [{-80050797219666...|
|     86| [{-73875202986717...|           [0.526266853

                                                                                

In [20]:
# Example usage
display_spark_dataframe(final_recommendations)

                                                                                

user_id,content_based_recipes,content_based_similarity_scores,collaborative_recipes
2,"[(-3443658999619026085, Texas Prize Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=776471, [butter, sugar, eggs, flour, vanilla]), (-8962283335644396817, Microwave Fudge, http://www.cookbooks.com/Recipe-Details.aspx?id=199072, [semi-sweet chocolate chips, sweet chocolate chips, condensed milk, vanilla, nuts]), (4543509988065438196, Easy Fudge, http://www.cookbooks.com/Recipe-Details.aspx?id=812407, [condensed milk, semi-sweet chocolate chips, unsweetened chocolate, nuts, vanilla]), (-3089260678803287909, Sky High Biscuits, http://www.cookbooks.com/Recipe-Details.aspx?id=805008, [flour, baking powder, sugar, salt, cream of tartar, butter, egg, milk]), (-7387520298671766024, Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=693123, [rice, onion, broccoli, cream of celery soup, margarine])]","[0.507006289128707, 0.5188390299314342, 0.5188390299314342, 0.5712053416559774, 0.7011472408034947]","[(-3014238091247932718, Phylis' Pineapple-Banana Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=682439, [pineapple, bananas]), (-629699203677729697, Rice-Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=184755, [onion, celery, margarine, rice, broccoli, frozen broccoli florets, cream of chicken soup, cream of mushroom soup]), (-3899424102060393968, Salsa, http://www.cookbooks.com/Recipe-Details.aspx?id=444964, [tomatoes, green chilies, green onions, celery, yellow onions, cilantro, tomato sauce, cumin, garlic, salt]), (884358801733425943, Sweet Potato Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=699621, [cooked sweet potatoes, sugar, salt, eggs, margarine, milk, vanilla]), (-629699203677729697, Rice-Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=184755, [onion, celery, margarine, rice, broccoli, frozen broccoli florets, cream of chicken soup, cream of mushroom soup])]"
3,"[(-5250039407390896068, Quick Coffee Cake(6 Servings) , http://www.cookbooks.com/Recipe-Details.aspx?id=532166, [butter, sugar, egg, flour, baking powder, salt, milk, vanilla, brown sugar, cinnamon, butter]), (6393498659283924984, One Bowl Chocolate Fudge(Microwave) , http://www.cookbooks.com/Recipe-Details.aspx?id=502592, [semi-sweet chocolate, condensed milk, vanilla, salt, nuts]), (-8962283335644396817, Microwave Fudge, http://www.cookbooks.com/Recipe-Details.aspx?id=199072, [semi-sweet chocolate chips, sweet chocolate chips, condensed milk, vanilla, nuts]), (3423791347401780918, Cream Cheese Squares, http://www.cookbooks.com/Recipe-Details.aspx?id=949240, [yellow cake mix, egg, margarine, sugar, eggs, cream cheese]), (-6242579153018008641, Patio Potatoes, http://www.cookbooks.com/Recipe-Details.aspx?id=570410, [hash browns, margarine, salt, pepper, onion, cream of mushroom soup, sour cream, grated sharp])]","[0.507006289128707, 0.5188390299314342, 0.5647104191283978, 0.5674208459998452, 0.5903246928120044]","[(-1794192433332893734, Coconut Chews, http://www.cookbooks.com/Recipe-Details.aspx?id=1002375, [shortening, confectioners sugar, Gold Medal flour, eggs, brown sugar, flour, baking powder, salt, vanilla, walnuts, flaked coconut, Orange-Lemon Icing]), (-3541657427999271516, Pink Stuff(Frozen Dessert) , http://www.cookbooks.com/Recipe-Details.aspx?id=982483, [pie filling, pineapple, condensed milk, lemon juice]), (-1875286469985801954, Bonnie'S Lemon Jello Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=744855, [yellow cake mix, lemon jello, water, eggs, oil]), (5213548222747985524, Chicken Over Rice, http://www.cookbooks.com/Recipe-Details.aspx?id=229737, [onion, bell peppers, celery, chicken broth, potatoes, soy sauce]), (-1512451306646192815, Cherry Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=492199, [cherry pie filling, pineapple, pecans, marshmallows, milk])]"
4,"[(-3995740312499116500, Corn Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=899479, [corn, corn, eggs, oleo, sour cream, corn muffin])]",[0.7658884124740973],"[(-3995740312499116500, Corn Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=899479, [corn, corn, eggs, oleo, sour cream, corn muffin])]"
7,"[(-1145622998892862328, Corn Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=1023122, [margarine, red, onion, corn, corn, eggs, corn muffin, Cheddar cheese])]",[0.7658884124740973],"[(-9025251382900008244, Marinated Cucumber And Sweet Onion Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=294971, [sweet onions, cucumbers, salt, dark vinegar, water, sugar, pepper]), (-7815179659791359312, Prize-Winning Meat Loaf, http://www.cookbooks.com/Recipe-Details.aspx?id=923674, [ground beef, tomato juice, oats, egg, onion, pepper, salt]), (-7976200725112237420, Cold Bread Pudding, http://www.cookbooks.com/Recipe-Details.aspx?id=91723, [cold, sweet milk, butter, eggs, sugar]), (-703446942390088871, Rhubarb Crumble, http://www.cookbooks.com/Recipe-Details.aspx?id=458858, [brown sugar, rolled oats, flour, butter, cinnamon, rhubarb, sugar, cornstarch, water, vanilla]), (5654531468223763620, Warm Apple Crisp, http://www.cookbooks.com/Recipe-Details.aspx?id=319988, [peeled apples, sugar, cinnamon, salt, butter, sugar, flour, butter, topping])]"
17,"[(-2681701082806432578, Strawberry Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=188949, [bananas, pineapple, frozen strawberries, strawberry Jell-O, boiling water, sour cream]), (-7387520298671766024, Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=693123, [rice, onion, broccoli, cream of celery soup, margarine]), (3236072851089661531, Sauteed Bananas, http://www.cookbooks.com/Recipe-Details.aspx?id=913543, [bananas, confectioners sugar, flour, butter]), (6082401801560777728, Broccoli Rice Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=757188, [broccoli, cream of mushroom soup, rice, margarine, onion])]","[0.5208854388953843, 0.5262668534924548, 0.5579071672606242, 0.7505796541242438]","[(-3496996154711083177, Potato And Cheese Pie, http://www.cookbooks.com/Recipe-Details.aspx?id=784386, [eggs, salt, pepper, potatoes, Cheddar cheese, green onions]), (6393498659283924984, One Bowl Chocolate Fudge(Microwave) , http://www.cookbooks.com/Recipe-Details.aspx?id=502592, [semi-sweet chocolate, condensed milk, vanilla, salt, nuts]), (-5250039407390896068, Quick Coffee Cake(6 Servings) , http://www.cookbooks.com/Recipe-Details.aspx?id=532166, [butter, sugar, egg, flour, baking powder, salt, milk, vanilla, brown sugar, cinnamon, butter]), (6070472271589565164, Corn ""Oysters"", http://www.cookbooks.com/Recipe-Details.aspx?id=1059690, [cream-style corn, eggs, onion, crackers, salt]), (6233456789766403456, Cheese Ball, http://www.cookbooks.com/Recipe-Details.aspx?id=515966, [cream cheese, sharp cheese, mayonnaise, Worcestershire sauce, garlic salt, celery salt, onion salt, pimento, olives, parsley flakes, beef])]"
39,"[(-6191641478951783089, Yum Yum Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=223566, [cherry pie filling, pineapple, Borden milk, pecans]), (-1512451306646192815, Cherry Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=492199, [cherry pie filling, pineapple, pecans, marshmallows, milk])]","[0.6425656568221725, 0.7519934280218968]","[(5213548222747985524, Chicken Over Rice, http://www.cookbooks.com/Recipe-Details.aspx?id=229737, [onion, bell peppers, celery, chicken broth, potatoes, soy sauce]), (-3541657427999271516, Pink Stuff(Frozen Dessert) , http://www.cookbooks.com/Recipe-Details.aspx?id=982483, [pie filling, pineapple, condensed milk, lemon juice]), (-1512451306646192815, Cherry Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=492199, [cherry pie filling, pineapple, pecans, marshmallows, milk])]"
46,"[(-629699203677729697, Rice-Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=184755, [onion, celery, margarine, rice, broccoli, frozen broccoli florets, cream of chicken soup, cream of mushroom soup]), (-629699203677729697, Rice-Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=184755, [onion, celery, margarine, rice, broccoli, frozen broccoli florets, cream of chicken soup, cream of mushroom soup]), (-629699203677729697, Rice-Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=184755, [onion, celery, margarine, rice, broccoli, frozen broccoli florets, cream of chicken soup, cream of mushroom soup]), (-629699203677729697, Rice-Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=184755, [onion, celery, margarine, rice, broccoli, frozen broccoli florets, cream of chicken soup, cream of mushroom soup]), (-629699203677729697, Rice-Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=184755, [onion, celery, margarine, rice, broccoli, frozen broccoli florets, cream of chicken soup, cream of mushroom soup])]","[0.5262668534924548, 0.5262668534924548, 0.5262668534924548, 0.5262668534924548, 0.5262668534924548]","[(-4189203465669486065, Mock Lobster Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=564329, [crackers, celery, green pepper, eggs, onion, tomato juice, mayonnaise]), (-4765217018562870239, Do-Unkles Pumpkin Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=458051, [sugar, flour, pumpkin, salt, cinnamon, nutmeg, soda, oil, eggs, baking powder, walnuts]), (-5250039407390896068, Quick Coffee Cake(6 Servings) , http://www.cookbooks.com/Recipe-Details.aspx?id=532166, [butter, sugar, egg, flour, baking powder, salt, milk, vanilla, brown sugar, cinnamon, butter]), (6233456789766403456, Cheese Ball, http://www.cookbooks.com/Recipe-Details.aspx?id=515966, [cream cheese, sharp cheese, mayonnaise, Worcestershire sauce, garlic salt, celery salt, onion salt, pimento, olives, parsley flakes, beef]), (6070472271589565164, Corn ""Oysters"", http://www.cookbooks.com/Recipe-Details.aspx?id=1059690, [cream-style corn, eggs, onion, crackers, salt])]"
51,"[(-8005079721966657344, Lemon Pudding Pound Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=1068808, [eggs, yellow cake mix, water, oil])]",[0.833649287232778],"[(-8005079721966657344, Lemon Pudding Pound Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=1068808, [eggs, yellow cake mix, water, oil]), (-4775666681479532728, Baked Pork Tenderloin, http://www.cookbooks.com/Recipe-Details.aspx?id=512635, [pork tenderloin, salt, pepper, bacon, apple juice, cinnamon, cloves]), (-8783939955479050182, Chocolate Macaroon Bars, http://www.cookbooks.com/Recipe-Details.aspx?id=693256, [graham cracker crumbs, sugar, cocoa, margarine, condensed milk, coconut flakes, bread crumbs, eggs, vanilla extract, chocolate chips]), (-5520262844141481008, Cherry Congealed Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=673923, [cherry pie filling, pineapple, milk]), (-6345232053553097226, Sweet And Sour Red Cabbage, http://www.cookbooks.com/Recipe-Details.aspx?id=806317, [red cabbage, salt, salt, pepper, sugar, cider vinegar, caraway, onion])]"
86,"[(-7387520298671766024, Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=693123, [rice, onion, broccoli, cream of celery soup, margarine]), (6082401801560777728, Broccoli Rice Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=757188, [broccoli, cream of mushroom soup, rice, margarine, onion])]","[0.5262668534924548, 0.7505796541242438]","[(-6864753049434638110, Red Beet Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=412481, [sugar, eggs, Crisco oil, chocolate, flour, soda, salt, vanilla]), (6393498659283924984, One Bowl Chocolate Fudge(Microwave) , http://www.cookbooks.com/Recipe-Details.aspx?id=502592, [semi-sweet chocolate, condensed milk, vanilla, salt, nuts]), (5912469327965089621, Zucchini Squares, http://www.cookbooks.com/Recipe-Details.aspx?id=239356, [zucchini, onion, clove garlic, Parmesan, oil, parsley, salt, salt, oregano, eggs]), (6070472271589565164, Corn ""Oysters"", http://www.cookbooks.com/Recipe-Details.aspx?id=1059690, [cream-style corn, eggs, onion, crackers, salt]), (8676006650672359005, Creole Flounder, http://www.cookbooks.com/Recipe-Details.aspx?id=580768, [flounder, tomatoes, green pepper, lemon juice, salad oil, salt, onion, basil, ground black pepper, drops red pepper sauce, green pepper])]"
88,"[(-5520262844141481008, Cherry Congealed Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=673923, [cherry pie filling, pineapple, milk]), (-5520262844141481008, Cherry Congealed Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=673923, [cherry pie filling, pineapple, milk]), (-5520262844141481008, Cherry Congealed Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=673923, [cherry pie filling, pineapple, milk]), (-5520262844141481008, Cherry Congealed Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=673923, [cherry pie filling, pineapple, milk]), (-1512451306646192815, Cherry Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=492199, [cherry pie filling, pineapple, pecans, marshmallows, milk])]","[0.6425656568221725, 0.6425656568221725, 0.6425656568221725, 0.6425656568221725, 0.7081922298630527]","[(-1512451306646192815, Cherry Salad, http://www.cookbooks.com/Recipe-Details.aspx?id=492199, [cherry pie filling, pineapple, pecans, marshmallows, milk]), (5213548222747985524, Chicken Over Rice, http://www.cookbooks.com/Recipe-Details.aspx?id=229737, [onion, bell peppers, celery, chicken broth, potatoes, soy sauce]), (-3541657427999271516, Pink Stuff(Frozen Dessert) , http://www.cookbooks.com/Recipe-Details.aspx?id=982483, [pie filling, pineapple, condensed milk, lemon juice])]"
