In [12]:
from pyspark.sql import SparkSession
from pyspark.ml.feature import CountVectorizer, IDF
from pyspark.sql.functions import col, udf
from pyspark.sql import functions as F
from pyspark.sql.types import StringType, ArrayType, DoubleType


import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
import re
import json

SEED = 42  # Set your desired seed for reproducibility

# Initialize Spark session
spark = SparkSession.builder.appName("Recipe Recommender System").getOrCreate()

In [13]:
from IPython.display import HTML


def display_spark_dataframe(df, num_rows=100):
    # Convert the Spark DataFrame to Pandas for display purposes
    pandas_df = df.limit(num_rows).toPandas()

    # Generate HTML table
    html = pandas_df.to_html(classes="table table-striped table-bordered", index=False)

    # Display the HTML in the notebook
    display(HTML(html))

In [14]:
# Load dataset
file_path = "../data/processed/users_interactions.parquet"
raw_df = spark.read.parquet(file_path)


# Inspect dataset
raw_df.printSchema()
display_spark_dataframe(raw_df, 3)

root
 |-- user_id: long (nullable = true)
 |-- recipe_id: long (nullable = true)
 |-- title: string (nullable = true)
 |-- ingredients: string (nullable = true)
 |-- link: string (nullable = true)
 |-- import_date: double (nullable = true)



user_id,recipe_id,title,ingredients,link,import_date
1,-5836364512049762718,Log Cabin Toast,"[""sugar"", ""cinnamon"", ""white"", ""margarine""]",http://www.cookbooks.com/Recipe-Details.aspx?id=595265,1720726000.0
1,5825856628004213219,Vegetable Burger Soup,"[""ground beef"", ""tomatoes"", ""tomato sauce"", ""frozen mixed vegetables"", ""onion soup"", ""sugar""]",http://www.cookbooks.com/Recipe-Details.aspx?id=302429,1724700000.0
1,6164519699133745296,Magic Cookie Bars,"[""butter"", ""graham cracker crumbs"", ""milk"", ""semi-sweet chocolate chips"", ""flaked coconut"", ""nuts""]",http://www.cookbooks.com/Recipe-Details.aspx?id=343171,1726601000.0


Here the issue is that the ingredients column is a string and not a list of strings. We need to convert it to a list of strings.

In [15]:
# clean the ingredients column as a list of strings
def clean_ingredients(ingredients):
    ingredients = json.loads(ingredients)
    return [re.sub(r"\d+", "", ingredient).strip() for ingredient in ingredients]


clean_ingredients_udf = udf(clean_ingredients, ArrayType(StringType()))
df = raw_df.withColumn("ingredients", clean_ingredients_udf("ingredients"))

In [16]:
# Inspect dataset
df.printSchema()
display_spark_dataframe(df, 5)

root
 |-- user_id: long (nullable = true)
 |-- recipe_id: long (nullable = true)
 |-- title: string (nullable = true)
 |-- ingredients: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- link: string (nullable = true)
 |-- import_date: double (nullable = true)



user_id,recipe_id,title,ingredients,link,import_date
1,-5836364512049762718,Log Cabin Toast,"[sugar, cinnamon, white, margarine]",http://www.cookbooks.com/Recipe-Details.aspx?id=595265,1720726000.0
1,5825856628004213219,Vegetable Burger Soup,"[ground beef, tomatoes, tomato sauce, frozen mixed vegetables, onion soup, sugar]",http://www.cookbooks.com/Recipe-Details.aspx?id=302429,1724700000.0
1,6164519699133745296,Magic Cookie Bars,"[butter, graham cracker crumbs, milk, semi-sweet chocolate chips, flaked coconut, nuts]",http://www.cookbooks.com/Recipe-Details.aspx?id=343171,1726601000.0
1,-8815957590080865143,Chicken Divan,"[margarine, onion, celery, flour, curry powder, mushrooms, cream of celery soup, broccoli, chicken, cheese]",http://www.cookbooks.com/Recipe-Details.aspx?id=425137,1725824000.0
1,3296045182844799490,Baked Cabbage,"[ground chuck, onion, rice, salt, cabbage, pepper, tomato soup, water, Mozzarella cheese]",http://www.cookbooks.com/Recipe-Details.aspx?id=761049,1722800000.0


In [17]:
# Text Cleaning Function: Lemmatisation, Lowercasing, Removing Special Characters
nltk.download("wordnet")
nltk.download("omw-1.4")
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/maximebonnesoeur/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/maximebonnesoeur/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


## Cleaning and lemmatizing the ingredients

In [18]:
def clean_ingredients(ingredient_list):
    cleaned_set = []
    for ingredient in ingredient_list:
        cleaned_set.append(ingredient)
        # Remove special characters, convert to lowercase, and lemmatize each word
        ingredient = "".join(re.sub(r"[^a-zA-Z\s]", "", ingredient))
        ingredient = ingredient.lower()
        # lemmatized_ingredient = ingredient#[lemmatizer.lemmatize(word) for word in ingredient.split()]
        cleaned_set.append(ingredient)

    return list(cleaned_set)


clean_ingredients_udf = udf(clean_ingredients, ArrayType(StringType()))

# Apply text cleaning to ingredients
df = df.withColumn("cleaned_ingredients", clean_ingredients_udf(col("ingredients")))
df.select("title", "ingredients", "cleaned_ingredients").show(5, truncate=False)

+---------------------+-----------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|title                |ingredients                                                                                                |cleaned_ingredients                                                                                                                                                                                                   |
+---------------------+-----------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

## TF IDF vectorization

Here we have little information about the recipe in itself. So we will use the similarity between the ingredients to recommend recipes.

In [19]:
SIMILARITY_THRESHOLD = 0.5
# Recommendations for Users
RECOMMENDATION_LIMIT = 5

In [20]:
# Feature Extraction for Content-Based Filtering
# Use CountVectorizer to convert cleaned ingredients into feature vectors
# AKA, we will count the number of times each ingredient appears in the dataset
# Example: ["apple", "banana", "apple"] -> ({"apple": 2, "banana": 1})
cv = CountVectorizer(inputCol="cleaned_ingredients", outputCol="features")
cv_model = cv.fit(df)
vectorized_df = cv_model.transform(df)

However, we need to normalize the feature vectors to ensure that each ingredient has equal weight
Use IDF to normalize the feature vectors from CountVectorizer and "remove" common ingredients common to all recipes

In [21]:
# Compute TF-IDF for better weighting
idf = IDF(inputCol="features", outputCol="tfidf_features")
idf_model = idf.fit(vectorized_df)
tf_idf_df = idf_model.transform(vectorized_df)

In [25]:
# Generate Recommendations using Cosine Similarity
# Convert TF-IDF vectors to DenseVectors for similarity computation
vector_to_array_udf = udf(lambda v: v.toArray().tolist(), ArrayType(DoubleType()))
tf_idf_df = tf_idf_df.withColumn(
    "tfidf_array", vector_to_array_udf(col("tfidf_features"))
)


# Define a function to compute cosine similarity between two vectors
def cosine_similarity(v1, v2):
    v1 = np.array(v1)
    v2 = np.array(v2)
    dot_product = np.dot(v1, v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)
    if norm_v1 == 0 or norm_v2 == 0:
        return 0.0
    return float(dot_product / (norm_v1 * norm_v2))


cosine_similarity_udf = udf(cosine_similarity, DoubleType())

# Self-join the DataFrame to calculate similarity between all recipes
cross_joined_df = tf_idf_df.alias("df1").crossJoin(
    tf_idf_df.select(["recipe_id", "tfidf_array", "title", "user_id"]).alias("df2")
)

# Calculate cosine similarity for each pair of recipes
cross_joined_df = cross_joined_df.withColumn(
    "similarity", cosine_similarity_udf(col("df1.tfidf_array"), col("df2.tfidf_array"))
)
cross_joined_df.printSchema()


# Select the necessary columns and alias them to avoid ambiguity
recommendations_df = cross_joined_df.filter(
    (col("df1.recipe_id") != col("df2.recipe_id"))
    & (  # Never recommend the same recipe
        col("df1.user_id") != col("df2.user_id")
    )  # Never recommend the a recipe from the same user
)

# Explicitly select and alias the columns
recommendations_df = (
    recommendations_df.select(
        col("df1.recipe_id").alias("recipe_id"),
        col("df1.user_id").alias("user_id"),
        col("df2.user_id").alias("recommended_user_id"),
        col("df1.title").alias("recipe_title"),
        col("df2.recipe_id").alias("recommended_recipe_id"),
        col("df2.title").alias("recommended_recipe_title"),
        col("similarity"),
        # Add any other columns you may need
    )
    .filter(col("similarity") > SIMILARITY_THRESHOLD)
    .orderBy(col("recipe_id"), col("similarity").desc())
)

display_spark_dataframe(recommendations_df, 5)

root
 |-- user_id: long (nullable = true)
 |-- recipe_id: long (nullable = true)
 |-- title: string (nullable = true)
 |-- ingredients: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- link: string (nullable = true)
 |-- import_date: double (nullable = true)
 |-- cleaned_ingredients: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- features: vector (nullable = true)
 |-- tfidf_features: vector (nullable = true)
 |-- tfidf_array: array (nullable = true)
 |    |-- element: double (containsNull = true)
 |-- recipe_id: long (nullable = true)
 |-- tfidf_array: array (nullable = true)
 |    |-- element: double (containsNull = true)
 |-- title: string (nullable = true)
 |-- user_id: long (nullable = true)
 |-- similarity: double (nullable = true)



                                                                                

recipe_id,user_id,recommended_user_id,recipe_title,recommended_recipe_id,recommended_recipe_title,similarity
-9058629457183933290,24,1,Vegetable-Burger Soup,5825856628004213219,Vegetable Burger Soup,0.972691
-8983223843788436463,12,22,Meltaways,-3633718265606525894,Lemon Chip Cookies,0.535585
-8815957590080865143,1,28,Chicken Divan,56267968418564497,Broccoli Casserole,0.510864
-8228806269697013615,63,39,Hot Spiced Tea,3905076670840786120,Spiced Tea,0.988846
-8228806269697013615,63,17,Hot Spiced Tea,-7117877633066661520,Microwave Spice Tea,0.576431


In [27]:
# Get top recommendations by collecting the recommended_recipe_id into a list for each recipe_id
top_recommendations = recommendations_df.groupby(["recipe_id", "recipe_title"]).agg(
    F.collect_list("recommended_recipe_id").alias("recommended_recipes"),
    F.collect_list("recommended_recipe_title").alias("recommended_recipe_titles"),
    F.collect_list("similarity").alias("similarity_scores"),
)
# Show Top Recommendations
display_spark_dataframe(top_recommendations, 5)

                                                                                

recipe_id,recipe_title,recommended_recipes,recommended_recipe_titles,similarity_scores
5724821755766999724,Battered Fried Fish,[8240554322501841782],[Mom'S Pancakes],[0.5066268526708992]
1724751793166003540,Apple Pie,[-6972161054895818310],[Apple Dumplins],[0.5957179439818722]
-2682508455536574038,Ice Cream,"[6930348736077852695, -2839619272665960322, 8847943492650148032, 1728567545615044470, -5155294720942217303]","[Vanilla Ice Cream, Brownie Pie, Low Cholesterol Ice Cream, Plattar(Swedish Pancakes) , Cream Pie And Pudding(Microwave) ]","[0.624592049758917, 0.5543145255608545, 0.5385059820356695, 0.5378288057346015, 0.5378288057346015]"
-8228806269697013615,Hot Spiced Tea,"[3905076670840786120, -7117877633066661520]","[Spiced Tea, Microwave Spice Tea]","[0.9888464040409073, 0.5764306628895793]"
8847943492650148032,Low Cholesterol Ice Cream,"[6930348736077852695, -2682508455536574038]","[Vanilla Ice Cream, Ice Cream]","[0.6647852200539185, 0.5385059820356695]"


So far, for such a basic model, we have been able to generate recommendations based on the similarity of the ingredients of the recipes. Based on our limited set, the recipes are indeed similar to the first ones based on the ingredients.

## Content-based Filtering

The approach here is to recommend a recipe to a user based on the similarity of the ingredients present in other recipes.

Let's have a look

In [28]:
vectorized_df.columns

['user_id',
 'recipe_id',
 'title',
 'ingredients',
 'link',
 'import_date',
 'cleaned_ingredients',
 'features']

In [29]:
subset = (
    recommendations_df.alias("df1")
    .join(
        vectorized_df.alias("df2"),
        recommendations_df.recommended_recipe_id == vectorized_df.recipe_id,
    )
    .select(
        col("df1.user_id"),
        col("df2.recipe_id"),
        col("df2.title"),
        col("df2.link"),
        col("df2.ingredients"),
        col("df1.similarity"),
    )
)


recipe_content_based_recommendations = (
    subset.orderBy(["user_id", "similarity"])
    .groupby(["user_id"])
    .agg(
        F.slice(
            F.collect_list(
                F.struct(
                    F.col("recipe_id"),
                    F.col("title"),
                    F.col("link"),
                    F.col("ingredients"),
                )
            ),
            1,
            RECOMMENDATION_LIMIT,
        ).alias("recommended_recipes"),
        F.slice(F.collect_list("similarity"), 1, RECOMMENDATION_LIMIT).alias(
            "similarity_scores"
        ),
    )
)
display_spark_dataframe(recipe_content_based_recommendations, 5)

                                                                                

user_id,recommended_recipes,similarity_scores
29,"[(-2682508455536574038, Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=13922, [eggs, sugar, milk, cream, vanilla, salt, milk]), (8847943492650148032, Low Cholesterol Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=289172, [Egg Scramblers, sugar, vanilla, milk, milk]), (1728567545615044470, Plattar(Swedish Pancakes) , http://www.cookbooks.com/Recipe-Details.aspx?id=554747, [flour, sugar, salt, eggs, milk]), (-5155294720942217303, Cream Pie And Pudding(Microwave) , http://www.cookbooks.com/Recipe-Details.aspx?id=634804, [sugar, flour, salt, eggs, milk])]","[0.624592049758917, 0.6647852200539185, 0.7464639123963781, 0.7464639123963781]"
65,"[(5996411961097143608, Meg'S Power Peanut Butter Balls, http://www.cookbooks.com/Recipe-Details.aspx?id=866378, [graham cracker crumbs, crunchy peanut butter, confectioners sugar, butter, paraffin, chocolate chips])]",[0.5898817681518592]
22,"[(-8983223843788436463, Meltaways, http://www.cookbooks.com/Recipe-Details.aspx?id=719571, [cream cheese, margarine, flour])]",[0.5355846262398374]
77,"[(4248772492409983307, Rolls, http://www.cookbooks.com/Recipe-Details.aspx?id=824206, [milk, butter, yeast, sugar, salt, eggs, flour]), (-2682508455536574038, Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=13922, [eggs, sugar, milk, cream, vanilla, salt, milk]), (6930348736077852695, Vanilla Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=91883, [eggs, sugar, milk, milk, milk]), (1728567545615044470, Plattar(Swedish Pancakes) , http://www.cookbooks.com/Recipe-Details.aspx?id=554747, [flour, sugar, salt, eggs, milk])]","[0.5016577217360161, 0.5378288057346015, 0.7464639123963781, 1.0]"
43,"[(1724751793166003540, Apple Pie, http://www.cookbooks.com/Recipe-Details.aspx?id=505783, [apples, sugar, flour, cinnamon, salt])]",[0.5957179439818722]


With this first approach, each user gets some recommendations based on the recipe similar to theirs.

However, this approach breeds little diversity in the recommendations. Let's thus observe another approach to recommend recipes to users.

## Collaborative Filtering

In this approach, we will get for each users the users that have similar taste to them, meaning users that have used similar ingredients in their recipe.

Then, we will sample randomly a recipe from the users that have similar taste to the user and that the user has not already tried.

In [30]:
# Recommendations for Users
RECOMMENDATION_LIMIT = 5
SIMILARITY_THRESHOLD_USERS = 0.7  # We want really similar users


# Get top recommendations by collecting the recommended_recipe_id into a list for each recipe_id
top_user_similarity_recommendations = (
    recommendations_df.orderBy(["user_id"])
    .groupby(["user_id", "recommended_user_id"])
    .agg(
        F.avg("similarity").alias("average_similarity"),
    )
    .orderBy(["user_id", "average_similarity"], ascending=[True, False])
    .withColumnRenamed("user_id", "user_id")
)

top_user_similarity_recommendations = (
    top_user_similarity_recommendations.filter(
        col("average_similarity") > SIMILARITY_THRESHOLD_USERS
    )
    .groupBy("user_id")
    .agg(
        F.slice(F.collect_list("recommended_user_id"), 1, RECOMMENDATION_LIMIT).alias(
            "recommended_users"
        ),
        F.slice(F.collect_list("average_similarity"), 1, RECOMMENDATION_LIMIT).alias(
            "average_similarity_scores"
        ),
    )
)

display_spark_dataframe(top_user_similarity_recommendations, 5)

                                                                                

user_id,recommended_users,average_similarity_scores
1,[24],[0.9726914161586602]
24,[1],[0.9726914161586602]
29,"[41, 77]","[0.7464639123963781, 0.7464639123963781]"
39,[63],[0.9888464040409073]
41,"[77, 29]","[1.0, 0.7464639123963781]"


In [31]:
subset.show()

[Stage 52:>                                                         (0 + 1) / 1]

+-------+--------------------+--------------------+--------------------+--------------------+------------------+
|user_id|           recipe_id|               title|                link|         ingredients|        similarity|
+-------+--------------------+--------------------+--------------------+--------------------+------------------+
|      1|-9058629457183933290|Vegetable-Burger ...|http://www.cookbo...|[ground beef, wat...|0.9726914161586602|
|      1|   56267968418564497|  Broccoli Casserole|http://www.cookbo...|[rice, onion, bro...|0.5108642386266795|
|      2| 2231876287136426631|       Pumpkin Bread|http://www.cookbo...|[pumpkin, salad o...|0.5199154800363099|
|      2| 8240554322501841782|      Mom'S Pancakes|http://www.cookbo...|[flour, baking po...|0.5678542826316204|
|      3| 7226555342600168366|Crustless Pumpkin...|http://www.cookbo...|[eggs, honey, gin...|0.5199154800363099|
|      3| 3682129508320495524| Buckeyes(Cookies)  |http://www.cookbo...|[crunchy peanut b...|0.5

                                                                                

In [32]:
# Get top recommendations by collecting the recommended_recipe_id into a list for each recipe_id

subset = (
    top_user_similarity_recommendations.alias("df1")
    .join(
        vectorized_df.alias("df2"),
        F.array_contains(
            top_user_similarity_recommendations.recommended_users, vectorized_df.user_id
        ),
    )
    .select(
        "df1.user_id",
        "df1.recommended_users",
        "df2.recipe_id",
        "df2.title",
        "df2.link",
        "df2.ingredients",
    )
)


# Random recipe sampling for each reommended user
recipe_collaborative_recommendations = subset.groupBy("user_id").agg(
    F.slice(
        F.shuffle(
            F.collect_list(
                F.struct(
                    F.col("recipe_id"),
                    F.col("title"),
                    F.col("link"),
                    F.col("ingredients"),
                )
            )
        ),
        1,
        RECOMMENDATION_LIMIT,
    ).alias("recommended_recipes"),
)

display_spark_dataframe(recipe_collaborative_recommendations, 5)

                                                                                

user_id,recommended_recipes
1,"[(3825230156317200027, Carrot Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=406346, [flour, sugar, baking powder, cinnamon, cloves, oil, eggs, walnuts, junior jars baby carrots]), (5379553627317224379, Summer Squash Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=930887, [yellow sumer squash, onion, condensed cream, sour cream, carrot, herb seasoned stuffing mix, butter]), (-9058629457183933290, Vegetable-Burger Soup, http://www.cookbooks.com/Recipe-Details.aspx?id=822015, [ground beef, water, sugar, onion soup, tomatoes, tomato sauce, frozen mixed vegetables])]"
24,"[(-3928874728248188262, Blueberry Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=303061, [eggs, sugar, shortening, salt, baking powder, vanilla, flour, milk, floured blueberries]), (-5669451870154398266, Ground Beef Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=449639, [ground beef, onion, clove garlic, bell pepper, celery, tomato, tomato sauce, mushrooms, corn, olives, egg noodles, cheese]), (3296045182844799490, Baked Cabbage, http://www.cookbooks.com/Recipe-Details.aspx?id=761049, [ground chuck, onion, rice, salt, cabbage, pepper, tomato soup, water, Mozzarella cheese]), (1176869484819785883, Fa-La-La-La-Las, http://www.cookbooks.com/Recipe-Details.aspx?id=744133, [butter, sugar, egg, salt, vanilla, almond extract, all-purpose]), (-8815957590080865143, Chicken Divan, http://www.cookbooks.com/Recipe-Details.aspx?id=425137, [margarine, onion, celery, flour, curry powder, mushrooms, cream of celery soup, broccoli, chicken, cheese])]"
29,"[(-5182186088732076091, Play Doh, http://www.cookbooks.com/Recipe-Details.aspx?id=906005, [flour, salt, alum, water, cooking oil, food coloring]), (-5155294720942217303, Cream Pie And Pudding(Microwave) , http://www.cookbooks.com/Recipe-Details.aspx?id=634804, [sugar, flour, salt, eggs, milk]), (6717102631624880744, Original Mayfair Dressing, http://www.cookbooks.com/Recipe-Details.aspx?id=857466, [anchovies, black pepper, Accent, stalks celery, onion, mustard, garlic, lemon juice, eggs, oil]), (1728567545615044470, Plattar(Swedish Pancakes) , http://www.cookbooks.com/Recipe-Details.aspx?id=554747, [flour, sugar, salt, eggs, milk])]"
39,"[(-5593920171542558470, Pot Roast, http://www.cookbooks.com/Recipe-Details.aspx?id=587453, [chuck roast, ginger, garlic, pepper, paprika, tomato paste]), (-8228806269697013615, Hot Spiced Tea, http://www.cookbooks.com/Recipe-Details.aspx?id=622677, [sugar, instant tea, cinnamon, cloves]), (-6459617218318945727, Chicken, Broccoli And Cheese Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=1081500, [fresh broccoli, chicken breasts, broccoli, milk, Cheddar cheese])]"
41,"[(2149492657839221283, Ambrosia Mold, http://www.cookbooks.com/Recipe-Details.aspx?id=222480, [pineapple, boiling water, orange flavor, topping, orange segments, marshmallows, coconut]), (6930348736077852695, Vanilla Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=91883, [eggs, sugar, milk, milk, milk]), (6717102631624880744, Original Mayfair Dressing, http://www.cookbooks.com/Recipe-Details.aspx?id=857466, [anchovies, black pepper, Accent, stalks celery, onion, mustard, garlic, lemon juice, eggs, oil]), (-5182186088732076091, Play Doh, http://www.cookbooks.com/Recipe-Details.aspx?id=906005, [flour, salt, alum, water, cooking oil, food coloring]), (-5155294720942217303, Cream Pie And Pudding(Microwave) , http://www.cookbooks.com/Recipe-Details.aspx?id=634804, [sugar, flour, salt, eggs, milk])]"


## Merging the two recommendations

In [33]:
final_recommendations = (
    recipe_content_based_recommendations.alias("df1")
    .join(
        recipe_collaborative_recommendations.alias("df2"),
        recipe_content_based_recommendations.user_id
        == recipe_collaborative_recommendations.user_id,
    )
    .select(
        "df1.user_id",
        col("df1.recommended_recipes").alias("content_based_recipes"),
        col("df1.similarity_scores").alias("content_based_similarity_scores"),
        col("df2.recommended_recipes").alias("collaborative_recipes"),
    )
)

In [34]:
final_recommendations.show()

[Stage 65:>                                                         (0 + 1) / 1]

+-------+---------------------+-------------------------------+---------------------+
|user_id|content_based_recipes|content_based_similarity_scores|collaborative_recipes|
+-------+---------------------+-------------------------------+---------------------+
|      1| [{562679684185644...|           [0.51086423862667...| [{382523015631720...|
|     24| [{582585662800421...|           [0.9726914161586602]| [{-39288747282481...|
|     29| [{-26825084555365...|           [0.62459204975891...| [{-51821860887320...|
|     39| [{-71178776330666...|           [0.57000138817727...| [{-55939201715425...|
|     41| [{424877249240998...|           [0.50165772173601...| [{214949265783922...|
|     63| [{-71178776330666...|           [0.57643066288957...| [{390507667084078...|
|     77| [{424877249240998...|           [0.50165772173601...| [{693034873607785...|
+-------+---------------------+-------------------------------+---------------------+



                                                                                

In [35]:
# Example usage
display_spark_dataframe(final_recommendations)

                                                                                

user_id,content_based_recipes,content_based_similarity_scores,collaborative_recipes
1,"[(56267968418564497, Broccoli Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=693123, [rice, onion, broccoli, cream of celery soup, margarine]), (-9058629457183933290, Vegetable-Burger Soup, http://www.cookbooks.com/Recipe-Details.aspx?id=822015, [ground beef, water, sugar, onion soup, tomatoes, tomato sauce, frozen mixed vegetables])]","[0.5108642386266795, 0.9726914161586602]","[(3825230156317200027, Carrot Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=406346, [flour, sugar, baking powder, cinnamon, cloves, oil, eggs, walnuts, junior jars baby carrots]), (5379553627317224379, Summer Squash Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=930887, [yellow sumer squash, onion, condensed cream, sour cream, carrot, herb seasoned stuffing mix, butter]), (-9058629457183933290, Vegetable-Burger Soup, http://www.cookbooks.com/Recipe-Details.aspx?id=822015, [ground beef, water, sugar, onion soup, tomatoes, tomato sauce, frozen mixed vegetables])]"
24,"[(5825856628004213219, Vegetable Burger Soup, http://www.cookbooks.com/Recipe-Details.aspx?id=302429, [ground beef, tomatoes, tomato sauce, frozen mixed vegetables, onion soup, sugar])]",[0.9726914161586602],"[(-3928874728248188262, Blueberry Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=303061, [eggs, sugar, shortening, salt, baking powder, vanilla, flour, milk, floured blueberries]), (-5669451870154398266, Ground Beef Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=449639, [ground beef, onion, clove garlic, bell pepper, celery, tomato, tomato sauce, mushrooms, corn, olives, egg noodles, cheese]), (3296045182844799490, Baked Cabbage, http://www.cookbooks.com/Recipe-Details.aspx?id=761049, [ground chuck, onion, rice, salt, cabbage, pepper, tomato soup, water, Mozzarella cheese]), (1176869484819785883, Fa-La-La-La-Las, http://www.cookbooks.com/Recipe-Details.aspx?id=744133, [butter, sugar, egg, salt, vanilla, almond extract, all-purpose]), (-8815957590080865143, Chicken Divan, http://www.cookbooks.com/Recipe-Details.aspx?id=425137, [margarine, onion, celery, flour, curry powder, mushrooms, cream of celery soup, broccoli, chicken, cheese])]"
29,"[(-2682508455536574038, Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=13922, [eggs, sugar, milk, cream, vanilla, salt, milk]), (8847943492650148032, Low Cholesterol Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=289172, [Egg Scramblers, sugar, vanilla, milk, milk]), (1728567545615044470, Plattar(Swedish Pancakes) , http://www.cookbooks.com/Recipe-Details.aspx?id=554747, [flour, sugar, salt, eggs, milk]), (-5155294720942217303, Cream Pie And Pudding(Microwave) , http://www.cookbooks.com/Recipe-Details.aspx?id=634804, [sugar, flour, salt, eggs, milk])]","[0.624592049758917, 0.6647852200539185, 0.7464639123963781, 0.7464639123963781]","[(-5182186088732076091, Play Doh, http://www.cookbooks.com/Recipe-Details.aspx?id=906005, [flour, salt, alum, water, cooking oil, food coloring]), (-5155294720942217303, Cream Pie And Pudding(Microwave) , http://www.cookbooks.com/Recipe-Details.aspx?id=634804, [sugar, flour, salt, eggs, milk]), (6717102631624880744, Original Mayfair Dressing, http://www.cookbooks.com/Recipe-Details.aspx?id=857466, [anchovies, black pepper, Accent, stalks celery, onion, mustard, garlic, lemon juice, eggs, oil]), (1728567545615044470, Plattar(Swedish Pancakes) , http://www.cookbooks.com/Recipe-Details.aspx?id=554747, [flour, sugar, salt, eggs, milk])]"
39,"[(-7117877633066661520, Microwave Spice Tea, http://www.cookbooks.com/Recipe-Details.aspx?id=543821, [sugar, sugar, ground cloves, cinnamon, instant tea]), (-8228806269697013615, Hot Spiced Tea, http://www.cookbooks.com/Recipe-Details.aspx?id=622677, [sugar, instant tea, cinnamon, cloves])]","[0.5700013881772769, 0.9888464040409073]","[(-5593920171542558470, Pot Roast, http://www.cookbooks.com/Recipe-Details.aspx?id=587453, [chuck roast, ginger, garlic, pepper, paprika, tomato paste]), (-8228806269697013615, Hot Spiced Tea, http://www.cookbooks.com/Recipe-Details.aspx?id=622677, [sugar, instant tea, cinnamon, cloves]), (-6459617218318945727, Chicken, Broccoli And Cheese Casserole, http://www.cookbooks.com/Recipe-Details.aspx?id=1081500, [fresh broccoli, chicken breasts, broccoli, milk, Cheddar cheese])]"
41,"[(4248772492409983307, Rolls, http://www.cookbooks.com/Recipe-Details.aspx?id=824206, [milk, butter, yeast, sugar, salt, eggs, flour]), (-2682508455536574038, Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=13922, [eggs, sugar, milk, cream, vanilla, salt, milk]), (6930348736077852695, Vanilla Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=91883, [eggs, sugar, milk, milk, milk]), (-5155294720942217303, Cream Pie And Pudding(Microwave) , http://www.cookbooks.com/Recipe-Details.aspx?id=634804, [sugar, flour, salt, eggs, milk])]","[0.5016577217360161, 0.5378288057346015, 0.7464639123963781, 1.0]","[(2149492657839221283, Ambrosia Mold, http://www.cookbooks.com/Recipe-Details.aspx?id=222480, [pineapple, boiling water, orange flavor, topping, orange segments, marshmallows, coconut]), (6930348736077852695, Vanilla Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=91883, [eggs, sugar, milk, milk, milk]), (6717102631624880744, Original Mayfair Dressing, http://www.cookbooks.com/Recipe-Details.aspx?id=857466, [anchovies, black pepper, Accent, stalks celery, onion, mustard, garlic, lemon juice, eggs, oil]), (-5182186088732076091, Play Doh, http://www.cookbooks.com/Recipe-Details.aspx?id=906005, [flour, salt, alum, water, cooking oil, food coloring]), (-5155294720942217303, Cream Pie And Pudding(Microwave) , http://www.cookbooks.com/Recipe-Details.aspx?id=634804, [sugar, flour, salt, eggs, milk])]"
63,"[(-7117877633066661520, Microwave Spice Tea, http://www.cookbooks.com/Recipe-Details.aspx?id=543821, [sugar, sugar, ground cloves, cinnamon, instant tea]), (3905076670840786120, Spiced Tea, http://www.cookbooks.com/Recipe-Details.aspx?id=901007, [sugar, instant tea, cinnamon, cloves, salt])]","[0.5764306628895793, 0.9888464040409073]","[(3905076670840786120, Spiced Tea, http://www.cookbooks.com/Recipe-Details.aspx?id=901007, [sugar, instant tea, cinnamon, cloves, salt]), (-6007787547450475077, Danish Cake, http://www.cookbooks.com/Recipe-Details.aspx?id=646925, [shortening, sugar, eggs, flour, soda, salt, buttermilk, orange juice, nuts, dates, orange rind, vanilla])]"
77,"[(4248772492409983307, Rolls, http://www.cookbooks.com/Recipe-Details.aspx?id=824206, [milk, butter, yeast, sugar, salt, eggs, flour]), (-2682508455536574038, Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=13922, [eggs, sugar, milk, cream, vanilla, salt, milk]), (6930348736077852695, Vanilla Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=91883, [eggs, sugar, milk, milk, milk]), (1728567545615044470, Plattar(Swedish Pancakes) , http://www.cookbooks.com/Recipe-Details.aspx?id=554747, [flour, sugar, salt, eggs, milk])]","[0.5016577217360161, 0.5378288057346015, 0.7464639123963781, 1.0]","[(6930348736077852695, Vanilla Ice Cream, http://www.cookbooks.com/Recipe-Details.aspx?id=91883, [eggs, sugar, milk, milk, milk]), (2149492657839221283, Ambrosia Mold, http://www.cookbooks.com/Recipe-Details.aspx?id=222480, [pineapple, boiling water, orange flavor, topping, orange segments, marshmallows, coconut]), (1728567545615044470, Plattar(Swedish Pancakes) , http://www.cookbooks.com/Recipe-Details.aspx?id=554747, [flour, sugar, salt, eggs, milk])]"
