# Imports

In [1]:
# Imports

import pandas as pd
import numpy as np
import scipy.sparse as sp
from sklearn.neighbors import NearestNeighbors

# Initial Read

In [2]:
# Load the raw interactions file, keeping only columns 0, 1 and 3
# (they load as user_id, recipe_id and rating)

recipes = pd.read_csv(
    filepath_or_buffer='Data/RAW_interactions.csv',
    usecols=[0, 1, 3],
)

In [3]:
# Head check: preview the first rows to confirm the expected columns were loaded

recipes.head()

Unnamed: 0,user_id,recipe_id,rating
0,38094,40893,4
1,1293707,40893,5
2,8937,44394,4
3,126440,85009,5
4,57222,85009,5


In [4]:
# Info/shape check: per-column dtypes and non-null counts, then the overall dimensions

recipes.info()
print(recipes.shape)

# Result: no nulls - three columns - 1,132,367 rows

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1132367 entries, 0 to 1132366
Data columns (total 3 columns):
 #   Column     Non-Null Count    Dtype
---  ------     --------------    -----
 0   user_id    1132367 non-null  int64
 1   recipe_id  1132367 non-null  int64
 2   rating     1132367 non-null  int64
dtypes: int64(3)
memory usage: 25.9 MB
(1132367, 3)


In [5]:
# Dupe check: counts rows that are identical across all three columns.
# NOTE(review): a (user_id, recipe_id) pair that repeats with a different rating
# would not be counted here -- confirm pair-level uniqueness before relying on
# one rating per user/recipe cell.

recipes.duplicated().sum()

# no dupes

np.int64(0)

In [6]:
# Number of distinct user IDs in the interactions - 226570

recipes['user_id'].nunique()


226570

In [7]:
# Number of distinct recipe IDs in the interactions - 231637

recipes['recipe_id'].nunique()


231637

_At first glance, sampling 10% of the dataset would leave us with approximately 20,000 unique user IDs and 20,000 unique recipe IDs. When I attempted to build recommendation models on that truncated dataset, I was unable to find an approach that did not either crash the kernel or require far more memory than my computer has._

**Because of this, I will use a nearest-neighbors algorithm to find similarities on the fly within the recommendation function, rather than precomputing a full pairwise similarity matrix for every user and recipe in advance.**

In [8]:
# Create dictionaries mapping each user/recipe ID to a contiguous 0-based index
# (assigned in order of first appearance); these indices become the row/column
# positions of the sparse ratings matrix

user_indices = {u: i for i, u in enumerate(recipes['user_id'].unique())}
recipe_indices = {r: i for i, r in enumerate(recipes['recipe_id'].unique())}

# Pull out rows, cols and ratings for creation of sparse matrix.
# Series.map performs the dictionary lookups in one vectorized pass rather than
# a Python-level loop over every interaction row; every ID is a key of the
# dictionary built above, so no lookup can miss.

rows = recipes['user_id'].map(user_indices).to_numpy()
cols = recipes['recipe_id'].map(recipe_indices).to_numpy()
ratings = recipes['rating'].to_numpy()

In [9]:
# Create sparse matrix in CSR format: one row per user index, one column per
# recipe index, stored values are the ratings.
# Note: this coordinate-style constructor sums the values of any repeated
# (row, col) pair.

matrix_shape = (len(user_indices), len(recipe_indices))
matrix = sp.csr_matrix((ratings, (rows, cols)), shape=matrix_shape)


In [10]:
# Item-item similarity: the matrix is transposed so that each recipe is described
# by its vector of user ratings; neighbours are found by brute-force cosine distance

items_nn_model = NearestNeighbors(
    n_neighbors=50,
    metric='cosine',
    algorithm='brute',
)
items_nn_model.fit(matrix.T)


In [11]:
# User-user similarity: each user is described by their row of recipe ratings;
# neighbours are found by brute-force cosine distance

user_nn_model = NearestNeighbors(
    n_neighbors=50,
    metric='cosine',
    algorithm='brute',
)
user_nn_model.fit(matrix)

In [12]:
# Get recommendations for a specific user

def recommend_for_user(n_recommendations=3, user_id=None):
    """Recommend unrated recipes similar to the ones a user rated highly.

    For every recipe the user rated 4 or higher, the fitted ``items_nn_model``
    supplies that recipe's nearest neighbours. Each neighbour is scored as
    ``rating * (1 - cosine_distance)``. Recipes the user has already rated are
    dropped, and a recipe reached from several liked recipes keeps only its
    best score, so it can appear at most once in the result.

    Relies on the notebook-level ``user_indices``, ``recipe_indices``,
    ``matrix`` and ``items_nn_model``.

    Parameters
    ----------
    n_recommendations : int, default 3
        Maximum number of recipes to return.
    user_id : int or str, optional
        User to recommend for. When omitted, the ID is requested
        interactively via ``input()``.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``recipe_id`` and ``similarity_score``, best score first. The
        frame is empty when the user has no rating of 4 or higher. ``None`` is
        returned (after printing a message) when the ID is not an integer or
        is not present in ``user_indices``.
    """
    if user_id is None:
        # Prompt user for their user ID
        user_id = input("Please enter your user ID: ")

    # Convert to int; free-form input must not crash the cell with a ValueError
    try:
        user_id = int(user_id.strip()) if isinstance(user_id, str) else int(user_id)
    except (TypeError, ValueError):
        print(f"{user_id!r} is not a valid user ID. Please enter a whole number.")
        return None

    # Check if the user ID exists in user_indices
    if user_id not in user_indices:
        print(f"User ID {user_id} not found. Please enter a valid user ID.")
        return None

    user_idx = user_indices[user_id]
    user_ratings = matrix[user_idx].toarray().ravel()
    rated_items = np.flatnonzero(user_ratings > 0)
    liked_items = rated_items[user_ratings[rated_items] >= 4]

    # Best score seen so far for each candidate recipe index
    best_scores = {}
    if liked_items.size:
        # Set membership is O(1); testing against the numpy array scans it every time
        already_rated = set(rated_items.tolist())

        # One batched neighbour query for all liked recipes instead of a separate
        # brute-force search (and a fresh transpose) per recipe
        liked_vectors = matrix[:, liked_items].T
        distances, indices = items_nn_model.kneighbors(liked_vectors)

        for liked_idx, dist_row, idx_row in zip(liked_items, distances, indices):
            liked_rating = user_ratings[liked_idx]
            for dist, idx in zip(dist_row, idx_row):
                idx = int(idx)
                if idx in already_rated:
                    continue
                score = float(liked_rating * (1 - dist))
                if score > best_scores.get(idx, float('-inf')):
                    best_scores[idx] = score

    # Sort by similarity and keep the top recs
    top_recs = sorted(best_scores.items(), key=lambda rec: rec[1], reverse=True)
    top_recs = top_recs[:n_recommendations]

    # Create DataFrame; materialise the index -> recipe ID lookup once, not per row
    recipe_ids_by_index = list(recipe_indices)
    recommendations_df = pd.DataFrame({
        'recipe_id': [recipe_ids_by_index[idx] for idx, _ in top_recs],
        'similarity_score': [score for _, score in top_recs]
    })

    return recommendations_df

In [13]:
# Run the recommender: it asks for a user ID on stdin and displays the resulting table
recommend_for_user()

Unnamed: 0,recipe_id,similarity_score
0,443100,4.082483
1,241428,3.916747
2,244271,3.706247
