In [1]:
dining_file = './preprocessed-data/alldininghalls.csv'
ratings_file = './preprocessed-data/dining_ratings.csv'
# users_file = './preprocessed-data/randomUserDataset.csv'
users_file = './preprocessed-data/normalizedValues.csv'
nut_facts_file = './preprocessed-data/allNutritionalInfo.csv'

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import botocore
import boto3
from botocore.exceptions import ClientError
import botocore
import json
from datetime import date

In [3]:
df = pd.read_csv(dining_file)
diningRates = pd.read_csv(ratings_file)
users = pd.read_csv(users_file)
nut_file = pd.read_csv(nut_facts_file)

In [4]:
# avg_ratings_users = ratings.groupby(by = ['userId','foodItem']).agg({
#     'rating':np.mean
# }).reset_index()

# ratings_similar_users = ratings[ratings['userId'].isin(similar_users['UserID'].tolist())].copy()
# avg_ratings_similar_users = ratings_similar_users.groupby(by = ['userId','foodItem']).agg({
#     'rating':np.mean
# }).reset_index()

In [15]:
#### collaborative filtering
## find similar users
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Function to find similar users
def find_similar_users(userID, k=25):    
    
    users_df = users.copy()
    # one_user = users[:1].copy() # new user data frame
    new_user_df = users.iloc[userID].to_frame().T # how to access a specific row
    
    # Compute cosine similarity between new user and existing users
    new_user_df.drop(columns = ['UserID'])
    users_df.drop(columns = ['UserID'])
    similarities = cosine_similarity(new_user_df, users_df)[0]
    
    # Find the top-k similar users
    top_k_similar_users_indices = similarities.argsort()[-k-1:-1][::-1]
    top_k_similar_users = users.iloc[top_k_similar_users_indices]
    
    return top_k_similar_users

# similar_users = find_similar_users(1) # pass in userID to get similar users
# display(similar_users)

In [6]:
# Define the model
class MatrixFactorization(nn.Module):
    def __init__(self, n_users, n_food_items, n_factors=20):
        super(MatrixFactorization, self).__init__()
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.movie_factors = nn.Embedding(n_food_items, n_factors)
        # initializing our matrices with a positive number generally will yield better results
        self.user_factors.weight.data.uniform_(0, 0.5)
        self.movie_factors.weight.data.uniform_(0, 0.5)
        
    def forward(self, user, food_item):
        return (self.user_factors(user) * self.movie_factors(food_item)).sum(1)

In [7]:
# Load the MovieLens dataset
diningHalls = df.copy()
# diningRates = ratings.copy()

# Preprocess the data
n_users = diningRates.userId.unique().shape[0]
print(n_users)
n_foodItems = diningRates.foodItem.unique().shape[0]

# Convert movieId and userId into unique integers
user_map = {u: i for i, u in enumerate(diningRates.userId.unique())}
diningRates['user_id'] = diningRates['userId'].map(user_map)

dining_map = {m: i for i, m in enumerate(diningRates.foodItem.unique())}
diningRates['food_item'] = diningRates['foodItem'].map(dining_map)

# Create a matrix with users as rows and movies as columns
# matrix = torch.zeros((n_users, n_foodItems))
# for i, row in diningRates.iterrows():
#     matrix[int(row.user_id), int(row.food_item)] = row.rating

610


In [8]:
model = MatrixFactorization(n_users, n_foodItems)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Train the model
for i in range(50):
    optimizer.zero_grad()
    user = torch.LongTensor(diningRates.user_id)
    movie = torch.LongTensor(diningRates.food_item)
    rating = torch.FloatTensor(diningRates.rating)
    predictions = model(user, movie)
    loss = criterion(predictions, rating)
    loss.backward()
    optimizer.step()
    
    if i % 10 == 0:
        print(loss)

tensor(6.2165, grad_fn=<MseLossBackward0>)
tensor(6.2016, grad_fn=<MseLossBackward0>)
tensor(6.1866, grad_fn=<MseLossBackward0>)
tensor(6.1718, grad_fn=<MseLossBackward0>)
tensor(6.1571, grad_fn=<MseLossBackward0>)


In [26]:
# Make recommendations for a given user
def recommend_food(model, user_id, num_recommendations):
    with torch.no_grad():
        user = torch.LongTensor([user_map[user_id]])
        food_items = torch.arange(n_foodItems)
        ratings = model(user, food_items).detach().numpy()
    food_ids = ratings.argsort()[-num_recommendations:][::-1]
    recommended_food = [food_items[i] for i in food_ids]
    return recommended_food

In [10]:
# Get recommendations for a user
def getRecs(model, user_id, num_recs):
    recommended_food = recommend_food(model, user_id, num_recs)

    # Convert tensors to Int
    val = []
    for i in range(num_recs):
        val.append(int(recommended_food[i]))

    for id in val:
        row = diningHalls.loc[diningHalls['foodId'] == id]
        movie = row.values.tolist()
        if len(movie) == 0:
            continue
    return val

In [19]:
# recommendation for that particular user:

def getSimilarUserRecs(model, userID, num_recs = 5):
    recs = []
    recs += getRecs(model, userID, num_recs)
    # recommendation for similar users:
    
    similar_users = find_similar_users(userID) # added this
    print("here")
    for user in similar_users['UserID'].tolist():
        recs += getRecs(model, user, num_recs)
    recs = set(recs)
    food_recs = df[df['foodId'].isin(recs)]
        
    return food_recs

In [12]:
# Now do content filtering

def contentFiltering(df, dHall_pref, allergens, diet_restr, meal):     
        
    # filter by meal
    recs = df.loc[(df[meal] == 1)] # works
    recs = recs.drop_duplicates(subset="Food Item") # remove duplicates
        
    # filter by dining 
    if len(dHall_pref) == 0:
        pass
    else:
        for index, row in recs.iterrows():
            L = []
            actual_index = row["foodId"]
            for pref in dHall_pref:  

                if row[pref] == 0: L.append(0)
                else: L.append(1)

            all_zeros = all(val == 0 for val in L)
            if all_zeros == True: 
                recs = recs.drop(actual_index)
            
    # filter by allergens 
    if len(allergens) == 0:
        pass
    else:
        for index, row in recs.iterrows():
            L = []
            actual_index = row["foodId"]
            for allergen in allergens:  

                if row[allergen] == 1: L.append(1)
                else: L.append(0)

            all_zeros = all(val == 0 for val in L)
            # If there is an allergen present, a 1 in the list, drop it
            if all_zeros == False: 
                recs = recs.drop(actual_index)

    # filter by dietary restrictions
    if len(diet_restr) == 0:
        pass
    else:
        for index, row in recs.iterrows():
            L = []
            actual_index = row["foodId"]
            for restr in diet_restr:  

                if row[restr] == 0: L.append(0)
                else: L.append(1)

            all_zeros = all(val == 0 for val in L)
            # If there is an allergen present, a 1 in the list, drop it
            if all_zeros == True: 
                recs = recs.drop(actual_index)
    
  
    df_input = ["foodId", "Food Item"]
    for pref in dHallPref:
        df_input.append(pref)

    a = recs[df_input] # gets only necessary values
    obj = a.to_dict(orient="records") 
        
    return obj

In [13]:
# Check to see if food is being served at specified Dining hall
def checkDiningHall(recs, dHallPref, meal):
    bucket = "dininghall-data-cache"

    s3 = boto3.client('s3')
    today = date.today()
    dd = today.strftime("%d")
    mm = today.strftime("%m")
    yyyy = today.strftime("%Y")
     
    L = []
    # Check to see if user has a preference
    if len(dHallPref) > 0:
        
        # For each recommended item
        for item in recs:
        
            # Check all preferred dining halls to see if food item is being served
            for pref in dHallPref:

                # If a user has a preference, see if its currently being served at their preferred dining hall
                if item[pref] == 1:
                    dHall = item[pref]

                    key = "{}-{}-{}-{}-{}.json".format(pref.capitalize(), meal, mm, dd, yyyy) # pref is the dining hall
                    response = s3.get_object(Bucket=bucket, Key=key)
                    content = response['Body'].read()
                    data = json.loads(content)

                    for food_item in data:
                        if food_item["Food Item"] == item["Food Item"]:
                            # print("IT ACTUALLY FUCKING WORKS!!!")
                            L.append(food_item)
                            
    else:
        # User does not have a preference, recommend food from anywhere
        dHalls = ["gelfenbien", "kosher", "north", "northwest", "McMahon", "putnam", "south", "whitney"]

        for item in recs:
            
            # Check each dining hall to see if item is being served
            for dining_hall in dHalls:

                if dining_hall == "McMahon": continue
                else: dining_hall = dining_hall.capitalize()


                key = "{}-{}-{}-{}-{}.json".format(dining_hall, meal, mm, dd, yyyy) # pref is the dining hall
                response = s3.get_object(Bucket=bucket, Key=key)
                content = response['Body'].read()
                data = json.loads(content)

                for food_item in data:
                    if food_item["Food Item"] == item["Food Item"]:
                        # print("IT ACTUALLY FUCKING WORKS!!!")
                        L.append(food_item)          

    return L                           

In [23]:
# meals = ['Breakfast', 'Lunch', 'Dinner']
# allergens = ['Fish', 'Soybeans', 'Wheat', 'Gluten', 'Milk', 'Tree Nuts', 'Eggs', 'Sesame', 'Crustacean Shellfish']
# dietary_restrictions = ['Gluten Friendly', 'Less Sodium', 'Smart Check', 'Vegan', 'Vegetarian', 'Contains Nuts']
# dHalls = ["gelfenbien", "kosher", "north", "northwest", "McMahon", "putnam", "south", "whitney"]
# user_map
df = getSimilarUserRecs(model, 611, 10) # dataframe
# dHallPref = ["south", "McMahon"]
dHallPref = []

allergens = []
dietary_restrictions = []
meal = "Lunch"

len(dHallPref)

recs = contentFiltering(df, dHallPref, allergens, dietary_restrictions, meal) # ["Food Item"].tolist()
check = checkDiningHall(recs, dHallPref, meal)
check

tensor([610])


IndexError: index out of range in self