In [1]:
dining_file = 'alldininghalls.csv'
ratings_file = 'dining_ratings.csv'
users_file = 'randomUserDataset.csv'

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

In [3]:
df = pd.read_csv(dining_file)
ratings = pd.read_csv(ratings_file)
users = pd.read_csv(users_file)

In [4]:
# height ft-inch to inch
users['Height'] = users['Height'].apply(lambda x:int(x.split('\'')[0])*12+int(x.split('\'')[1]))

In [5]:
#### collaborative filtering
## find similar users
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Function to find similar users
def find_similar_users(new_user_df, users_df, k=25):    
    # Compute cosine similarity between new user and existing users
    new_user_df.drop(columns = ['UserID'])
    users_df.drop(columns = ['UserID'])
    similarities = cosine_similarity(new_user_df, users_df)[0]
    
    # Find the top-k similar users
    top_k_similar_users_indices = similarities.argsort()[-k-1:-1][::-1]
    top_k_similar_users = users.iloc[top_k_similar_users_indices]
    
    return top_k_similar_users

# Normalize numerical features
numerical_features = ['Weight (lbs)', 'Height']
numerical_data = users[numerical_features]
scaler = StandardScaler()
numerical_data_scaled = scaler.fit_transform(numerical_data)
users[numerical_features] = numerical_data_scaled

# similar users in a dataframe
one_user = users[:1].copy() ## new user data frame
similar_users = find_similar_users(one_user, users) 

In [6]:
avg_ratings_users = ratings.groupby(by = ['userId','foodItem']).agg({
    'rating':np.mean
}).reset_index()

In [7]:
ratings_similar_users = ratings[ratings['userId'].isin(similar_users['UserID'].tolist())].copy()
avg_ratings_similar_users = ratings_similar_users.groupby(by = ['userId','foodItem']).agg({
    'rating':np.mean
}).reset_index()

In [8]:
# Define the model
class MatrixFactorization(nn.Module):
    def __init__(self, n_users, n_movies, n_factors=20):
        super(MatrixFactorization, self).__init__()
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.movie_factors = nn.Embedding(n_movies, n_factors)
        # initializing our matrices with a positive number generally will yield better results
        self.user_factors.weight.data.uniform_(0, 0.5)
        self.movie_factors.weight.data.uniform_(0, 0.5)
        
    def forward(self, user, movie):
        return (self.user_factors(user) * self.movie_factors(movie)).sum(1)
    


In [9]:
# Load the MovieLens dataset
# diningHalls = pd.read_csv('./preprocessed-data/alldininghalls.csv')
# diningRates = pd.read_csv('./preprocessed-data/dining_ratings.csv')
diningHalls = df.copy()
diningRates = ratings.copy()

# Preprocess the data
n_users = diningRates.userId.unique().shape[0]
n_foodItems = diningRates.foodItem.unique().shape[0]

# Convert movieId and userId into unique integers
user_map = {u: i for i, u in enumerate(diningRates.userId.unique())}
diningRates['user_id'] = diningRates['userId'].map(user_map)

dining_map = {m: i for i, m in enumerate(diningRates.foodItem.unique())}
diningRates['food_item'] = diningRates['foodItem'].map(dining_map)

# Create a matrix with users as rows and movies as columns
matrix = torch.zeros((n_users, n_foodItems))
for i, row in diningRates.iterrows():
    matrix[int(row.user_id), int(row.food_item)] = row.rating

In [10]:
model = MatrixFactorization(n_users, n_foodItems)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Train the model
for i in range(50):
    optimizer.zero_grad()
    user = torch.LongTensor(diningRates.user_id)
    movie = torch.LongTensor(diningRates.food_item)
    rating = torch.FloatTensor(diningRates.rating)
    predictions = model(user, movie)
    loss = criterion(predictions, rating)
    loss.backward()
    optimizer.step()
    
    if i % 10 == 0:
        print(loss)

tensor(6.1967, grad_fn=<MseLossBackward0>)
tensor(6.1815, grad_fn=<MseLossBackward0>)
tensor(6.1664, grad_fn=<MseLossBackward0>)
tensor(6.1514, grad_fn=<MseLossBackward0>)
tensor(6.1364, grad_fn=<MseLossBackward0>)


In [11]:
# Make recommendations for a given user
def recommend_movies(model, user_id, num_recommendations):
    with torch.no_grad():
        user = torch.LongTensor([user_map[user_id]])
        movies = torch.arange(n_foodItems)
        ratings = model(user, movies).detach().numpy()
    movie_ids = ratings.argsort()[-num_recommendations:][::-1]
    recommended_movies = [movies[i] for i in movie_ids]
    return recommended_movies

In [12]:
# Get recommendations for a user with user_id 1
def getRecs(model, user_id, num_recs):
    recommended_movies = recommend_movies(model, user_id, num_recs)

    # Convert tensors to Int
    val = []
    for i in range(num_recs):
        val.append(int(recommended_movies[i]))

    for id in val:
        row = diningHalls.loc[diningHalls['foodId'] == id]
        # print(type(row))
        movie = row.values.tolist()
        if len(movie) == 0:
            continue
#         print(movie[0][1])
    return val
        
def getUserInfo(user):
    row = diningRates.loc[diningRates['userId'] == user]
    x = row.loc[diningRates['rating'] >= 5.0]
    foodIDS = x['foodItem'].values

    for id in foodIDS:
        movie = diningHalls.loc[diningHalls['foodId'] == id]
        a = movie.values.tolist()
        try:
            print(a[0][1])
        except:
            pass

In [13]:
userID = 1
print("Recommendations for User {}: ".format(userID))
print()
recs = getRecs(model, userID, 5)
print()
print("----------------------------------------------------------------------------------------")
print()
print("Food Items that User {} has rated over 5.0:".format(userID)) 
print()
getUserInfo(userID)

Recommendations for User 1: 


----------------------------------------------------------------------------------------

Food Items that User 1 has rated over 5.0:

Dilly Fries
Cheesy Scrambled Eggs
Bacon, Egg & Cheese Bagel
Marinara Sauce
Pepperoni Pizza
Sauteed Burgundy Beef Tips
Marble Cake w/ Vanilla Frosting
Omelet Bar
Montreal Rotisserie Chicken
Special Pizza
Belgian Waffle Bar
South Western California Veggie Burger
Peppers & Onions
Mandarin Basmati Rice
Pareve Chili
Kale And Spinach Dumpling
Signature Beef Burger
Wrapped Brownie
Pepperoni Pizza
Omelet Bar
Caramelized Onion Beef Burger
California Veggie Burger w/ Bun
Chicken Barley Soup
Mixed Vegetables
Waffles
Sweet Potato Wedges
Pepperoni Pizza
Signature Beef Burger
Scrambled Eggs
Omelet Bar
Frijoles Pintos Con Queso
Yellow Rice w/ Pinto Beans
Grilled Chicken Breast
Buttermilk Pancakes
Scrambled Eggs
Omelet Bar
Cranberry Orange Bread
Spinach Saute
Grilled Cheese
Macaroni Salad
Special Pizza
Oatmeal Raisin Cookies
Butter-Crumb S

In [14]:
# recommendation for that particular user:
recs = []
recs += getRecs(model, userID, 5)
# recommendation for similar users:
for user in similar_users['UserID'].tolist():
    recs += getRecs(model, user, 1)
recs = set(recs)

In [15]:
food_recs = df[df['foodId'].isin(recs)]

In [16]:
food_recs['Food Item'].tolist()

['Pepperoni Pizza',
 'Italian Squash',
 'Bacon',
 'Hard Boiled Eggs',
 'Signature Beef Burger',
 'Grilled Hot Dogs',
 'Marinara Sauce',
 'Cheesy Scrambled Eggs',
 'Cream of Wheat',
 'Fresh Vegetable Saute',
 'Parmesan',
 'Steamed Broccoli',
 'Shrimp & Penne Verduta',
 "Chef's Choice Soup du Jour",
 'Roasted Curried Cauliflower',
 'Lemon Delights']