In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

# Load user preferences data from an Excel file
file_path = "C:/Data/food_preference.xlsx"  # Change to your file path
df = pd.read_excel(file_path)

# Set "User" column as the index and create the user-item matrix
user_item_matrix = df.set_index('User')
user_ids = user_item_matrix.index
item_ids = user_item_matrix.columns

# Convert the matrix to a numpy array, treating NaNs as missing values
R = user_item_matrix.fillna(0).values  # Fill NaNs with 0 for matrix factorization

# Parameters for ALS
num_factors = 2  # Number of latent factors
num_iterations = 40  # Number of ALS iterations
lambda_reg = 0.1  # Regularization parameter

# Initialize user and item factors randomly
num_users, num_items = R.shape
user_factors = np.random.normal(scale=1./num_factors, size=(num_users, num_factors))
item_factors = np.random.normal(scale=1./num_factors, size=(num_items, num_factors))

# ALS optimization
for iteration in range(num_iterations):
    # Fix item_factors and solve for user_factors
    for i in range(num_users):
        # Get the items rated by user i
        ratings = R[i, :]
        # Create a diagonal matrix for the regularization term
        reg_matrix = lambda_reg * np.eye(num_factors)
        # Update user factors based on current item factors
        user_factors[i, :] = np.linalg.solve(
            np.dot(item_factors.T, np.dot(np.diag(ratings > 0), item_factors)) + reg_matrix,
            np.dot(item_factors.T, ratings)
        )
    
    # Fix user_factors and solve for item_factors
    for j in range(num_items):
        # Get the users who rated item j
        ratings = R[:, j]
        # Create a diagonal matrix for the regularization term
        reg_matrix = lambda_reg * np.eye(num_factors)
        # Update item factors based on current user factors
        item_factors[j, :] = np.linalg.solve(
            np.dot(user_factors.T, np.dot(np.diag(ratings > 0), user_factors)) + reg_matrix,
            np.dot(user_factors.T, ratings)
        )

# Predict the missing ratings by multiplying the learned user and item factors
R_pred = np.dot(user_factors, item_factors.T)

# Mask 20% of the observed (non-NaN) ratings for testing
def create_masked_matrix(R, original_matrix, mask_fraction=0.2):
    mask = original_matrix.notna().values  # Mask for non-NaN entries in original data
    non_zero_indices = np.argwhere(mask)
    test_indices = non_zero_indices[np.random.choice(len(non_zero_indices), int(len(non_zero_indices) * mask_fraction), replace=False)]
    
    R_train = R.copy()
    R_test = np.zeros(R.shape)
    
    for (i, j) in test_indices:
        R_test[i, j] = R[i, j]  # Store actual value in test set
        R_train[i, j] = 0       # Mask the value in train set

    return R_train, R_test, test_indices

# Mask a percentage of the non-missing observations to create a test set
R_train, R_test, test_indices = create_masked_matrix(R, user_item_matrix, mask_fraction=0.2)

# Evaluate model performance on masked test values
test_values = [R_test[i, j] for i, j in test_indices]
predicted_values = [R_pred[i, j] for i, j in test_indices]
rmse = np.sqrt(mean_squared_error(test_values, predicted_values))
print(f"RMSE on test data: {rmse:.4f}")

# Create predictions DataFrame
predicted_ratings_df = pd.DataFrame(R_pred, index=user_ids, columns=item_ids)

RMSE on test data: 0.6761


In [3]:
users = df.User

# Display the list of users with indices
print("Select a user from the list below:")
for i, user in enumerate(users):
    print(f"{i + 1}. {user}")

# Get user input
while True:
    try:
        choice = int(input("Enter the number of the user you want to select: ")) - 1
        if 0 <= choice < len(users):
            selected_user = users[choice]
            print(f"You selected: {selected_user}")
            break
        else:
            print("Invalid selection. Please enter a number from the list.")
    except ValueError:
        print("Please enter a valid number.")

user_ratings = predicted_ratings_df.loc[selected_user]
unrated_items = user_ratings[user_item_matrix.loc[selected_user].isna()].copy()
top_items = unrated_items.sort_values(ascending=False).head(2).index
print(f"{selected_user} should try {top_items[0]} and {top_items[1]}")

Select a user from the list below:
1. Enno
2. Avani Jain
3. Ambika Burramukku
4. Marcela Salazar
5. Jaideep Kalsi
6. Shawn
7. Omer
8. yy
9. Snowflake
10. Alex Martin
11. Rahil
12. Sarah
13. Max Licht
14. Kunlong Wu
15. Berat Ozmen
16. Silin Li
17. Aarush Jain
18. Tanmay Maity
19. Huzaifa Ikram
20. Aoqi  Zeng
21. Isabella Li
22. Nikunj Mehta
23. Preetham Srinivasan
24. Halley Jones
25. Kenzie
26. Guanchen Wu
27. Ellie
28. Ann
29. DairyLover
30. Manisha
31. Matthew McClintick
32. Jack Motto
33. C
34. Aryan Dua
35. Aidan Marlier
36. Clare 
37. Casey
38. Kefan Shang
39. Noah
40. Ann Huttenhower
41. Lainah Mnagwiza
42. Banana
43. John Rouse
44. Jiaqi Cui
45. Limin Xiao
46. Charlie
47. Nick Elias
48. Harsh
49. Megan Moede
50. Apple
51. Ailee Miller
52. Ana
53. Aravind
54. Ryan Jiang
55. Jay
56. Bob
57. Andrew Bingen
58. Poppy
59. Akhil
60. Bingzhao
61. Ariella Breyer
62. Kunlong Wu
63. Somename
64. bharath
65. Peach
66. Strawberry
67. Aravind
68. Lemon
69. Rahul
70. Barry
71. Molly


Enter the number of the user you want to select:  51


You selected: Ailee Miller
Ailee Miller should try Steak and Samosa
