In [2]:
import pickle
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import mahalanobis

In [3]:
# Restore the preprocessed DataFrame that was saved as a pickle.
# NOTE(review): unpickling can execute arbitrary code - only load df.pkl
# if it comes from a trusted source.
with open('df.pkl', mode='rb') as pickle_handle:
    df = pickle.loads(pickle_handle.read())

# Quick look at the first few rows to confirm the load worked
df.head()

Unnamed: 0,food_category,restaurant,item_name,item_description,calories,total_fat,saturated_fat,trans_fat,cholesterol,sodium,carbohydrates,dietary_fiber,sugar,protein
0,Soup,Applebee's,French Onion Soup,"French Onion Soup, Soups",370.0,22.0,12.0,0.0,55.0,1250.0,26.0,2.0,9.0,16.0
1,Soup,Applebee's,Tomato Basil Soup,"Tomato Basil Soup, Soups",220.0,12.0,4.5,1.5,25.0,1270.0,22.0,2.0,9.0,5.0
2,Soup,Applebee's,Chicken Tortilla Soup,"Chicken Tortilla Soup, Soups",280.0,15.0,4.0,0.0,35.0,930.0,26.0,2.0,3.0,11.0
3,Salads,Applebee's,House Salad,"House Salad, Side Salads",130.0,7.0,2.5,0.0,10.0,230.0,14.0,2.0,4.0,6.0
4,Salads,Applebee's,Oriental Chicken Salad,"Oriental Chicken Salad, 1 Breadstick, Salad Dr...",1560.0,103.0,17.0,1.0,65.0,1610.0,120.0,12.0,44.0,40.0


# KNN Euclidean Metric

This is good for getting as close as possible to the specified targets. The downside is that it does not make reasonable trade-offs: every unit of every nutrient counts equally, so it can trade, say, 20 g of protein for 20 more calories. That is a terrible trade-off.

In [28]:
# Feature matrix: every column except the descriptive text columns
X = df.drop(['food_category', 'restaurant', 'item_name', 'item_description'], axis = 1)

k = 10 # number of neighbors to find
# Cols are calories, total_fat, saturated_fat, trans_fat, cholesterol, sodium, carbs, dietary_fiber, sugar, protein
target = [500, 20, 5, 0, 100, 1000, 50, 10, 5, 25] # our target

# Wrap the target in a one-row DataFrame that carries X's column names.
# - pandas raises a ValueError here if the number of target values does not
#   match the number of feature columns, so a mis-sized target fails early.
# - Querying with the same column names the model was fitted on avoids
#   scikit-learn's "X does not have valid feature names" warning.
# The values must still be listed in X's column order.
target_df = pd.DataFrame([target], columns = X.columns)

# Plain Euclidean distance on the raw (unscaled) nutrient values
model = NearestNeighbors(n_neighbors = k, metric = 'euclidean')
model.fit(X)

# kneighbors returns one row per query point; we have a single query
distances, indices = model.kneighbors(target_df)
nearest_neighbors_indices = indices[0]
# indices are positional, so use iloc to recover the full rows (with names)
nearest_neighbors = df.iloc[nearest_neighbors_indices]
nearest_neighbors



Unnamed: 0,food_category,restaurant,item_name,item_description,calories,total_fat,saturated_fat,trans_fat,cholesterol,sodium,carbohydrates,dietary_fiber,sugar,protein
15554,Entrees,O'Charley's,Low Country Shrimp with Rice,"Low Country Shrimp with Rice, Seafood Favorite...",520.0,31.0,8.0,0.5,95.0,1000.0,39.0,2.0,8.0,20.0
16222,Toppings & Ingredients,Outback Steakhouse,Add 4 Grilled Shrimp on the Barbie,"Add 4 Grilled Shrimp on the Barbie, Plus It Up",510.0,35.0,12.0,0.0,110.0,1010.0,31.0,3.0,6.0,19.0
6981,Entrees,Dairy Queen,Pancake Platter with Sausage,"Pancake Platter with Sausage, Breakfast",520.0,24.0,8.0,0.0,100.0,980.0,60.0,4.0,11.0,17.0
22611,Soup,Steak 'N Shake,"Chili Deluxe, Cup","Chili Deluxe, Cup, Chili",500.0,28.0,16.0,0.5,80.0,980.0,36.0,7.0,2.0,29.0
7393,Entrees,Denny's,Jr. Cheeseburger (add side),"Jr. Cheeseburger (add side), Kids' Entrees",470.0,25.0,12.0,1.5,95.0,1000.0,34.0,1.0,5.0,27.0
23115,Sandwiches,Subway,Grilled Chicken on Plain Wrap,"Grilled Chicken on Plain Wrap, Make any Sandwi...",470.0,11.0,2.0,0.0,100.0,1010.0,54.0,3.0,5.0,42.0
15054,Sandwiches,McDonald's,McRib,"McRib, Sandwiches",500.0,26.0,10.0,0.0,70.0,980.0,44.0,3.0,11.0,22.0
2408,Salads,Bojangles,Homestyle Tenders Salad,"Homestyle Tenders Salad , Salads",480.0,26.0,10.0,0.5,85.0,1020.0,32.0,2.0,3.0,30.0
25253,Sandwiches,Whataburger,#11 Grilled Chicken Sandwich with Mayonnaise,"#11 Grilled Chicken Sandwich with Mayonnaise, ...",470.0,20.0,4.5,0.0,90.0,980.0,42.0,4.0,9.0,32.0
5183,Salads,Chick Fil A,Grilled Market Salad w/ Grilled Filet (Cold),"Grilled Market Salad w/ Grilled Filet (Cold), ...",510.0,31.0,6.0,0.0,80.0,1020.0,41.0,5.0,26.0,28.0


# KNN Cosine Metric
This is good when the proportion of nutrients matters more than the actual values. The downside is that the matched items can be much larger or smaller than the target, so the user may not get the precise amounts unless they have someone to share the dish with.

In [27]:
# Feature matrix: every column except the descriptive text columns
X = df.drop(['food_category', 'restaurant', 'item_name', 'item_description'], axis = 1)

k = 10 # number of neighbors to find
# Cols are calories, total_fat, saturated_fat, trans_fat, cholesterol, sodium, carbs, dietary_fiber, sugar, protein
target = [500, 20, 5, 0, 100, 1000, 50, 10, 5, 25] # our target

# Wrap the target in a one-row DataFrame that carries X's column names.
# - pandas raises a ValueError here if the number of target values does not
#   match the number of feature columns, so a mis-sized target fails early.
# - Querying with the same column names the model was fitted on avoids
#   scikit-learn's "X does not have valid feature names" warning.
# The values must still be listed in X's column order.
target_df = pd.DataFrame([target], columns = X.columns)

# Cosine distance compares the direction of the nutrient vectors, not their size
model = NearestNeighbors(n_neighbors = k, metric = 'cosine')
model.fit(X)

# kneighbors returns one row per query point; we have a single query
distances, indices = model.kneighbors(target_df)
nearest_neighbors_indices = indices[0]
# indices are positional, so use iloc to recover the full rows (with names)
nearest_neighbors = df.iloc[nearest_neighbors_indices]
nearest_neighbors



Unnamed: 0,food_category,restaurant,item_name,item_description,calories,total_fat,saturated_fat,trans_fat,cholesterol,sodium,carbohydrates,dietary_fiber,sugar,protein
1979,Entrees,BJ's Restaurant & Brewhouse,Italian Chicken Parmesan with Spaghetti,"Italian Chicken Parmesan with Spaghetti, Lunch...",900.0,37.0,11.0,0.0,184.0,1811.0,87.0,7.0,17.0,54.0
3842,Entrees,Carrabba's Italian Grill,Shrimp and Scallop Linguine Alla Vodka,"Shrimp and Scallop Linguine Alla Vodka, Pasta",1070.0,35.0,13.0,0.0,185.0,2130.0,120.0,10.0,11.0,49.0
2297,Toppings & Ingredients,Bob Evans,Breaded Chicken Breasts,"Breaded Chicken Breasts, Rolls, Available In: ...",590.0,21.0,4.5,0.0,115.0,1210.0,59.0,2.0,8.0,40.0
4993,Sandwiches,Chick Fil A,Grilled Chicken Sandwich,"Grilled Chicken Sandwich, Entrees",380.0,12.0,2.0,0.0,75.0,760.0,43.0,3.0,11.0,28.0
8500,Entrees,Domino's,"Chicken Carbonara, Dish","Chicken Carbonara, Dish, Penne Pasta, Sandwich...",690.0,34.0,19.0,0.5,120.0,1370.0,63.0,2.0,6.0,30.0
3805,Appetizers & Sides,Carrabba's Italian Grill,Shrimp Scampi,"Shrimp Scampi, Appetizers",990.0,51.0,19.0,0.0,180.0,2010.0,87.0,3.0,5.0,32.0
23522,Sandwiches,Taco Bell,Hash Brown Toasted Breakfast Burrito - Sausage,Hash Brown Toasted Breakfast Burrito - Sausage...,570.0,33.0,11.0,0.0,125.0,1170.0,52.0,3.0,3.0,18.0
17116,Salads,Panera Bread,Asian Sesame with Chicken Salad - Half,"Asian Sesame with Chicken Salad - Half, Salads",200.0,11.0,1.5,0.0,35.0,400.0,15.0,3.0,4.0,13.0
23340,Sandwiches,Taco Bell,Breakfast Crunchwrap - Steak,"Breakfast Crunchwrap - Steak, Breakfast",660.0,38.0,12.0,0.0,140.0,1360.0,53.0,4.0,3.0,24.0
19322,Sandwiches,Portillo's,Broiled Chicken Sandwich,"Broiled Chicken Sandwich, Chicken",400.0,17.0,4.0,0.0,80.0,769.0,36.0,1.0,8.0,26.0


# KNN Mahalanobis Distance

In [5]:
# Feature matrix: every column except the descriptive text columns
X = df.drop(['food_category', 'restaurant', 'item_name', 'item_description'], axis = 1)

k = 10 # number of neighbors to find
# Cols are calories, total_fat, saturated_fat, trans_fat, cholesterol, sodium, carbs, dietary_fiber, sugar, protein
target = [1000, 20, 5, 0, 100, 1000, 50, 10, 5, 50] # our target

# Wrap the target in a one-row DataFrame that carries X's column names.
# - pandas raises a ValueError here if the number of target values does not
#   match the number of feature columns, so a mis-sized target fails early.
# - Querying with the same column names the model was fitted on avoids
#   scikit-learn's "X does not have valid feature names" warning.
# The values must still be listed in X's column order.
target_df = pd.DataFrame([target], columns = X.columns)

def mahalanobis_distance(x1, x2, cov_inv):
    """Return the Mahalanobis distance between x1 and x2.

    Thin wrapper around scipy.spatial.distance.mahalanobis.

    Parameters:
        x1, x2: 1-D feature vectors of equal length.
        cov_inv: inverse of the feature covariance matrix.

    The model below uses scikit-learn's built-in 'mahalanobis' metric
    instead of this callable; the function is kept so that any other cell
    that calls it keeps working.
    """
    return mahalanobis(x1, x2, cov_inv)

# rowvar=False: each column of X is a variable, each row an observation
cov_matrix = np.cov(X, rowvar=False)  # Calculate covariance matrix
# Pseudo-inverse instead of inv: equals the true inverse when the covariance
# matrix is invertible, and still works when it is singular (for example a
# constant or perfectly collinear column), where np.linalg.inv would fail.
cov_inv = np.linalg.pinv(cov_matrix)  # Invert covariance matrix

# Built-in metric: same distance as the callable above, without calling back
# into Python for every pair of points. 'VI' is the inverse covariance matrix.
model = NearestNeighbors(n_neighbors = k, metric = 'mahalanobis', metric_params = {'VI': cov_inv})
model.fit(X)

# kneighbors returns one row per query point; we have a single query
distances, indices = model.kneighbors(target_df)
nearest_neighbors_indices = indices[0]
# indices are positional, so use iloc to recover the full rows (with names)
nearest_neighbors = df.iloc[nearest_neighbors_indices]
nearest_neighbors



Unnamed: 0,food_category,restaurant,item_name,item_description,calories,total_fat,saturated_fat,trans_fat,cholesterol,sodium,carbohydrates,dietary_fiber,sugar,protein
3420,Beverages,California Pizza Kitchen,Francis Ford Coppola Diamond Collection Merlot...,Francis Ford Coppola Diamond Collection Merlot...,600.0,0.0,0.0,0.0,0.0,20.0,16.0,6.0,0.0,3.0
12228,Appetizers & Sides,Hooter's,Boneless Wings - 16 Pc,"Boneless Wings - 16 Pc, Choice of Sauce or Dry...",960.0,31.0,6.0,0.0,180.0,520.0,33.0,3.0,0.0,70.0
12357,Beverages,Hooter's,"Allagash White, Pitcher","Allagash White, Pitcher, Beer, Draft",770.0,0.0,0.0,0.0,0.0,25.0,57.0,5.0,0.0,9.0
12404,Beverages,Hooter's,"Foothills Jade Ipa, Big Daddy","Foothills Jade Ipa, Big Daddy, Beer, Draft",430.0,0.0,0.0,0.0,0.0,20.0,30.0,2.0,2.0,6.0
12229,Appetizers & Sides,Hooter's,Boneless Wings - 22 Pc,"Boneless Wings - 22 Pc, Choice of Sauce or Dry...",1320.0,41.0,8.0,0.0,240.0,700.0,45.0,4.0,0.0,93.0
12360,Beverages,Hooter's,"Bell's Two Hearted, Big Daddy","Bell's Two Hearted, Big Daddy, Beer, Draft",410.0,0.0,0.0,0.0,0.0,20.0,32.0,3.0,3.0,4.0
16511,Beverages,Outback Steakhouse,"Rogue Chocolate Stout, Bottle, 22 oz","Rogue Chocolate Stout, Bottle, 22 oz, Beer",360.0,0.0,0.0,0.0,0.0,25.0,2.0,0.0,0.0,6.0
12614,Beverages,Hooter's,"White Zinfandel, 20 fl oz","White Zinfandel, 20 fl oz, Wine",500.0,0.0,0.0,0.0,0.0,30.0,23.0,0.0,23.0,2.0
20392,Beverages,Red Robin,"Voodoo Ranger Juicy Haze IPA, 22oz","Voodoo Ranger Juicy Haze IPA, 22oz, Beer",400.0,0.0,0.0,0.0,0.0,10.0,29.0,2.0,2.0,6.0
12428,Beverages,Hooter's,"Jekyll Hop Dang Diggity Southern Ipa, Big Daddy","Jekyll Hop Dang Diggity Southern Ipa, Big Dadd...",380.0,0.0,0.0,0.0,0.0,20.0,25.0,2.0,2.0,6.0
