# **Recommendation System for the McDonald's Menu Dataset**

***Content-Based Recommendation System***

Import the necessary modules, load the dataset, and drop duplicate rows and rows with missing values.

In [15]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

In [16]:
# Load the menu data, then drop exact duplicate rows and rows containing any
# missing value. The index is reset afterwards so that row labels always equal
# row positions: the similarity matrix computed further down is addressed by
# position, and a gap left by a dropped row would otherwise shift lookups.
df = (
    pd.read_csv('menu.csv')
    .drop_duplicates()
    .dropna()
    .reset_index(drop=True)
)

In [17]:

# Preview the category, serving size and nutrition columns of the cleaned frame.
df.loc[:, [
    'Category', 'Serving Size',
    'Calories', 'Calories from Fat',
    'Total Fat', 'Saturated Fat', 'Trans Fat',
    'Cholesterol', 'Sodium',
    'Carbohydrates', 'Dietary Fiber', 'Sugars', 'Protein',
]]

Unnamed: 0,Category,Serving Size,Calories,Calories from Fat,Total Fat,Saturated Fat,Trans Fat,Cholesterol,Sodium,Carbohydrates,Dietary Fiber,Sugars,Protein
0,Breakfast,4.8 oz (136 g),300,120,13.0,5.0,0.0,260,750,31,4,3,17
1,Breakfast,4.8 oz (135 g),250,70,8.0,3.0,0.0,25,770,30,4,3,18
2,Breakfast,3.9 oz (111 g),370,200,23.0,8.0,0.0,45,780,29,4,2,14
3,Breakfast,5.7 oz (161 g),450,250,28.0,10.0,0.0,285,860,30,4,2,21
4,Breakfast,5.7 oz (161 g),400,210,23.0,8.0,0.0,50,880,30,4,2,21
...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,Smoothies & Shakes,10.1 oz (285 g),510,150,17.0,9.0,0.5,45,280,80,1,64,12
256,Smoothies & Shakes,13.4 oz (381 g),690,200,23.0,12.0,1.0,55,380,106,1,85,15
257,Smoothies & Shakes,6.7 oz (190 g),340,100,11.0,6.0,0.0,30,190,53,1,43,8
258,Smoothies & Shakes,14.2 oz (403 g),810,290,32.0,15.0,1.0,60,400,114,2,103,21


In [18]:
# Fit a label encoder on the menu categories and keep the resulting integer
# codes in a new column next to the original text labels.
le = LabelEncoder()
df['Category_encoded'] = le.fit_transform(df['Category'])

# Show which integer code each category label was assigned.
print({
    label: code
    for label, code in zip(le.classes_, le.transform(le.classes_))
})

{'Beef & Pork': 0, 'Beverages': 1, 'Breakfast': 2, 'Chicken & Fish': 3, 'Coffee & Tea': 4, 'Desserts': 5, 'Salads': 6, 'Smoothies & Shakes': 7, 'Snacks & Sides': 8}


In [19]:
# Columns that make up each item's feature vector: the encoded category
# followed by the nutrition values.
feature_columns = [
    'Category_encoded',
    'Calories', 'Calories from Fat',
    'Total Fat', 'Saturated Fat', 'Trans Fat',
    'Cholesterol', 'Sodium',
    'Carbohydrates', 'Dietary Fiber', 'Sugars', 'Protein',
]

# Min-max scale every feature column (default MinMaxScaler settings); `c` is
# the scaled matrix, one row per menu item in the same order as `df`.
scaler = MinMaxScaler()
c = scaler.fit_transform(df[feature_columns])

In [20]:
# Inspect the scaled feature matrix (one row per menu item).
print(c)

[[0.25       0.15957447 0.11320755 ... 0.57142857 0.0234375  0.1954023 ]
 [0.25       0.13297872 0.06603774 ... 0.57142857 0.0234375  0.20689655]
 [0.25       0.19680851 0.18867925 ... 0.57142857 0.015625   0.16091954]
 ...
 [0.875      0.18085106 0.09433962 ... 0.14285714 0.3359375  0.09195402]
 [0.875      0.43085106 0.27358491 ... 0.28571429 0.8046875  0.24137931]
 [0.875      0.21808511 0.14150943 ... 0.14285714 0.3984375  0.11494253]]


In [21]:
# Pairwise cosine similarity between all menu items, computed on the
# min-max-scaled matrix `c` rather than on the raw columns -- presumably so
# that large-magnitude columns (e.g. Sodium, Calories) do not dominate.
# Entry [i, j] is the similarity between row i and row j of `c`.
similarity_matrix = cosine_similarity(c)


In [22]:
def get_similar_items(item_name, similarity_matrix, top_n=5, items=None):
    """Return the names of the items most similar to ``item_name``.

    Parameters
    ----------
    item_name : str
        Name of the query item; matched exactly against ``items``.
    similarity_matrix : 2-D array-like
        Square matrix in which row ``i`` holds the similarity of item ``i``
        to every item, in the same order as ``items``.
    top_n : int, optional
        Maximum number of neighbours to return (default 5, as before).
    items : sequence of str, optional
        Item names aligned position-by-position with ``similarity_matrix``.
        Defaults to the ``'Item'`` column of the notebook-level ``df``.

    Returns
    -------
    list of str
        Up to ``top_n`` item names ordered from most to least similar. The
        query item itself is never included. An empty list is returned when
        ``item_name`` is not found.

    Raises
    ------
    ValueError
        If the number of names does not match the length of the query's
        similarity row, i.e. the names and the matrix are misaligned.
    """
    if items is None:
        items = df['Item']

    # Work with a plain positional list so that a non-default DataFrame index
    # (e.g. gaps left by dropped rows) can never be confused with a row
    # position in the similarity matrix.
    names = list(items)
    if item_name not in names:
        return []

    # First occurrence, matching the original behaviour for repeated names.
    query_pos = names.index(item_name)
    scores = similarity_matrix[query_pos]
    if len(scores) != len(names):
        raise ValueError(
            "similarity_matrix has %d columns but %d item names were given"
            % (len(scores), len(names))
        )

    # Exclude the query by position rather than by dropping the first sorted
    # entry: another item with an identical feature vector ties at the top
    # similarity and may sort ahead of the query itself.
    candidates = [pos for pos in range(len(names)) if pos != query_pos]
    # sorted() is stable, so ties keep their original row order.
    candidates.sort(key=lambda pos: scores[pos], reverse=True)

    return [names[pos] for pos in candidates[:max(top_n, 0)]]

In [24]:
# Example query: the menu items closest to one McFlurry variant.
get_similar_items(
    item_name="McFlurry with Reese's Peanut Butter Cups (Snack)",
    similarity_matrix=similarity_matrix,
)


['McFlurry with M&M’s Candies (Snack)',
 'McFlurry with Oreo Cookies (Snack)',
 'French Vanilla Latte (Small)',
 'Caramel Latte (Small)',
 'Hazelnut Latte (Small)']