In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import pairwise_distances
from numpy.ma.core import mean
from ast import literal_eval
import math 
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the three input tables. The recipe and health files are decoded as Latin-1;
# the interactions file uses the default encoding.
recipes = pd.read_csv("RAW_recipes.csv", encoding="Latin1")
ratings = pd.read_csv("interactions_train.csv")
# The health table is keyed by its 'users' column.
users_health = pd.read_csv("diseases_dataset.csv", encoding="Latin1").set_index('users')

In [3]:
# Keep only the columns the recommender uses.
ratings = ratings.drop(columns=['date', 'u', 'i'])

# Keep users with MORE than 5 ratings (users with 5 or fewer are removed).
ratings_per_user = ratings.groupby('user_id')['user_id'].transform('count')
ratings = ratings[ratings_per_user > 5]

# Then keep recipes with MORE than 3 remaining ratings. This second pass is applied
# after the user filter and is not re-checked against it, so a user may end up with
# 5 or fewer ratings once sparse recipes are removed.
ratings_per_recipe = ratings.groupby('recipe_id')['user_id'].transform('count')
ratings = ratings[ratings_per_recipe > 3]

In [4]:
# NOTE: `recipes_v1` is deliberately an alias of `recipes`, not a copy. Every column
# added or dropped below is therefore also visible through `recipes`, which a later
# cell merges against to read the tag flags of the recommended recipes.
recipes_v1 = recipes

# Tags are stored as the string repr of a Python list; parse them into real lists.
recipes_v1['tags'] = recipes_v1['tags'].apply(lambda raw_tags: literal_eval(str(raw_tags)))

# Split the "[a, b, c, ...]" nutrition string into one column per value.
# The values stay strings. They are not used by the recommender itself, but could
# later drive recommendations that follow a diet plan.
NUTRITION_COLUMNS = ['calories', 'total fat (%)', 'sugar (%)', 'sodium (%)',
                     'protein (%)', 'saturated fat (%)', 'carbohydrates (%)']
recipes_v1[NUTRITION_COLUMNS] = recipes_v1['nutrition'].str.split(",", expand=True)
# Strip the list brackets left on the first and last pieces.
recipes_v1['calories'] = recipes_v1['calories'].str.replace('[', '', regex=False)
recipes_v1['carbohydrates (%)'] = recipes_v1['carbohydrates (%)'].str.replace(']', '', regex=False)

# Drop the columns that are not needed (in place, so `recipes` sees the change too).
recipes_v1.drop(['contributor_id', 'submitted', 'ingredients', 'steps', 'description', 'nutrition'],
                axis=1, inplace=True)

# One boolean column per tag of interest: True when the recipe carries that tag.
# The column name is always the tag name. The previous hand-written version filled
# the 'brunch' column from the 'side-dishes' tag by mistake.
TAG_FLAG_COLUMNS = [
    '30-minutes-or-less', 'brunch', 'easy', 'vegetarian', 'dietary', 'kid-friendly',
    'inexpensive', '5-ingredients-or-less', 'low-calorie', 'vegan', 'weeknight',
    'holiday-event', 'sweet', 'main-dish', 'desserts', 'lunch', 'snacks', 'breakfast',
    'occasion', '15-minutes-or-less', 'comfort-food', 'diabetic', 'side-dishes', 'candy',
    'one-dish-meal', 'seasonal', 'healthy', 'healthy-2', 'low-cholesterol',
    'low-saturated-fat', 'low-sodium', 'low-protein', 'high-calcium', 'low-calcium',
    'low-carb', 'gluten-free', '3-steps-or-less', 'beginner-cook', 'salads', 'low-fat',
    'for-large-groups', 'for-1-or-2', 'romantic', 'high-fiber', 'high-protein',
]
# Build each recipe's tag set once so the membership tests below are O(1).
recipe_tag_sets = [set(recipe_tags) for recipe_tags in recipes_v1['tags']]
for tag_name in TAG_FLAG_COLUMNS:
    recipes_v1[tag_name] = [tag_name in tag_set for tag_set in recipe_tag_sets]

In [5]:
# User/recipe interaction pairs without the rating value (drop returns a new frame,
# so `ratings` itself is left untouched).
ratings_v1 = ratings.drop(columns=['rating'])


In [6]:
# Build one taste profile per user: for every profile category, the percentage of the
# user's rated recipes that carry the corresponding tag(s). The profiles are used to
# explain the recommendations given to a user.
#
# This replaces a per-user / per-rating Python loop that scanned the whole recipe
# table for each rating, called the deprecated `Series.bool()`, and grew the result
# with `pd.concat` one row at a time (which also left the columns as object dtype).
unique_users = ratings_v1['user_id'].unique()

# Profile column -> recipe flag column(s). A recipe counts towards a profile column
# when ANY of the listed flags is True.
profile_tag_groups = {
    'quick(%)': ['30-minutes-or-less', '15-minutes-or-less'],
    'brunch(%)': ['brunch'],
    'easy(%)': ['easy', 'beginner-cook'],
    'vegetarian(%)': ['vegetarian'],
    'kid-friendly(%)': ['kid-friendly'],
    'inexpensive(%)': ['inexpensive'],
    'dietary(%)': ['dietary'],
    'few-ingr(%)': ['5-ingredients-or-less'],
    'low-cal(%)': ['low-calorie'],
    'vegan(%)': ['vegan'],
    'weeknight(%)': ['weeknight'],
    'holiday-event(%)': ['holiday-event'],
    'sweet(%)': ['sweet'],
    'main(%)': ['main-dish'],
    'desserts(%)': ['desserts'],
    'lunch(%)': ['lunch'],
    'snacks(%)': ['snacks'],
    'breakfast(%)': ['breakfast'],
    'occasion(%)': ['occasion'],
    'comfort(%)': ['comfort-food'],
    'diabetic(%)': ['diabetic'],
    'healthy(%)': ['healthy', 'healthy-2'],
    'side-dishes(%)': ['side-dishes'],
    'candy(%)': ['candy'],
    'one-dish(%)': ['one-dish-meal'],
    'seasonal(%)': ['seasonal'],
    'low-chol(%)': ['low-cholesterol'],
    'low-sat-fat(%)': ['low-saturated-fat'],
    'low-sodium(%)': ['low-sodium'],
    'low-protein(%)': ['low-protein'],
    'high-calcium(%)': ['high-calcium'],
    'low-calcium(%)': ['low-calcium'],
    'low-carb(%)': ['low-carb'],
    'gluten-free(%)': ['gluten-free'],
    '3-steps(%)': ['3-steps-or-less'],
    'salads(%)': ['salads'],
    'low-fat(%)': ['low-fat'],
    'large(%)': ['for-large-groups'],
    'for1-2(%)': ['for-1-or-2'],
    'romantic(%)': ['romantic'],
    'high-fiber(%)': ['high-fiber'],
    'high-protein(%)': ['high-protein'],
}
profile_columns = list(profile_tag_groups)

# One row per recipe, one 0.0/1.0 column per profile category.
recipe_profile_flags = pd.DataFrame({
    profile_col: recipes_v1[flag_cols].any(axis=1).to_numpy(dtype=float)
    for profile_col, flag_cols in profile_tag_groups.items()
})
recipe_profile_flags.insert(0, 'recipe_id', recipes_v1['id'].to_numpy())
# Only recipes that were actually rated matter for the lookup below.
recipe_profile_flags = recipe_profile_flags[
    recipe_profile_flags['recipe_id'].isin(ratings_v1['recipe_id'])
]

# Attach the flags to every interaction. As before, every rated recipe must match
# exactly one recipe row: duplicates make `validate` raise, missing ones raise below.
rated_with_flags = ratings_v1[['user_id', 'recipe_id']].merge(
    recipe_profile_flags, how='left', on='recipe_id',
    validate='many_to_one', indicator=True,
)
if not rated_with_flags['_merge'].eq('both').all():
    raise ValueError("some rated recipes are missing from the recipes table")

# The mean of the 0/1 flags per user is the fraction of that user's ratings carrying
# the tag. sort=False keeps users in order of first appearance, like `unique_users`.
# 'user_id' stays a regular column so it can be promoted to the index afterwards.
percentage_df = (
    rated_with_flags
    .groupby('user_id', sort=False)[profile_columns]
    .mean()
    .mul(100)
    .reset_index()
)




In [7]:
# Key the taste profiles by user id so they can be looked up with .loc[user_id].
percentage_df = percentage_df.set_index('user_id')

In [8]:
# Same taste profile as `percentage_df`, but computed only over the recipes a user
# rated 4 or higher. Users with no such rating get no row. The result is indexed by
# user_id.
#
# This replaces a per-user / per-rating Python loop that scanned the whole recipe
# table for each rating, called the deprecated `Series.bool()`, grew the result with
# `pd.concat` one row at a time, and filtered `ratings_v1` with a mask built from
# `ratings`.
liked_interactions = ratings.loc[ratings['rating'] >= 4, ['user_id', 'recipe_id']]

# Profile column -> recipe flag column(s). A recipe counts towards a profile column
# when ANY of the listed flags is True.
over4_tag_groups = {
    'quick(%)': ['30-minutes-or-less', '15-minutes-or-less'],
    'brunch(%)': ['brunch'],
    'easy(%)': ['easy', 'beginner-cook'],
    'vegetarian(%)': ['vegetarian'],
    'kid-friendly(%)': ['kid-friendly'],
    'inexpensive(%)': ['inexpensive'],
    'dietary(%)': ['dietary'],
    'few-ingr(%)': ['5-ingredients-or-less'],
    'low-cal(%)': ['low-calorie'],
    'vegan(%)': ['vegan'],
    'weeknight(%)': ['weeknight'],
    'holiday-event(%)': ['holiday-event'],
    'sweet(%)': ['sweet'],
    'main(%)': ['main-dish'],
    'desserts(%)': ['desserts'],
    'lunch(%)': ['lunch'],
    'snacks(%)': ['snacks'],
    'breakfast(%)': ['breakfast'],
    'occasion(%)': ['occasion'],
    'comfort(%)': ['comfort-food'],
    'diabetic(%)': ['diabetic'],
    'healthy(%)': ['healthy', 'healthy-2'],
    'side-dishes(%)': ['side-dishes'],
    'candy(%)': ['candy'],
    'one-dish(%)': ['one-dish-meal'],
    'seasonal(%)': ['seasonal'],
    'low-chol(%)': ['low-cholesterol'],
    'low-sat-fat(%)': ['low-saturated-fat'],
    'low-sodium(%)': ['low-sodium'],
    'low-protein(%)': ['low-protein'],
    'high-calcium(%)': ['high-calcium'],
    'low-calcium(%)': ['low-calcium'],
    'low-carb(%)': ['low-carb'],
    'gluten-free(%)': ['gluten-free'],
    '3-steps(%)': ['3-steps-or-less'],
    'salads(%)': ['salads'],
    'low-fat(%)': ['low-fat'],
    'large(%)': ['for-large-groups'],
    'for1-2(%)': ['for-1-or-2'],
    'romantic(%)': ['romantic'],
    'high-fiber(%)': ['high-fiber'],
    'high-protein(%)': ['high-protein'],
}
over4_columns = list(over4_tag_groups)

# One row per liked recipe, one 0.0/1.0 column per profile category.
liked_recipe_flags = pd.DataFrame({
    profile_col: recipes_v1[flag_cols].any(axis=1).to_numpy(dtype=float)
    for profile_col, flag_cols in over4_tag_groups.items()
})
liked_recipe_flags.insert(0, 'recipe_id', recipes_v1['id'].to_numpy())
liked_recipe_flags = liked_recipe_flags[
    liked_recipe_flags['recipe_id'].isin(liked_interactions['recipe_id'])
]

# Attach the flags to every liked interaction. As before, every liked recipe must match
# exactly one recipe row: duplicates make `validate` raise, missing ones raise below.
liked_with_flags = liked_interactions.merge(
    liked_recipe_flags, how='left', on='recipe_id',
    validate='many_to_one', indicator=True,
)
if not liked_with_flags['_merge'].eq('both').all():
    raise ValueError("some liked recipes are missing from the recipes table")

# The mean of the 0/1 flags per user is the fraction of that user's liked recipes
# carrying the tag.
over4_by_user = liked_with_flags.groupby('user_id')[over4_columns].mean().mul(100)

# Present the rows in `unique_users` order. Users without any rating >= 4 come back
# from the reindex as all-NaN rows and are dropped.
percentage_df_over4 = (
    over4_by_user
    .reindex(pd.Index(unique_users, name='user_id'))
    .dropna(how='all')
    .rename_axis('user_id')
)

In [9]:
# Mean-centre every rating: subtract the rating user's own average from it.
# `Mean` holds one row per user with that user's average rating.
Mean = ratings.groupby('user_id', as_index=False)['rating'].mean()
# After the merge, 'rating_x' is the original rating and 'rating_y' the user's average.
rating_avg = ratings.merge(Mean, on='user_id').assign(
    normalized_rating=lambda merged: merged['rating_x'] - merged['rating_y']
)

In [10]:
# User x recipe matrix of mean-centred ratings; pairs without a rating are NaN.
tmp_table = rating_avg.pivot(index='user_id', columns='recipe_id', values='normalized_rating')

In [11]:
# Replace the NaN entries (unrated user/recipe pairs) with 0. The pivoted values are
# mean-centred ratings (rating minus the user's own average), so 0 stands for
# "exactly this user's average rating" - it is NOT the recipe's average rating.
final=tmp_table.fillna(0)

In [12]:
# User-user cosine similarity over the mean-centred rating vectors.
#
# The matrix is computed exactly once. The earlier
# `np.fill_diagonal(cosine_similarity(final), 0)` call zeroed the diagonal of a
# temporary matrix that was thrown away immediately, so it had no effect on
# `similarity` and only cost a second full n_users x n_users computation.
# Self-similarity therefore stays on the diagonal, as it effectively did before;
# neighbour selection is responsible for excluding a user from their own neighbours.
similarity = pd.DataFrame(
    cosine_similarity(final),
    index=final.index,
    columns=final.index,
)

MemoryError: Unable to allocate 1.34 GiB for an array with shape (13400, 13400) and data type float64

In [None]:
# Neighbourhood size for k-nearest-neighbours: the integer part of the square root of
# the number of users in the similarity matrix.
k = int(math.sqrt(len(similarity)))

In [None]:
def _nearest_neighbours(similarity_row, n_neighbours):
    """Return the ids of the `n_neighbours` users most similar to the row's user.

    `similarity_row` is one row of the user-user similarity matrix; its name is the
    user's own id. The user is removed explicitly before ranking, so the result never
    contains the user, even when their self-similarity is not the row maximum
    (e.g. an all-zero rating vector, or another user with an identical vector).
    """
    other_users = similarity_row.drop(labels=similarity_row.name)
    ranked = other_users.sort_values(ascending=False)
    return pd.Series(ranked.index[:n_neighbours])


# One row per user holding the ids of that user's k nearest neighbours.
# The neighbourhood size is the `k` computed above rather than a hard-coded 40, and
# the user is excluded by id instead of by dropping whichever column ranked first.
knn = similarity.apply(lambda row: _nearest_neighbours(row, k), axis=1)
# Keep the historical 1-based column labels (column 0 used to be the dropped self column).
knn.columns = range(1, knn.shape[1] + 1)


In [None]:
# Ask for the id of the user to build recommendations for; non-integer input raises ValueError.
raw_user_id = input("Enter the id of the user you want to recommend recipes to : ")
user = int(raw_user_id)

In [None]:
# Collect every recipe rated by the user's k nearest neighbours.
# `all_users_recipes`: for each user, their rated recipe ids joined into one
# comma-separated string.
all_users_recipes = (
    rating_avg.astype({"recipe_id": str})
    .groupby(by='user_id')['recipe_id']
    .apply(lambda recipe_ids: ','.join(recipe_ids))
)
# Ids of the k nearest neighbours of the target user (raises KeyError for an unknown user).
all_k_nearest_neighbours_of_user = knn.loc[user].tolist()
# The comma-joined recipe strings of exactly those neighbours.
recipes_of_k_nearest_neighbours = all_users_recipes[
    all_users_recipes.index.isin(all_k_nearest_neighbours_of_user)
]
# Flatten into a list of recipe-id strings (may contain repeats).
neighbours_recipes = ','.join(recipes_of_k_nearest_neighbours.values).split(',')

# Recipes the user already rated: the non-NaN columns of the user's row in the
# un-filled pivot table. These are integer recipe ids.
user_recipes = tmp_table.columns[tmp_table.loc[user].notna()].tolist()

# Candidates for recommendation: neighbours' recipes minus the ones the user rated.
# Ids are converted to int BEFORE the membership test - comparing the string ids
# against the integer `user_recipes` never matched, so nothing was excluded.
# Repeated ids are collapsed (first occurrence wins) so each recipe is predicted once,
# and the empty string produced by joining zero neighbours is skipped.
already_rated = set(user_recipes)
candidate_ids = (int(recipe_id) for recipe_id in neighbours_recipes if recipe_id)
possible_recipes = list(dict.fromkeys(
    recipe_id for recipe_id in candidate_ids if recipe_id not in already_rated
))

In [None]:
# Predict a rating for every candidate recipe:
#   pred(recipe) = user_avg
#                  + sum_over_neighbours(similarity * neighbour_normalized_rating(recipe))
#                    / sum_over_neighbours(similarity)
#
# Both sums are per recipe. The previous loop initialised its accumulators once,
# before the recipe loop, so every prediction also contained the sums of all recipes
# processed before it.
user_avg = Mean.loc[Mean['user_id'] == user, 'rating'].values[0]  # the user's average rating

# Similarity of each neighbour to the target user, in neighbour order.
neighbour_similarities = similarity.loc[user, all_k_nearest_neighbours_of_user].to_numpy(dtype=float)
# Neighbours x candidate-recipes matrix of mean-centred ratings (0 where unrated).
neighbour_ratings = final.loc[all_k_nearest_neighbours_of_user, possible_recipes].to_numpy(dtype=float)

# Similarity-weighted sum of the neighbours' ratings, one value per candidate recipe.
weighted_rating_sums = neighbour_similarities @ neighbour_ratings
similarity_total = neighbour_similarities.sum()

if similarity_total != 0:
    normalized_predictions = weighted_rating_sums / similarity_total
else:
    # No usable neighbour signal: fall back to the user's own average.
    normalized_predictions = np.zeros_like(weighted_rating_sums)

# Shift back from mean-centred to actual rating scale; aligned with `possible_recipes`.
recipes_predictions = (user_avg + normalized_predictions).tolist()

In [None]:
# Rank every candidate recipe by predicted rating and attach the recipe details.
# (No truncation happens here; all candidates are kept, best first.)
rec_df = pd.DataFrame({'recipe_id': possible_recipes, 'rating': recipes_predictions})
top_recommendations = (
    rec_df
    .sort_values(by='rating', ascending=False)
    .rename(columns={'recipe_id': 'id'})  # match the key column of the recipes table
)
recipes_final = top_recommendations.merge(recipes, how='inner', on='id')
recipe_names = recipes_final['name'].tolist()

In [None]:
# Best predicted rating first, with a fresh 0..n-1 index.
recipes_final = recipes_final.sort_values(by='rating', ascending=False, ignore_index=True)

In [None]:
# Average taste profile of the users most similar to the target user.

# Empty frame with the profile columns plus 'user_id'. Nothing in this cell fills it;
# it is kept because code outside this cell may still refer to the name.
knn_avarage_percentages = pd.DataFrame(columns=['user_id', *percentage_df.columns])

# Profiles of the k nearest neighbours only.
neighbour_profiles = percentage_df[percentage_df.index.isin(all_k_nearest_neighbours_of_user)]

# Column-wise mean over the NEIGHBOURS' profiles. The previous version selected the
# neighbours and then averaged `percentage_df` itself, i.e. all users, so the
# "similar users" figures were really the population average.
# astype(float) guards against object-dtype profile columns.
neighbour_mean = neighbour_profiles.astype(float).mean()

# Single-row frame indexed by the target user's id, one column per profile category.
knn_percentages = neighbour_mean.to_frame(name=user).T.rename_axis('user_id')



In [None]:
# Horizontal bar chart comparing the user's taste profile with the neighbours' average.
plt.rcParams["figure.figsize"] = [20, 15]

# Neighbours' average percentages for this user, as a named column.
n = knn_percentages.loc[user].rename('neighbours_avg_per')
# The user's own percentages, as a named column.
u = percentage_df.loc[user].rename('user_per')

# Side-by-side columns: one row per profile category.
user_vs_neighbours = pd.concat([u, n], axis=1)
user_vs_neighbours.plot.barh(title="User v Simillar Users percentages", width=0.5)

In [None]:
# Looks up the taste profile of one hard-coded user id (171163) and overwrites `u`,
# which the plotting cell above set to the selected user's profile.
# NOTE(review): this looks like leftover debugging - confirm whether it should use
# `user` instead of a fixed id, or be removed.
u=percentage_df.loc[171163].transpose()

In [None]:
#implementation of method 1
# For each of the top 5 recommended recipes, explain the recommendation through the tag
# that is most frequent among the recipes rated by the similar users.

# (percentage column, recipe tag columns that feed it). The order is the order in which
# the labels are tested, so ties in idxmax() are always resolved the same way.
label_sources = [
    ('quick(%)', ('30-minutes-or-less', '15-minutes-or-less')),
    ('brunch(%)', ('brunch',)),
    ('easy(%)', ('easy', 'beginner-cook')),
    ('vegetarian(%)', ('vegetarian',)),
    ('kid-friendly(%)', ('kid-friendly',)),
    ('inexpensive(%)', ('inexpensive',)),
    ('dietary(%)', ('dietary',)),
    ('few-ingr(%)', ('5-ingredients-or-less',)),
    ('low-cal(%)', ('low-calorie',)),
    ('vegan(%)', ('vegan',)),
    ('weeknight(%)', ('weeknight',)),
    ('holiday-event(%)', ('holiday-event',)),
    ('sweet(%)', ('sweet',)),
    ('main(%)', ('main-dish',)),
    ('desserts(%)', ('desserts',)),
    ('lunch(%)', ('lunch',)),
    ('snacks(%)', ('snacks',)),
    ('breakfast(%)', ('breakfast',)),
    ('occasion(%)', ('occasion',)),
    ('comfort(%)', ('comfort-food',)),
    ('diabetic(%)', ('diabetic',)),
    ('healthy(%)', ('healthy', 'healthy-2')),
    ('side-dishes(%)', ('side-dishes',)),
    ('candy(%)', ('candy',)),
    ('one-dish(%)', ('one-dish-meal',)),
    ('seasonal(%)', ('seasonal',)),
    ('low-chol(%)', ('low-cholesterol',)),
    ('low-sat-fat(%)', ('low-saturated-fat',)),
    ('low-sodium(%)', ('low-sodium',)),
    ('low-protein(%)', ('low-protein',)),
    ('high-calcium(%)', ('high-calcium',)),
    ('low-calcium(%)', ('low-calcium',)),
    ('low-carb(%)', ('low-carb',)),
    ('gluten-free(%)', ('gluten-free',)),
    ('3-steps(%)', ('3-steps-or-less',)),
    ('salads(%)', ('salads',)),
    ('low-fat(%)', ('low-fat',)),
    ('large(%)', ('for-large-groups',)),
    ('for1-2(%)', ('for-1-or-2',)),
    ('romantic(%)', ('romantic',)),
    ('high-fiber(%)', ('high-fiber',)),
    ('high-protein(%)', ('high-protein',)),
]
sources_by_label = dict(label_sources)

t = ratings[(ratings['user_id'] == user)]

# Look up every recipe the user rated once, instead of once per tag of every recommendation.
# Each entry is (row of recipes_v1 for that recipe, the user's rating for it).
rated_recipes = []
for rated_id in t['recipe_id'].values:
    rated_tags = recipes_v1[recipes_v1['id'] == rated_id].iloc[0]
    rated_score = t[t['recipe_id'] == rated_id]['rating'].values[0]
    rated_recipes.append((rated_tags, rated_score))

explanation_rows = []
for index, row in recipes_final.head(5).iterrows():
    # percentage labels whose source tag is set on this recipe
    matched_labels = [label for label, source_columns in label_sources
                      if any(row[column] == True for column in source_columns)]
    if not matched_labels:
        # none of the known tags is set: idxmax() below would fail on an empty selection
        explanation_rows.append([row['name'], 'n/a', 'n/a', 'n/a', 'n/a'])
        continue

    # columns that are truthy for this recipe; the first 14 of them are dropped
    # NOTE(review): presumably those 14 are the non-tag metadata columns (id, name, ...) - confirm
    this_recipe = recipes_final[recipes_final['id'] == row['id']]
    tags_tmp = this_recipe.columns[this_recipe.all()]
    tags_tmp = tags_tmp.delete(np.arange(14))

    # preference score of the user for every tag of this recipe: each rating of a recipe
    # carrying the tag is weighted by 1/count (count = matching recipes seen so far) and the
    # result is blended with the user's average rating `user_avg` through the constant `k`
    tag_pref_by_column = {}
    for tag_column in tags_tmp.values:
        count = 0
        sum_for_pref = 0
        sum_tag_share = 0
        for rated_tags, rated_score in rated_recipes:
            if rated_tags[tag_column]:
                count = count + 1
                tag_share = 1 / count
                sum_for_pref = sum_for_pref + rated_score * tag_share
                sum_tag_share = sum_tag_share + tag_share
        tag_pref_by_column[tag_column] = "{:.2f}".format((sum_for_pref + (user_avg * k)) / (sum_tag_share + k))

    # the tag that explains the recommendation: most frequent one among the similar users
    knn_selection = knn_percentages.loc[user, matched_labels]
    top_label = knn_selection.idxmax()

    # the score is stored under the recipe column name, not under the shortened label,
    # so go through the label -> columns table to find it
    pref_score = ''
    for source_column in sources_by_label[top_label]:
        if source_column in tag_pref_by_column:
            pref_score = tag_pref_by_column[source_column]
            break

    explanation_rows.append([
        row['name'],
        top_label.replace('(%)', ''),
        str(round(knn_selection.max())) + "% of the recipes your simillar users rate",
        # percentage for the displayed tag itself, not the maximum over all matched tags
        str(round(percentage_df_over4.loc[user, top_label])) + "% of the recipes you rate highly (over 4)",
        pref_score + ' / 5.0',
    ])

# reshape keeps the (rows, 5) layout even when there is nothing to show
arr = np.array(explanation_rows, dtype=str).reshape(-1, 5)

plt.rcParams["figure.figsize"] =[12.50, 2.50]
fig, axs = plt.subplots(1, 1)
columns = ("Recipe Name", "The recipe has the tag","That appears in","Also appears in","And your overall preference score for this tag is")
axs.axis('tight')
axs.axis('off')
the_table = axs.table(cellText=arr,cellLoc="center", colLabels=columns, loc='center',colColours=np.full(len(columns), 'lavender'))
the_table.scale(2,4.5)
plt.title("Method 1 Results Table",y=1.8,fontsize=20) 
plt.show()


In [None]:
# Open the food.com page of each of the top 5 recommended recipes in the web browser
import webbrowser

for _, recipe in recipes_final.head(5).iterrows():
    # URL slug: spaces become dashes, then "-s-" is joined to the previous word
    # (replace() leaves the text untouched when "-s-" does not occur)
    slug = recipe['name'].replace(" ", "-").replace("-s-", "s-")
    recipe_url = "https://www.food.com/recipe/" + slug + "-" + str(recipe['id'])
    webbrowser.open(recipe_url, new=1)

In [None]:
#implementation of method 2
# Interactive filtering of the recommendations by keyword (tag column) and/or nutrition value.
# Every accepted round narrows `recipes_final_filtering` further.

top_recommendations_filtering = (
    rec_df.sort_values(by='rating', ascending=False)
          .rename(columns={'recipe_id': 'id'})
)
# work on a copy: filtering must never remove rows from recipes_final, which other cells read
recipes_final_filtering = recipes_final.copy()

while True:
    option = input("Enter your preferred filtering option(a,b,c): \n a)Keyword \n b)Nutrition \n c)Both \n") #asking for user to give his preferred filtering option
    if option not in ('a', 'b', 'c'):
        print("Unknown option '" + option + "', please answer a, b or c.")
        continue

    # the filters are applied to a candidate frame and only committed once every answer is valid
    filtered = recipes_final_filtering

    if option=='a' or option=='c':
        pref_keyword = input("Enter the prefered keyword: ") #asking for user to give his prefered tag/keyword
        if pref_keyword not in filtered.columns:
            print("Unknown keyword '" + pref_keyword + "'.")
            continue
        # drop the recipes where the keyword column is False
        filtered = filtered[~(filtered[pref_keyword] == False)]

    if option=='b' or option=='c':
        pref_nutr = input("Enter your important nutrition(type the name of nutrition eg 'calories'): \n a)calories \n b)total fat (%) \n c)sugar (%) \n d)sodium (%) \n e)protein (%) \n f)saturated fat (%) \n g)carbohydrates (%) \n") #asking for user to give his important nutrition
        if pref_nutr not in filtered.columns:
            print("Unknown nutrition '" + pref_nutr + "', type the full name, e.g. calories.")
            continue

        try:
            num_input = float(input("Enter the nutrition value you want"))
        except ValueError:
            print("The nutrition value must be a number.")
            continue

        comparison = input("Enter your preferce: \n a)less or equal than nutrition value \n b)greater than nutrition value \n")
        if comparison not in ('a', 'b'):
            print("Unknown comparison '" + comparison + "', please answer a or b.")
            continue

        nutr_values = filtered[pref_nutr].astype(float)
        if comparison == 'a':
            exp = "less or equal than"
            filtered = filtered[~(nutr_values > num_input)]
        else:
            exp = "more than"
            filtered = filtered[~(nutr_values <= num_input)]

    arr1 = np.empty((0,3),str)
    if filtered.empty:
        # an empty table cannot be drawn, and committing it would leave nothing to filter later
        print("No recipes match these filters; keeping the previous selection.")
    else:
        recipes_final_filtering = filtered

        # the explanation is the same for every listed recipe
        if option == 'a':
            reason = pref_keyword
            because = "Its your prefered keyword for recipes"
        elif option == 'b':
            reason = pref_nutr
            because = "You choose to search for recipes with "+exp+" "+str(num_input)+" "+pref_nutr
        else:
            reason = "Keyword: "+pref_keyword+" and Nutrition: "+pref_nutr
            because = "You choose to search for "+pref_keyword+" recipes with "+exp+" "+str(num_input)+" "+pref_nutr
        arr1 = np.array([[name, reason, because] for name in recipes_final_filtering.head(5)['name']], dtype=str)

        plt.rcParams["figure.figsize"] =[12.50, 2.50]
        fig, axs = plt.subplots(1, 1)
        columns = ("Recipe Name", "The reason","Because")
        axs.axis('tight')
        axs.axis('off')
        the_table = axs.table(cellText=arr1,cellLoc="center", colLabels=columns, loc='center',colColours=np.full(len(columns), 'lavender'))
        the_table.scale(2.5,4.5)
        plt.title("Method 2 Results Table",y=1.8,fontsize=20) 
        plt.show()

    cont_choise = input("Do you want to continue filtering? [Y/n] \n")
    if(cont_choise=='N' or cont_choise=='n'):
        break
        



In [None]:
# Open the food.com page of the top 5 recipes left after the method 2 filtering
import webbrowser

FOOD_COM_RECIPE_URL = "https://www.food.com/recipe/"
for _, recipe in recipes_final_filtering.head(5).iterrows():
    # spaces become dashes; replace() is a no-op when "-s-" is absent
    slug = recipe['name'].replace(" ", "-").replace("-s-", "s-")
    webbrowser.open(FOOD_COM_RECIPE_URL + slug + "-" + str(recipe['id']), new=1)

In [None]:
#create dataframes for each chronic disease that include the tags we are about to use to recommend recipes
# Lists (not sets) are used so that the row order is the same on every kernel start;
# each frame has a single column named 0 holding the tag names.
cancer = pd.DataFrame(['high-protein', 'high-fiber', 'low-saturated-fat', 'low-sodium'])
obesity = pd.DataFrame(['high-protein', 'low-fat', 'low-saturated-fat', 'low-calorie', 'high-calcium', 'low-carb'])
diabetes = pd.DataFrame(['diabetic'])
heart = pd.DataFrame(['high-protein', 'high-fiber', 'low-saturated-fat', 'low-sodium', 'low-cholesterol', 'low-carb'])
dental = pd.DataFrame(['high-protein', 'high-fiber', 'low-sodium', 'low-carb', 'high-calcium'])
osteoporosis = pd.DataFrame(['high-protein', 'high-calcium', 'low-saturated-fat', 'low-sodium'])
# flag set by the disease-count cell when the user is diabetic
diab=False

In [None]:
#count how many diseases the user has
# count_diseases ends up as the number of True entries in the user's row, not counting Diabetes;
# diabetes is tracked separately through the `diab` flag.
user_conditions = users_health.loc[user]
count_diseases = user_conditions.value_counts().get(True, 0)

# Read the Diabetes column directly: value_counts('Diabetes') would pass the string as the
# `normalize` argument and only detect diabetes when every entry of the row is True.
# Assigning the flag on every run keeps the cell idempotent.
diab = bool(user_conditions['Diabetes'] == True)
if diab:
    count_diseases = count_diseases - 1

In [None]:
#create the user health profile
# health_history_df stacks the tag frames of every disease flagged for the user (one tag per
# row, column 0); for_explanation_print_diseases is the matching text shown in the result tables.

# (users_health column, tag frame of the disease, label used in the explanations)
disease_profiles = [
    ('Cancer', cancer, 'Cancer patient,'),
    ('Obesity', obesity, 'Obese,'),
    ('Diabetes', diabetes, 'Diabetic,'),
    # plain ASCII "H" so the label is not written with a look-alike character
    ('Cardiovascular', heart, 'Heart patient,'),
    ('Dental', dental, 'Dental patient,'),
    ('Osteoporosis', osteoporosis, 'Osteoporosis patient,'),
]
user_profiles = [(disease_tags, disease_label)
                 for disease_column, disease_tags, disease_label in disease_profiles
                 if users_health.loc[user, disease_column]]

if user_profiles:
    health_history_df = pd.concat([disease_tags for disease_tags, disease_label in user_profiles], ignore_index=True)
else:
    health_history_df = pd.DataFrame({})

for_explanation_print_diseases = ''.join(disease_label for disease_tags, disease_label in user_profiles)
# decide on the collected labels rather than on count_diseases, which the disease-count
# cell reduces by one for diabetes
if not for_explanation_print_diseases:
    for_explanation_print_diseases = 'Healthy user'


In [None]:
#when the user has at least one chronic disease: keep the tags whose number of
#occurrences equals count_diseases and select the recipes for the health-based methods
if not health_history_df.empty:
    health_history_df = health_history_df.rename(columns={0: 'tag'})
    # how many of the user's diseases list each tag
    health_history_df['occurrences'] = health_history_df.groupby('tag')['tag'].transform('count')
    health_history_df=health_history_df.drop_duplicates().sort_values(by='occurrences', ascending=False)
    health_history_df=health_history_df[health_history_df['occurrences'] == count_diseases]
    if diab==True:
        # DataFrame.append was removed in pandas 2.0; concat adds the same row
        # (its 'occurrences' value is left missing, as before)
        health_history_df = pd.concat([health_history_df, pd.DataFrame([{'tag': 'diabetic'}])], ignore_index=True)

    # NOTE(review): every pass filters recipes_final from scratch, so only the LAST tag
    # decides the selection. Confirm whether recipes should carry all tags or any tag.
    for x in health_history_df['tag'].astype(str).tolist():
        health_recipes_final_filtering_final=recipes_final[recipes_final[x]==True]

    # diabetic users only get recipes tagged as diabetic
    if diab==True:
        health_recipes_final_filtering_final=recipes_final[recipes_final['diabetic']==True]


In [None]:
#visualizing the results of method 3
# Table of the top 5 health-filtered recipes with the user's medical history and the
# health tags each recipe carries.
arr1=np.empty((0,3),str)

if not health_history_df.empty:
    health_tags = health_history_df['tag'].astype(str).tolist()
    explanation_rows = []
    for index, row in health_recipes_final_filtering_final.head(5).iterrows():
        # health tags set on this recipe; 'diabetic' is displayed as 'diabetic-friendly'
        for_explanation_print_tag = ''.join(
            tag + '-friendly,' if tag == 'diabetic' else tag + ','
            for tag in health_tags if row[tag] == True
        )
        explanation_rows.append([row['name'], for_explanation_print_diseases, for_explanation_print_tag])

    if explanation_rows:
        arr1 = np.array(explanation_rows, dtype=str)
        plt.rcParams["figure.figsize"] =[12.50, 2.50]
        fig, axs = plt.subplots(1, 1)
        columns = ("Recipe Name", "Medical History","The reason that recipe is suitable for your diet")
        axs.axis('tight')
        axs.axis('off')
        the_table = axs.table(cellText=arr1,cellLoc="center", colLabels=columns, loc='center',colColours=np.full(len(columns), 'lavender'))
        the_table.scale(2.5,4.5)
        plt.title("Method 3 Results Table",y=1.8,fontsize=20) 
        plt.show()
    else:
        # a table without rows cannot be drawn
        print("No recommended recipe matches your health profile.")

In [None]:
# Open the food.com page of the top 5 health-filtered recipes (only for users with a health profile)
import webbrowser

if not health_history_df.empty:
    for _, recipe in health_recipes_final_filtering_final.head(5).iterrows():
        # spaces become dashes; replace() is a no-op when "-s-" is absent
        slug = recipe['name'].replace(" ", "-").replace("-s-", "s-")
        webbrowser.open("https://www.food.com/recipe/" + slug + "-" + str(recipe['id']), new=1)

In [None]:
#implementation of method 4
# Interactive keyword / nutrition filtering on top of the health-filtered recipes.
# Every accepted round narrows `health_recipes_final_filtering_final` further.
if not health_history_df.empty:
    health_top_recommendations_filtering = (
        rec_df.sort_values(by='rating', ascending=False)
              .rename(columns={'recipe_id': 'id'})
    )
    # merge the frame built just above, so this cell does not depend on the method 2 cell
    health_recipes_final_filtering = health_top_recommendations_filtering.merge(recipes,how='inner',on='id')

    while True:
        option = input("Enter enter your preferred filtering option(a,b,c): \n a)Keyword \n b)Nutrition \n c)Both \n")
        if option not in ('a', 'b', 'c'):
            print("Unknown option '" + option + "', please answer a, b or c.")
            continue

        # the filters are applied to a candidate frame and only committed once every answer is valid
        filtered = health_recipes_final_filtering_final

        if option=='a' or option=='c':
            pref_keyword = input("Enter the prefered keyword: ") #asking for user to give his prefered tag/keyword
            if pref_keyword not in filtered.columns:
                print("Unknown keyword '" + pref_keyword + "'.")
                continue
            # drop the recipes where the keyword column is False
            filtered = filtered[~(filtered[pref_keyword] == False)]

        if option=='b' or option=='c':
            # the names listed here must match the nutrition column names exactly (note the space before "(%)")
            pref_nutr = input("Enter your important nutrition(type the name of nutrition eg 'calories'): \n a)calories \n b)total fat (%) \n c)sugar (%) \n d)sodium (%) \n e)protein (%) \n f)saturated fat (%) \n g)carbohydrates (%) \n") #asking for user to give his important nutrition
            if pref_nutr not in filtered.columns:
                print("Unknown nutrition '" + pref_nutr + "', type the full name, e.g. calories.")
                continue

            try:
                num_input = float(input("Enter the nutrition value you want"))
            except ValueError:
                print("The nutrition value must be a number.")
                continue

            comparison = input("Enter your preferce: \n a)less or equal than nutrition value \n b)greater than nutrition value \n")
            if comparison not in ('a', 'b'):
                print("Unknown comparison '" + comparison + "', please answer a or b.")
                continue

            nutr_values = filtered[pref_nutr].astype(float)
            if comparison == 'a':
                exp = "less or equal than"
                filtered = filtered[~(nutr_values > num_input)]
            else:
                exp = "more than"
                filtered = filtered[~(nutr_values <= num_input)]

        arr1 = np.empty((0,4),str)
        if filtered.empty:
            # an empty table cannot be drawn, and committing it would leave nothing to filter later
            print("No recipes match these filters; keeping the previous selection.")
        else:
            health_recipes_final_filtering_final = filtered

            # the explanation is the same for every listed recipe
            if option == 'a':
                reason = pref_keyword
                because = "Its your prefered keyword for recipes"
            elif option == 'b':
                reason = pref_nutr
                because = "You choose to search for recipes with "+exp+" "+str(num_input)+" "+pref_nutr
            else:
                reason = "Keyword: "+pref_keyword+" and Nutrition: "+pref_nutr
                because = "You choose to search for "+pref_keyword+" recipes with "+exp+" "+str(num_input)+" "+pref_nutr
            arr1 = np.array(
                [[name, for_explanation_print_diseases, reason, because]
                 for name in health_recipes_final_filtering_final.head(5)['name']],
                dtype=str,
            )

            plt.rcParams["figure.figsize"] =[12.50, 2.50]
            fig, axs = plt.subplots(1, 1)
            columns = ("Recipe Name","Medical History", "The reason","Because")
            axs.axis('tight')
            axs.axis('off')
            the_table = axs.table(cellText=arr1,cellLoc="center", colLabels=columns, loc='center',colColours=np.full(len(columns), 'lavender'))
            the_table.scale(2.5,4.5)
            plt.title("Method 4 Results Table",y=1.8,fontsize=20) 
            plt.show()

        cont_choise = input("Do you want to continue filtering? [Y/n] \n")
        if(cont_choise=='N' or cont_choise=='n'):
            break

In [None]:
# Open the food.com page of the top 5 recipes left after the method 4 filtering
import webbrowser

if not health_history_df.empty:
    top_health_recipes = health_recipes_final_filtering_final.head(5)
    for _, recipe in top_health_recipes.iterrows():
        # spaces become dashes; replace() is a no-op when "-s-" is absent
        slug = recipe['name'].replace(" ", "-").replace("-s-", "s-")
        recipe_url = "https://www.food.com/recipe/" + slug + "-" + str(recipe['id'])
        webbrowser.open(recipe_url, new=1)

In [None]:
# One bar chart per recommended recipe: share of all its 1-2, 3 and 4-5 ratings.
rat=ratings

plt.rcParams["figure.figsize"] = [7.50, 3.50]
plt.rcParams["figure.autolayout"] = True

x = ["1's and 2's", "3's", "4's and 5's"]
width = 0.35

# `rank` is the position in the recommendation list; the DataFrame index label is not
# guaranteed to run from 0 to 4
for rank, (index, row) in enumerate(recipes_final.head(5).iterrows(), start=1):
    rate=rat.loc[(rat['recipe_id']== row['id']),'rating']
    rat_1_2 = int(rate.isin([1, 2]).sum())
    rat3 = int((rate == 3).sum())
    rat4_5 = int(rate.isin([4, 5]).sum())

    total = rat_1_2 + rat3 + rat4_5
    if total == 0:
        # nothing to plot, and the percentages below would divide by zero
        print("Recommended Recipe " + str(rank) + " has no ratings between 1 and 5.")
        continue

    y = [round(group / total * 100) for group in (rat_1_2, rat3, rat4_5)]

    fig, ax = plt.subplots()
    pps = ax.bar(x, y, width, align='center')

    # write the percentage just above each bar
    for p in pps:
        height = p.get_height()
        ax.text(x=p.get_x() + p.get_width() / 2, y=height+.10,
                s="{}%".format(height),
                ha='center')

    plt.title("Total Ratings for Recommended Recipe "+ str(rank), bbox={'facecolor':'0.8'})  

    plt.show()



    



In [None]:
# One bar chart per recommended recipe: share of 1-2, 3 and 4-5 ratings given to it
# by the user's similar users.
rat=ratings
knn_u=knn.loc[user]

# ratings given by the similar users, selected once for all recipes
neighbour_ratings = rat[rat['user_id'].isin(knn_u)]

plt.rcParams["figure.figsize"] = [7.50, 3.50]
plt.rcParams["figure.autolayout"] = True

x = ["1's and 2's", "3's", "4's and 5's"]
width = 0.35

# `rank` is the position in the recommendation list; the DataFrame index label is not
# guaranteed to run from 0 to 4
for rank, (index, row) in enumerate(recipes_final.head(5).iterrows(), start=1):
    rate = neighbour_ratings.loc[neighbour_ratings['recipe_id'] == row['id'], 'rating']
    # counting with sum() also works when a neighbour has no rating, or several, for the recipe
    rat_1_2 = int(rate.isin([1, 2]).sum())
    rat3 = int((rate == 3).sum())
    rat4_5 = int(rate.isin([4, 5]).sum())

    total = rat_1_2 + rat3 + rat4_5
    if total == 0:
        # nothing to plot, and the percentages below would divide by zero
        print("None of your similar users rated Recommended Recipe " + str(rank) + " between 1 and 5.")
        continue

    y = [round(group / total * 100) for group in (rat_1_2, rat3, rat4_5)]

    fig, ax = plt.subplots()
    pps = ax.bar(x, y, width, align='center')

    # write the percentage just above each bar
    for p in pps:
        height = p.get_height()
        ax.text(x=p.get_x() + p.get_width() / 2, y=height+.10,
                s="{}%".format(height),
                ha='center')

    plt.title("Your Similar Users’ Ratings for Recommended Recipe "+ str(rank), bbox={'facecolor':'0.8'})

    plt.show()
