### Final Recommender

This is our final recomendation mode...here we used both content and colaborative model to recomend dishes

### Recommendation Model

In [36]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.neighbors import NearestNeighbors
from collections import Counter
from sklearn.metrics import mean_squared_error
from math import sqrt
import nltk

import warnings
warnings.filterwarnings('ignore')


#

In [37]:
class Recommender:
    
    def __init__(self):
        self.df = pd.read_csv('dataset.csv')
        self.profiles = pd.read_csv('user_profiles.csv')
        self.recent_activity = pd.read_csv('recent_activity.csv')
    
    ## This method takes a DataFrame as input and processes the columns nutrient, disease, and diet by creating dummy variables for each unique value. 
    ## The resulting DataFrame contains these dummy variables as features. It returns the processed DataFrame.
    def get_features(self,dataframe):
        #getting dummies of dataset
        nutrient_dummies = dataframe.nutrient.str.get_dummies(sep=' ')
        disease_dummies = dataframe.disease.str.get_dummies(sep=' ')
        diet_dummies = dataframe.diet.str.get_dummies(sep=' ')
        feature_df = pd.concat([nutrient_dummies,disease_dummies,diet_dummies],axis=1)
        return feature_df


    ## find_neighbors(self, dataframe, features, k): Given a DataFrame, a list of features, and the number of neighbors to find (k), 
    ## this method converts the features into a binary dictionary and creates a list of 1s and 0s corresponding to the presence or absence of each feature. 
    ## It then calls the k_neighbor method, passing the feature vectors and the DataFrame, to find the k nearest neighbors based on the features. 
    ## It returns the DataFrame of similar neighbors.    
    def find_neighbors(self, dataframe, features, k):
        features_df = self.get_features(dataframe)
        total_features = features_df.columns  
        d = dict()
        for i in total_features:
            d[i]= 0
        for i in features:
            d[i] = 1
        final_input = list(d.values())
        
        similar_neighbors = self.k_neighbor([final_input],features_df,dataframe,k)
        return similar_neighbors

    
    ## k_neighbor(self, inputs, feature_df, dataframe, k): This method initializes a NearestNeighbors model with k neighbors and fits it with the feature DataFrame. 
    ## It then uses the model to find the k nearest neighbors for the given feature inputs. The resulting indices are used to retrieve the corresponding rows from the original DataFrame (dataframe). 
    ## The retrieved DataFrame is returned as the results.
    def k_neighbor(self,inputs,feature_df,dataframe,k):
        
        #initializing model with k neighbors
        model = NearestNeighbors(n_neighbors=k,algorithm='ball_tree')
        
        # fitting model with dataset features
        model.fit(feature_df)
        
        df_results = pd.DataFrame(columns=list(dataframe.columns))
        
        # getting distance and indices for k nearest neighbor
        distnaces , indices = model.kneighbors(inputs)

        for i in list(indices):
            df_results = df_results.append(dataframe.loc[i])

        df_results = df_results.reset_index(drop=True)
        return df_results

    
    ## user_based(self, features, user_id, max_count): This method finds similar users based on the provided features by calling the find_neighbors method on the profiles DataFrame. 
    ## It then retrieves recent activities from the recent_activity DataFrame for those similar users, filters out activities reviewed by the current user, and selects the meals associated with those activities from the df DataFrame. 
    ## The relevant columns are extracted, duplicates are removed, and the resulting DataFrame is returned.
    def user_based(self,features,user_id, max_count):
       
        similar_users = self.find_neighbors(self.profiles,features, max_count)
        users = list(similar_users.user_id)

        results = self.recent_activity[self.recent_activity.user_id.isin(users)] #taking acitivies
   
        results = results[results['user_id']!=user_id] # selecting those which are not reviewed by user
 
        meals = list(results.meal_id.unique())
      
        results = self.df[self.df.meal_id.isin(meals)]
    
        results = results.filter(['meal_id','name','nutrient','veg_nonveg','description','price','review'])

        results = results.drop_duplicates(subset=['name'])
        results = results.reset_index(drop=True)
        return results

        
    ## recent_activity_based(self, user_id, max_count): This method retrieves the recent activities of a given user from the recent_activity DataFrame. 
    ## It extracts the nutrients, diseases, and diets associated with the meals in the activities and calculates the features based on the occurrence of each attribute. 
    ## It then calls the find_neighbors method on the df DataFrame to find similar meals based on these features. The resulting DataFrame is returned.
    def recent_activity_based(self,user_id, max_count):
        recent_df = self.recent_activity[self.recent_activity['user_id']==user_id]
        meal_ids = list(recent_df.meal_id.unique())
        recent_data = self.df[self.df.meal_id.isin(meal_ids)][['nutrient','category','disease','diet']].reset_index(drop=True)

        nutrient = []
        disease = []
        diet = []
        
        for i in range(recent_data.shape[0]):
            for j in recent_data.loc[i,'nutrient'].split():
                nutrient.append(j)
        for i in range(recent_data.shape[0]):
            for j in recent_data.loc[i,'disease'].split():
                disease.append(j)
        for i in range(recent_data.shape[0]):
            for j in recent_data.loc[i,'diet'].split():
                diet.append(j)
        
        features = []

        a = dict(Counter(nutrient))
        m = np.mean(list(a.values()))
        for i in a.items():
            if i[1]>m:
                features.append(i[0])
        

        a = dict(Counter(disease))
        m = np.mean(list(a.values()))
        for i in a.items():
            if i[1]>m:
                features.append(i[0])
        
        a = dict(Counter(diet))
        m = np.mean(list(a.values()))
        for i in a.items():
            if i[1]>m:
                features.append(i[0])
                
        similar_neighbors = self.find_neighbors(self.df,features, max_count)
        return similar_neighbors.filter(['meal_id','name','nutrient','veg_nonveg', 'diet', 'disease', 'description','price','review'])

        
    def recommend(self,user_id, max_count):
        #finding user's profile features by id
        profile = self.profiles[self.profiles['user_id']==user_id]
        features = []
        features.extend(profile['nutrient'].values[0].split())
        features.extend(profile['disease'].values[0].split())
        features.extend(profile['diet'].values[0].split())
        df1 = self.user_based(features,user_id, max_count)
 
        df2 = self.recent_activity_based(user_id, max_count)
        df = pd.concat([df1,df2])
      
        df = df.drop_duplicates('meal_id').reset_index(drop=True)
        return df.dropna()
    
    



In [38]:
class Profile:
    
    # df = pd.read_csv('dataset.csv').drop('Unnamed: 0',axis=1) # static variable
    # static variable
    
    def __init__(self, df, diet, disease, nutrient, food_type=[], favorite_food=""):
        self.df=df
        self.diet = diet
        self.disease = disease
        self.nutrient = nutrient
        self.type = food_type
        self.like = favorite_food
        self.df2 = pd.DataFrame(columns=list(self.df.columns))
        self.df3 = pd.DataFrame(columns=list(self.df.columns))
        self.df4 = pd.DataFrame(columns=list(self.df.columns))
        self.df5 = pd.DataFrame(columns=list(self.df.columns))
        self.df6 = pd.DataFrame(columns=list(self.df.columns))
        
    def removestop(self, tokens):
        return [token for token in tokens if token not in list(open('stopwords.txt','r').read().split())]
            
    def get_parsed_list(self, list_str):
      try: return list_str.split()
      except: return []


    def inputs(self, user_diet_list, user_disease_list, user_nutrient_list, food_type, favorite_food):
       
        if food_type:
            self.df2 = self.df2[self.df.veg_nonveg.isin(food_type)]
            self.df2 = self.df2.reset_index()
      
        if user_diet_list:
            for i in range(self.df.shape[0]):
                diet_list = self.get_parsed_list(self.df.loc[i,'diet'])
                for user_diet in user_diet_list:
                    if user_diet in diet_list:
                        self.df3 = self.df3.append(self.df.loc[i])
 
        if user_disease_list:
            for i in range(self.df.shape[0]):
                disease_list = self.get_parsed_list(self.df.loc[i,'disease'])
                for user_disease in user_disease_list:
                    if user_disease in disease_list:
                        self.df4 = self.df4.append(self.df.loc[i])

        if user_nutrient_list:
            for i in range(self.df.shape[0]):
                nutrient_list = self.get_parsed_list(self.df.loc[i,'nutrient'])
                for user_nutrient in user_nutrient_list:
                    if user_nutrient in nutrient_list:
                        self.df5 = self.df5.append(self.df.loc[i])

        if favorite_food:
            fav_food_list = self.removestop(favorite_food.split())
            for i in range(self.df.shape[0]):
                name_list = [name.lower() for name in str(self.df.loc[i,'name']).split()]
                for name in name_list:
                    for food_name in fav_food_list:
                        if food_name==name:
                            self.df6 = self.df6.append(self.df.loc[i])
            for i in range(self.df.shape[0]):
                name_list = [name.lower() for name in self.get_parsed_list(self.df.loc[i,'ingredient'])]
                for name in name_list:
                    for food_name in fav_food_list:
                        if food_name==name:
                            self.df6 = self.df6.append(self.df.loc[i])
            for i in range(self.df.shape[0]):
                name_list = [name.lower() for name in str(self.df.loc[i,'category']).split()]
                for name in name_list:
                    for food_name in fav_food_list:
                        if food_name ==name:
                            self.df6 = self.df6.append(self.df.loc[i])
            
        return self.df2, self.df3, self.df4, self.df5, self.df6
    
    def get_profile(self):
        df2,df3,df4,df5,df6 = self.inputs(self.diet, self.disease, self.nutrient, self.type, self.like)
        
        df_merge = pd.concat([df2,df3,df4,df5,df6],axis=0).drop_duplicates(subset='name')
        df_merge = df_merge.filter(['name','veg_nonveg','nutrient','ingredient','price','review','description', 'diet', 'disease'])
        print(df_merge.shape)
        
        return df_merge



### Prediction

`recommend(self, user_id, max_count)` combines the recommendations from the user_based and recent_activity_based methods. 
1. It first retrieves the features from the user's profile and calls user_based to get user-based recommendations. 
2. It then calls recent_activity_based to get recommendations based on recent activity. 
3. The results from both methods are concatenated, duplicates are removed, and the final DataFrame is returned.

In [39]:
user_id = 36

ob = Recommender()
result = ob.recommend(f'user_{user_id}', 15)
result


Unnamed: 0,meal_id,name,nutrient,veg_nonveg,description,price,diet,disease
2,meal_id_424,Carrot Halwa With Mango Bavarin,fiber,veg,https://food.ndtv.com/recipe-carrot-halwa-with...,370,high_fiber_diet,pregnancy hypertension
3,meal_id_455,Homemade Snickers Bar,carbohydrates,veg,https://food.ndtv.com/recipe-homemade-snickers...,440,low_sodium_diet high_fiber_diet,heart_disease diabeties
4,meal_id_251,Warm Apple &amp; Kinnu Winter Punch,carbohydrates,veg,https://food.ndtv.com/recipe-warm-apple-kinnu-...,460,high_protien_diet high_fiber_diet,hypertension diabeties
5,meal_id_262,Veg Summer Rolls,carbohydrates,veg,https://food.ndtv.com/recipe-veg-summer-rolls-...,235,ketogenic_diet high_fiber_diet,heart_disease diabeties
6,meal_id_408,Shahi Gulaab &amp; Thandai Bon Bons,magnesium,veg,https://food.ndtv.com/recipe-shahi-gulaab-than...,550,high_protien_diet,goitre hypertension diabeties
7,meal_id_575,Crazy Stupid Smoothie,selenium,veg,https://food.ndtv.com/recipe-crazy-stupid-smoo...,515,ketogenic_diet high_protien_diet type_a_diet,pregnancy goitre
8,meal_id_291,Moongfali Til Laddoo,magnesium copper,veg,https://food.ndtv.com/recipe-moongfali-til-lad...,335,high_protien_diet,goitre hypertension diabeties
9,meal_id_232,X'mas Brownie Tree,magnesium,veg,https://food.ndtv.com/recipe-xmas-brownie-tree...,460,high_protien_diet,heart_disease goitre hypertension diabeties
10,meal_id_541,Chamomile And Lavender Tea,manganese,veg,https://food.ndtv.com/recipe-chamomile-and-lav...,210,low_fat_diet high_protien_diet ketogenic_diet,heart_disease kidney_disease
11,meal_id_534,Strawberry Frozen Margarita,vitamin_c,veg,https://food.ndtv.com/recipe-strawberry-frozen...,530,dash_diet vegan_diet high_fiber_diet low_sodiu...,scurvy


### Evaluation


#### Evaluation Setup

In [40]:
def get_values(df, col_name):
  col_values = []
  for value in df[col_name].values:
    col_values.extend(value.split())
  return list(set(col_values))

user_df = pd.read_csv('dataset.csv')
user_df = user_df[user_df['user_id']==user_id]

user_nutrient_list = get_values(user_df, 'nutrient')
user_diet_list = get_values(user_df, 'diet')
user_disease_list = get_values(user_df, 'disease')


predicted_nutrient_list = get_values(result, 'nutrient')
predicted_diet_list = get_values(result, 'diet')
predicted_disease_list = get_values(result, 'disease')


ob = Profile(df=pd.read_csv('dataset.csv'), diet=user_diet_list, disease=user_disease_list, nutrient=user_nutrient_list)
profile_based_result = ob.get_profile()

vanilla_nutrient_list = get_values(profile_based_result, 'nutrient')
vanilla_diet_list = get_values(profile_based_result, 'diet')
vanilla_disease_list = get_values(profile_based_result, 'disease')

# print(user_nutrient_list)
# print(predicted_nutrient_list)

# print(user_diet_list)
# print(predicted_diet_list)

# print(user_disease_list)
# print(predicted_disease_list)


## calculate_accuracy(p_list, u_list) takes two lists (p_list for predicted values, u_list for user values), removes duplicates, 
## counts the common elements between the lists, and returns the accuracy as a percentage.
def calculate_accuracy(p_list, u_list):
  u_list = list(set(u_list))
  p_list = list(set(p_list))

  count = 0
  for u in u_list:
    if u in p_list:
      count += 1
  return (count/len(u_list))*100

## calculate_accuracy_2(p_list, u_list) takes two lists (p_list for predicted values, u_list for user values), 
## creates a mapping of unique values from both lists to integers, converts the original lists to the mapped values, 
## calculates the Mean Absolute Error (MAE) and Root Mean Squared Error (RMSE) between the mapped values, and returns the calculated MAE and RMSE.
def calculate_accuracy_2(p_list, u_list):
  u_list = list(set(u_list))
  p_list = list(set(p_list))
  map = {}
  count = 1;
  for u in u_list: 
    map[u] = count
    count += 1

  for p in p_list:
    if p not in map:
      map[p] = count
      count += 1
  
  u_list2 = [map[u] for u in u_list]
  p_list2 = [map[p] for p in p_list]

  # create sample true and predicted values
  true_values = np.array(u_list2)
  pred_values = np.array(p_list2)

  # find common indices
  common_indices = np.intersect1d(np.arange(len(true_values)), np.arange(len(pred_values)))

  # extract common values
  true_values_common = true_values[common_indices]
  pred_values_common = pred_values[common_indices]

  # calculate MAE
  mae = np.mean(np.abs(true_values_common - pred_values_common))  
  # calculate RMSE
  rmse = np.sqrt(np.mean((true_values_common - pred_values_common) ** 2))

  return mae, rmse

(591, 8)


#### RMS & AM Score Evaluation

In [41]:
nutrient_mae, nutrient_rmse = calculate_accuracy_2(predicted_nutrient_list, user_nutrient_list)
diet_mae, diet_rmse = calculate_accuracy_2(predicted_diet_list, user_diet_list)
disease_mae, disease_rmse = calculate_accuracy_2(predicted_disease_list, user_disease_list)

vanilla_nutrient_mae, vanilla_nutrient_rmse = calculate_accuracy_2(vanilla_nutrient_list, user_nutrient_list)
vanilla_diet_mae, vanilla_diet_rmse = calculate_accuracy_2(vanilla_diet_list, user_diet_list)
vanilla_disease_mae, vanilla_disease_rmse = calculate_accuracy_2(vanilla_disease_list, user_disease_list)


print(f"{'Mean Absolute Error of nutrient attributes':<50s}: {nutrient_mae:.2f}%")  
print(f"{'Mean Absolute Error of diet attributes':<50s}: {diet_mae:.2f}%")  
print(f"{'Mean Absolute Error of disease attributes':<50s}: {disease_mae:.2f}%")  
print("\n")
print(f"{'Root Mean Squared Error of nutrient attributes':<50s}: {nutrient_rmse:.2f}%")  
print(f"{'Root Mean Squared Error of diet attributes':<50s}: {diet_rmse:.2f}%")  
print(f"{'Root Mean Squared Error of disease attributes':<50s}: {disease_rmse:.2f}%")  

print("\n")
print("\n")

print(f"{'Mean Absolute Error of nutrient attributes with Vanilla(Bruteforce Algorithm)':<85s}: {vanilla_nutrient_mae:.2f}%")  
print(f"{'Mean Absolute Error of diet attributes with Vanilla(Bruteforce Algorithm)':<85s}: {vanilla_diet_mae:.2f}%")  
print(f"{'Mean Absolute Error of disease attributes with Vanilla(Bruteforce Algorithm)':<85s}: {vanilla_disease_mae:.2f}%")  
print("\n")
print(f"{'Root Mean Squared Error of nutrient attributes with Vanilla(Bruteforce Algorithm)':<85s}: {vanilla_nutrient_rmse:.2f}%")  
print(f"{'Root Mean Squared Error of diet attributes with Vanilla(Bruteforce Algorithm)':<85s}: {vanilla_diet_rmse:.2f}%")  
print(f"{'Root Mean Squared Error of disease attributes with Vanilla(Bruteforce Algorithm)':<85s}: {vanilla_disease_rmse:.2f}%")  

Mean Absolute Error of nutrient attributes        : 8.88%
Mean Absolute Error of diet attributes            : 3.44%
Mean Absolute Error of disease attributes         : 0.22%


Root Mean Squared Error of nutrient attributes    : 10.36%
Root Mean Squared Error of diet attributes        : 4.28%
Root Mean Squared Error of disease attributes     : 0.47%




Mean Absolute Error of nutrient attributes with Vanilla(Bruteforce Algorithm)        : 6.33%
Mean Absolute Error of diet attributes with Vanilla(Bruteforce Algorithm)            : 3.56%
Mean Absolute Error of disease attributes with Vanilla(Bruteforce Algorithm)         : 0.50%


Root Mean Squared Error of nutrient attributes with Vanilla(Bruteforce Algorithm)    : 7.78%
Root Mean Squared Error of diet attributes with Vanilla(Bruteforce Algorithm)        : 4.47%
Root Mean Squared Error of disease attributes with Vanilla(Bruteforce Algorithm)     : 1.05%


#### BLUE Score Evaluation

In [42]:
nutrient_bleu = nltk.translate.bleu_score.sentence_bleu([user_nutrient_list], predicted_nutrient_list, weights=(1,))
diet_bleu = nltk.translate.bleu_score.sentence_bleu([user_diet_list], predicted_diet_list, weights=(1,))
disease_bleu = nltk.translate.bleu_score.sentence_bleu([user_disease_list], predicted_disease_list, weights=(1,))

print(f"{'BLEU-1 of nutrient attributes':<50s}: {nutrient_bleu:.2f}")  
print(f"{'BLEU-1 of diet attributes':<50s}: {diet_bleu:.2f}")  
print(f"{'BLEU-1 of disease attributes':<50s}: {disease_bleu:.2f}")  

BLEU-1 of nutrient attributes                     : 0.21
BLEU-1 of diet attributes                         : 0.73
BLEU-1 of disease attributes                      : 0.89


#### GLEU Score Evaluation

In [43]:
nutrient_gleu = nltk.translate.gleu_score.sentence_gleu([user_nutrient_list], predicted_nutrient_list, max_len=1)
diet_gleu = nltk.translate.gleu_score.sentence_gleu([user_diet_list], predicted_diet_list, max_len=1)
disease_gleu = nltk.translate.gleu_score.sentence_gleu([user_disease_list], predicted_disease_list, max_len=1)

print(f"{'GLEU of nutrient attributes':<50s}: {nutrient_gleu:.2f}")  
print(f"{'GLEU of diet attributes':<50s}: {diet_gleu:.2f}")  
print(f"{'GLEU of disease attributes':<50s}: {disease_gleu:.2f}")  

GLEU of nutrient attributes                       : 0.27
GLEU of diet attributes                           : 0.73
GLEU of disease attributes                        : 0.90


#### NIST Score Evaluation

In [44]:
nutrient_nist = nltk.translate.nist_score.sentence_nist([user_nutrient_list], predicted_nutrient_list, n=1)
diet_nist = nltk.translate.nist_score.sentence_nist([user_diet_list], predicted_diet_list, n=1)
disease_nist = nltk.translate.nist_score.sentence_nist([user_disease_list], predicted_disease_list, n=1)

print(f"{'NIST of nutrient attributes':<50s}: {nutrient_nist:.2f}")  
print(f"{'NIST of diet attributes':<50s}: {diet_nist:.2f}")  
print(f"{'NIST of disease attributes':<50s}: {disease_nist:.2f}")  

NIST of nutrient attributes                       : 0.37
NIST of diet attributes                           : 2.31
NIST of disease attributes                        : 3.17


#### RIBES Score Evaluation

In [45]:
nutrient_ribes = nltk.translate.ribes_score.sentence_ribes([user_nutrient_list], predicted_nutrient_list)
diet_ribes = nltk.translate.ribes_score.sentence_ribes([user_diet_list], predicted_diet_list)
disease_ribes = nltk.translate.ribes_score.sentence_ribes([user_disease_list], predicted_disease_list)

print(f"{'RIBES of nutrient attributes':<50s}: {nutrient_ribes:.2f}")  
print(f"{'RIBES of diet attributes':<50s}: {diet_ribes:.2f}")  
print(f"{'RIBES of disease attributes':<50s}: {disease_ribes:.2f}")  

RIBES of nutrient attributes                      : 0.13
RIBES of diet attributes                          : 0.53
RIBES of disease attributes                       : 0.30


#### F1 Score Evaluation

In [46]:
def calculate_f1_score(p_list, u_list):
  u_list = list(set(u_list))
  p_list = list(set(p_list))

  true_positives = len(set(u_list) & set(p_list))
  false_positives = len(set(p_list) - set(u_list))
  false_negatives = len(set(u_list) - set(p_list))

  precision = true_positives / (true_positives + false_positives)
  recall = true_positives / (true_positives + false_negatives)
  f1 = 2 * (precision * recall) / (precision + recall)

  return f1

nutrient_f1 = calculate_f1_score(predicted_nutrient_list, user_nutrient_list)
diet_f1 = calculate_f1_score(predicted_diet_list, user_diet_list)
disease_f1 = calculate_f1_score(predicted_disease_list, user_disease_list)

print(f"{'F1 score of nutrient attributes':<50s}: {nutrient_f1:.2f}")
print(f"{'F1 score of diet attributes':<50s}: {diet_f1:.2f}")
print(f"{'F1 score of disease attributes':<50s}: {disease_f1:.2f}")

F1 score of nutrient attributes                   : 0.35
F1 score of diet attributes                       : 0.80
F1 score of disease attributes                    : 0.95


### Evaluation Overview

In [47]:
evalutaion_overview = {
    'Name': ['Nutrient', 'Diet', 'Disease'],
    'AM Score': [nutrient_mae, diet_mae, disease_mae],
    'RMS Score': [nutrient_rmse, diet_rmse, disease_rmse],
    'BLEU Score': [nutrient_bleu, diet_bleu, disease_bleu],
    'GLEU Score': [nutrient_gleu, diet_gleu, disease_gleu],
    'NIST Score': [nutrient_nist, diet_nist, disease_nist],
    'RIBES Score': [nutrient_ribes, diet_ribes, disease_ribes],
    'F1 Score': [nutrient_f1, diet_f1, disease_f1],
}

pd.DataFrame(evalutaion_overview)

Unnamed: 0,Name,AM Score,RMS Score,BLEU Score,GLEU Score,NIST Score,RIBES Score,F1 Score
0,Nutrient,8.875,10.362191,0.208431,0.266667,0.369198,0.128408,0.347826
1,Diet,3.444444,4.281744,0.727273,0.727273,2.3054,0.527699,0.8
2,Disease,0.222222,0.471405,0.894839,0.9,3.170034,0.302179,0.947368
