# Recipe Recommendation using Filtration Approach

### Importing Libraries

In [1]:
import pandas as pd
import re
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
import pymongo
import warnings
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

warnings.filterwarnings('ignore')

## User Inputs:

    1. Age
	2. Weight
	3. Height
	4. Gender
	5. Activeness Level {Sedentary / Light Activity / Moderate Activity / Active / Very Active}
	6. Health Issues {Diabetes / Hypertension / Hypotension / None}
	7. Your Goal {Lose Weight / Maintain Weight / Gain Weight}
    8. Food Preference {Veg / Non-Veg / Any}
	9. Any Bad Habits {Smoking / Alcohol / Both / None}
	10. Allergic to any specified Food (If any) {Blank Space to type}

<br>***Description of Activeness Levels / Activity Multiplier:***<br>

    1. Sedentary = BMR x 1.2 (little or no exercise, desk job)
    2. Lightly active = BMR x 1.375 (light exercise/ sports 1-3 days/week)
    3. Moderately active = BMR x 1.55 (moderate exercise/ sports 6-7 days/week)
    4. Very active = BMR x 1.725 (hard exercise every day, or exercising 2 hours/day)
    5. Extra active = BMR x 1.9 (hard exercise 2 or more times per day, or training for marathon, or triathlon, etc.)

## Custom Made Functions for this Approach:

### 1. A Function to Calculate Time Taken by Any Process

In [2]:
# Function to calculate total time taken:
def timer(start_time=None):
    from datetime import datetime
    if not start_time:
        start_time = datetime.now()
        return start_time
    elif start_time:
        thour, temp_sec = divmod((datetime.now() - start_time).total_seconds(), 3600)
        tmin, tsec = divmod(temp_sec, 60)
        print('\n Time taken: %i hours %i minutes and %s seconds.' % (thour, tmin, round(tsec, 2)))

### 2. A Function to Load Recipe Dataset from MongoDB

In [3]:
def get_data():
    
    print("\nProcess 1: Loading Dataset from MongoDB...")
    """Loading Dataset & Converting to DataFrame"""
    start_time = timer(None)
    default_connection_url = "mongodb+srv://tejash:Test123@cluster0.fciyb.mongodb.net/foodapp?retryWrites=true&w=majority"

    client = pymongo.MongoClient(default_connection_url)
    data = client["foodapp"]
    collection = data["foodapp_recipedata"].find().limit(1000)
    recipe_df = pd.DataFrame(list(collection))
    print("\nInformation on Recipe Data after Loading from MongoDB :\nNumber of Recipes loaded : {}\nNumber of Features in Recipe Data : {}".format(recipe_df.shape[0], recipe_df.shape[1]))
    timer(start_time)

    # """Code to load data with CSV file"""
    # recipe_df = pd.read_csv("Recipe_Nutr_Dataset_001.csv", nrows=10000)
    # recipe_df = recipe_df.drop(['Unnamed: 0', 'Unnamed: 0.1', "partition"], axis=1)
    # recipe_df.head()
    
    return recipe_df

### 3. A Function for Data Cleaning & Data Pre-processing 

In [4]:
def data_cleaning(recipe_df):
    
    print("\nProcess 2: Cleaning & Pre-processing Dataset...")
    start_time = timer(None)
    recipe_df = recipe_df.dropna(axis=0)
    recipe_df = recipe_df.drop("nutrition", axis=1)
    recipe_df["type"] = np.nan
    recipe_df.reset_index(inplace=True, drop=True)

    for i in range(len(recipe_df.ingredients)):
        lst = []
        ing = recipe_df.ingredients[i]
        for ingred in ing:
            ingredients_text = ingred['text']
            lst.append(ingredients_text)
            l = ' '.join(lst)
            recipe_df.ingredients[i] = str(l.lower())

    for i in range(len(recipe_df.instructions)):
        instructions = []
        ins = recipe_df.instructions[i]
        for instruc in ins:
            instructions_text = instruc['text']
            instructions.append(instructions_text)
            instructions_single_string = ' '.join(instructions)
            recipe_df.instructions[i] = str(instructions_single_string.lower())    
    
    df = recipe_df.copy()
    df1 = recipe_df.copy()
    full_col_lst = ['_id', 'id', 'title', "type", 'calories', 'carbohydrateContent', 'fiberContent', 'sugarContent', 'fatContent', 'saturatedFatContent', 'proteinContent', 'cholesterolContent', 'sodiumContent', 'ingredients', 'instructions', 'url']
    col_lst = ['calories', 'fatContent', 'saturatedFatContent', 'cholesterolContent', 'sodiumContent', 'carbohydrateContent', 'fiberContent', 'sugarContent', 'proteinContent']

    df = df.drop(full_col_lst, axis=1)

    for i in col_lst:
        locals()["lst_{}".format(i)] = list()
        for a in recipe_df[i]:
            try:
                if type(a) == str:
                    x = re.sub("[a-z]*", "", a)
                    locals()["lst_{}".format(i)].append(float(x))
                else:
                    locals()["lst_{}".format(i)].append(a)
            except Exception as e:
                print("Error: ", e)

    for index, i in enumerate(full_col_lst):
        if i in col_lst:
            locals()["{}_series".format(i)] = pd.Series(locals()["lst_{}".format(i)], name=i)
            df.insert(index, i, locals()["{}_series".format(i)])
        else:
            locals()["{}_series".format(i)] = df1.pop(i)
            df.insert(index, i, locals()["{}_series".format(i)])

    print("\nInformation on Recipe Data after Cleaning :\nNumber of Recipes loaded - {}\nNumber of Features in Recipe Data - {}".format(df.shape[0], df.shape[1]))
    timer(start_time)

    return df

### 4. A Function to Calculate BMR & TDEE

In [5]:
def calculate_bmr_tdee(age, weight, height, gender, activeness, goal, activity_multiplier):
    
    print("\nProcess 3: Calculating Basal Metabolic Rate(BMR) & Total Daily Energy Expenditure(TDEE)...")    
    if gender == "Male":
        bmr = round(5 + (10 * weight) + (6.25 * height) - (5 * age), ndigits=2)
        tdee = round(bmr * activity_multiplier.get(activeness), ndigits=2)
        print("\nUser's BMR - {}kcal & TDEE - {}kcal".format(bmr, tdee))
    else:
        bmr = round((9.247 * weight) + (3.098 * height) - (4.33 * age) - 161, ndigits=2)
        tdee = round(bmr * activity_multiplier.get(activeness), ndigits=2)
        print("\nUser's BMR - {}kcal & TDEE - {}kcal".format(bmr, tdee))
    
    
    if goal == "Lose Weight":
        final_tdee = tdee - 500
        print("Final TDEE - {}kcal".format(final_tdee))
    elif goal == "Maintain Weight":
        final_tdee = tdee
        print("Final TDEE - {}kcal".format(final_tdee))
    else:
        final_tdee = tdee + 500
        print("Final TDEE - {}kcal".format(final_tdee))
    return bmr, final_tdee

### 5. A Function for Calculating Nutritional Ranges as per Health Issues

In [6]:
def calculate_nutritional_limits_day(health_issues, final_tdee):
    
    print("\nProcess 4: Calculating Nutritional Ranges as per provided Health Issues...")    
    if health_issues == "Diabetes":
        min_carbs = round((final_tdee * 0.45) / 4, ndigits=2)
        max_carbs = round((final_tdee * 0.6) / 4, ndigits=2)
        min_fats = round((final_tdee * 0.20) / 9, ndigits=2)
        max_fats = round((final_tdee * 0.35) / 9, ndigits=2)
        min_proteins = round((final_tdee * 0.15) / 4, ndigits=2)
        max_proteins = round((final_tdee * 0.2) / 4, ndigits=2)
        cholesterol = 200
        sodium = 2300

    elif health_issues == "Hypertension":
        min_carbs = round((final_tdee * 0.4) / 4, ndigits=2)
        max_carbs = round((final_tdee * 0.55) / 4, ndigits=2)
        min_fats = round((final_tdee * 0.25) / 9, ndigits=2)
        max_fats = round((final_tdee * 0.27) / 9, ndigits=2)
        min_proteins = round((final_tdee * 0.12) / 4, ndigits=2)
        max_proteins = round((final_tdee * 0.18) / 4, ndigits=2)
        cholesterol = 150
        sodium = 1500

    elif health_issues == "Hypotension":
        min_carbs = round((final_tdee * 0.26) / 4, ndigits=2)
        max_carbs = round((final_tdee * 0.44) / 4, ndigits=2)
        min_fats = round((final_tdee * 0.25) / 9, ndigits=2)
        max_fats = round((final_tdee * 0.27) / 9, ndigits=2)
        min_proteins = round((final_tdee * 0.12) / 4, ndigits=2)
        max_proteins = round((final_tdee * 0.15) / 4, ndigits=2)
        cholesterol = 200
        sodium = 2300

    elif health_issues == "Hypertension & Diabetes" or health_issues == "Diabetes & Hypertension":
        min_carbs = round((final_tdee * 0.4) / 4, ndigits=2)
        max_carbs = round((final_tdee * 0.55) / 4, ndigits=2)
        min_fats = round((final_tdee * 0.2) / 9, ndigits=2)
        max_fats = round((final_tdee * 0.27) / 9, ndigits=2)
        min_proteins = round((final_tdee * 0.12) / 4, ndigits=2)
        max_proteins = round((final_tdee * 0.18) / 4, ndigits=2)
        cholesterol = 150
        sodium = 1500

    elif health_issues == "Hypotension & Diabetes" or health_issues == "Diabetes & Hypotension":
        min_carbs = round((final_tdee * 0.26) / 4, ndigits=2)
        max_carbs = round((final_tdee * 0.44) / 4, ndigits=2)
        min_fats = round((final_tdee * 0.2) / 9, ndigits=2)
        max_fats = round((final_tdee * 0.27) / 9, ndigits=2)
        min_proteins = round((final_tdee * 0.135) / 4, ndigits=2)
        max_proteins = round((final_tdee * 0.175) / 4, ndigits=2)
        cholesterol = 200
        sodium = 2300        
        
    else:
        min_carbs = round((final_tdee * 0.5) / 4, ndigits=2)
        max_carbs = round((final_tdee * 0.6) / 4, ndigits=2)
        min_fats = round((final_tdee * 0.25) / 9, ndigits=2)
        max_fats = round((final_tdee * 0.3) / 9, ndigits=2)
        min_proteins = round((final_tdee * 0.12) / 4, ndigits=2)
        max_proteins = round((final_tdee * 0.2) / 4, ndigits=2)
        cholesterol = 300
        sodium = 2300
    return min_carbs, max_carbs, min_fats, max_fats, min_proteins, max_proteins, cholesterol, sodium

### 6. A Function to Filter Recipes based on Food Preferences

Veg Recipes are denoted by 1 & Non-Veg Recipes are denoted by 0

In [7]:
def food_type_finder(food_type, recipe_df):
    
    print("\nProcess 5: Started Filtering Recipes based on provided Food Preferences...")    
    non_veg = ['Chicken', 'Turkey', 'Ham', 'Pork Chops', 'Beef', 'Lamb', 'Moose Meat', 'Venison', 'Caribou', 'Elk', 'Buffalo', 'Bear Meat', 'Veal', 'Salmon', 'Trout', 'Tilapia', 'Mahi Mahi', 'Cod', 'Halibut', 'Pike', 'Mackerel', 'Sea Bass', 'Tuna', 'Shark', 'Swordfish', 'Catfish', 'Carp', 'Flounder', 'Red Snapper', 'Perch', 'Sturgeon', 'Striped Bass', 'Barramundi', 'Red Mullet', 'Whale Meat', 'Anchovy', 'Sardines', 'Herring', 'Eel', 'Octopus', 'Squid', 'Cuttlefish', 'Pufferfish', 'Oysters', 'Clams', 'Mussels', 'Cockles', 'Scallops', 'Shrimp', 'Prawns', 'Lobster', 'Crab', 'Crayfish', 'Conch', 'Percebes', 'Goose Barnacles', 'Abalone', 'Escargot', 'Whelks', 'Frog Legs', 'Snake', 'Bacon', 'Duck', 'Cornish Game Hen', 'Goose', 'Grouse', 'Pheasant', 'Quail', 'Squab', 'Pigeon', 'Guineafowl', 'Guinea Fowl', 'Partridge', 'Snipe', 'Swan', 'Goat', 'Rabbit', 'Horse Meat', 'Antelope Meat', 'Emu', 'Kangaroo', 'Ostrich', 'Guinea Pig', 'Crickets', 'Tripe', 'Mutton', 'Bison', 'Alligator', 'Crocodile', 'Seal Meat', 'Dolphin', 'Alpaca Meat', 'Turtle Meat', 'Capybara', 'Possum Meat', 'Donkey Meat', 'Wild Boar', 'Squirrel', 'Hare', 'Wolf Meat', 'Zebra Meat', 'Beondegi', 'Steamed Silkworm Pupae', 'Sago Grubs', 'Chocolate Covered Ants', 'Llama Meat', 'Yak Meat', 'Impala Meat', 'Wildebeest Meat', 'Dog Meat', 'Cat Meat', 'Salt-Cured Meat', 'Salted Meat', 'Head Cheese', 'Sausage', 'Smoked Meat', 'Steak Tartare', 'Turducken', 'Barbecue', 'Ribs', 'Ground Meat', 'Bone Marrow', 'Pork Belly', 'Foie Gras', 'Balut', 'Shashlik Kebab', 'Haggis', 'Liver', 'Tongue', 'Svio', 'Boiled Sheep Head', 'Fish Head', 'Pigs Feet', 'Chicken Feet', 'Jellyfish', 'Sea Cucumber', 'Geoduck', 'Saumagen', 'Stuffed Pig Stomach', 'Beef Tendon', 'Stew', 'Shawarma', 'Meatballs', 'Pulled Pork', 'Fried Chicken', 'Chicharones', 'Rocky Mountain Oysters', 'Gizzard', 'Hot Dogs', 'Hamburger Patties', 'Spam', 'Deli Meat', 'Jerky', 'Cutlet', 'Meat Loaf', 'Scrapple', 'Pot Roast', 'Brisket', 'Kobe Beef', 'Oxtail', 'Sashimi', 'Poke', 'Dried Fish', 'Satay', 'Veggie Meat']
    non_veg = [x.lower() for x in non_veg]
    
    old_rows = recipe_df.shape[0]
    lst = list(recipe_df.ingredients)

    for i in range(len(lst)):
        count = 0
        x = lst[i].split(" ")
        for a in x:
            if a in non_veg:
                recipe_df["type"][i] = 0
                break
            else:
                count += 1
                if count == len(x):
                    recipe_df["type"][i] = 1
                    break
    
    if food_type == "Non-Veg":
        recipe_df = recipe_df[recipe_df.type == 0]
    elif food_type == "Veg":
        recipe_df = recipe_df[recipe_df.type == 1]
    else:
        recipe_df = recipe_df
    new_rows = recipe_df.shape[0]
    print("\nOut of {}, {} are droped by Process 5\nProcess 5: Completed".format(old_rows, (old_rows - new_rows)))
    recipe_df.reset_index(inplace=True, drop=True)
    
    return recipe_df

### 7. A Function to Filter Recipes based on User's Allergies

In [8]:
def filter_allergic_recipes(recipe_df, lst_allergy):
    
    print("\nProcess 6: Started Filtering Recipes based on provided User's Allergies...")    
    df = recipe_df.copy()

    old_rows = recipe_df.shape[0]
    stopwords_lst = ['cups', 'cup', 'tablespoons', 'tablespoon', 'box', 'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"]
    # lst_ingredients = list()
    unique_ingredients_lst = list()
    lemmatizer = WordNetLemmatizer()

    for i in df.ingredients.values:
        i = re.sub('[^A-Za-z]+', ' ', i)  # removes punctuations
        i = " ".join(lemmatizer.lemmatize(a) for a in i.split() if a not in stopwords_lst)
        i = re.sub(' +', ' ', i)  # To remove extra spaces
        non_repetitive_ingredients = list()

        for ing in i.split():
            if ing not in non_repetitive_ingredients:
                non_repetitive_ingredients.append(ing)
        unique_ingredients_lst.append(non_repetitive_ingredients)


    def index_of_food(food_list):
        remove_food = []
        for index, i in enumerate(unique_ingredients_lst):
            for j in i:
                if j in lst_allergy:
                    remove_food.append(index)
        return remove_food

    junk = index_of_food(lst_allergy)

    desired_indices = [i for i in range(len(unique_ingredients_lst)) if i not in junk]

    recipe_df = df.loc[desired_indices]
    recipe_df.reset_index(inplace=True, drop=True)
    new_rows = recipe_df.shape[0]
    print("\nOut of {}, {} are droped by Process 6\nProcess 6: Completed".format(old_rows, (old_rows - new_rows)))
    return recipe_df

### 8. Final Function to call all above Functions & Perform Final Filtration on Recipe Dataset

In [9]:
def recommend_recipes_with_allergies(age, weight, height, gender, health_issues, activeness, goal, food_type, lst_allergy, activity_multiplier, per_meal=0.35):
    
    recipe_df = get_data()
    recipe_df = data_cleaning(recipe_df)
    bmr, final_tdee = calculate_bmr_tdee(age, weight, height, gender, activeness, goal, activity_multiplier)
    min_carbs, max_carbs, min_fats, max_fats, min_proteins, max_proteins, cholesterol, sodium = calculate_nutritional_limits_day(health_issues, final_tdee)


    """Maximum amount of Nutrients Per Meal"""
    max_carbohydrate = round(max_carbs * per_meal, ndigits=2)
    max_fat = round(max_fats * per_meal, ndigits=2)
    max_protein = round(max_proteins * per_meal, ndigits=2)
    max_cholesterol = round(cholesterol * per_meal, ndigits=2)
    max_sodium = round(sodium * per_meal, ndigits=2)
    
    print("\nMaximum Amount of Nutrients under which all Recommended Recipes should come :")
    print("Maximum Carbohydrates for a Meal - {}g\nMaximum Fats for a Meal - {}g\nMaximum Proteins for a Meal - {}g\nMaximum Cholesterol for a Meal - {}mg\nMaximum Sodium for a Meal - {}mg\n".format(max_carbohydrate, max_fat, max_protein, max_cholesterol, max_sodium))

    recipe_df_new = food_type_finder(food_type, recipe_df)
    recipe_df_new = filter_allergic_recipes(recipe_df_new, lst_allergy)
    
    print("\nProcess 7: Final Filtration Process Started...")
    old_rows = recipe_df_new.shape[0]
    recommended_recipes_df = recipe_df_new[(recipe_df_new["fatContent"] < max_fat) & (recipe_df_new["carbohydrateContent"] < max_carbohydrate) & (recipe_df_new["proteinContent"] < max_protein) & (recipe_df_new["cholesterolContent"] < max_cholesterol) & (recipe_df_new["sodiumContent"] < max_sodium)]
    recommended_recipes_df.reset_index(inplace=True, drop=True)
    new_rows = recommended_recipes_df.shape[0]
    print("\nOut of {}, {} are droped using Final Filtration Process.\nProcess 7: Completed".format(old_rows, (old_rows - new_rows)))

    print("\nRecommended Recipes for User : {} Recipes".format(recommended_recipes_df.shape[0]))

    return recipe_df, recommended_recipes_df

def recommend_recipes_without_allergies(age, weight, height, gender, health_issues, activeness, goal, food_type, activity_multiplier, per_meal=0.35):
    
    recipe_df = get_data()
    recipe_df = data_cleaning(recipe_df)
    bmr, final_tdee = calculate_bmr_tdee(age, weight, height, gender, activeness, goal, activity_multiplier)
    min_carbs, max_carbs, min_fats, max_fats, min_proteins, max_proteins, cholesterol, sodium = calculate_nutritional_limits_day(health_issues, final_tdee)


    """Maximum amount of Nutrients Per Meal"""
    max_carbohydrate = round(max_carbs * per_meal, ndigits=2)
    max_fat = round(max_fats * per_meal, ndigits=2)
    max_protein = round(max_proteins * per_meal, ndigits=2)
    max_cholesterol = round(cholesterol * per_meal, ndigits=2)
    max_sodium = round(sodium * per_meal, ndigits=2)
    
    print("\nMaximum Amount of Nutrients under which all Recommended Recipes should come :")
    print("Maximum Carbohydrates for a Meal - {}g\nMaximum Fats for a Meal - {}g\nMaximum Proteins for a Meal - {}g\nMaximum Cholesterol for a Meal - {}mg\nMaximum Sodium for a Meal - {}mg\n".format(max_carbohydrate, max_fat, max_protein, max_cholesterol, max_sodium))

    recipe_df_new = food_type_finder(food_type, recipe_df)
    
    print("\nProcess 6: Final Filtration Process Started...")
    recommended_recipes_df = recipe_df_new[(recipe_df_new["fatContent"] < max_fat) & (recipe_df_new["carbohydrateContent"] < max_carbohydrate) & (recipe_df_new["proteinContent"] < max_protein) & (recipe_df_new["cholesterolContent"] < max_cholesterol) & (recipe_df_new["sodiumContent"] < max_sodium)]
    recommended_recipes_df.reset_index(inplace=True, drop=True)
    print("\nProcess 6: Completed")

    print("\nRecommended Recipes for User : {} Recipes".format(recommended_recipes_df.shape[0]))

    return recipe_df, recommended_recipes_df


## Taking User Inputs & Calling Recommender System for Recommendation

In [10]:
"""Activity Multiplier as Dictionary"""
activity_multiplier = {
    "Sedentary": 1.2,
    "Lightly Active": 1.375,
    "Moderately Active": 1.55,
    "Very Active": 1.725,
    "Extra Active": 1.9
}


"""Percentage of Calorie intake in a Meal"""
per_meal = 0.35


"""Taking Inputs from Users"""

print("USER INPUTS:\n")
age = int(input("Age : "))
weight = int(input("Weight (in Kg) : "))
height = int(input("Height (in cm) : "))
gender = input("Gender {Male / Female} : ")
health_issues = input("Any Health Issues {Diabetes / Hypertension / Hypotension / None} : ")
activeness = input("Activeness Level {Sedentary / Lightly Active / Moderately Active / Very Active / Extra Active} : ")
goal = input("Lose Weight / Maintain Weight / Gain Weight : ")
food_type = input("Veg / Non-Veg / Any : ")

yes_no = input("Are you Allergic to any kind of Food or Ingredients {Yes / No} : ")

if yes_no == "Yes":
    lst_allergy = []
    allergy = input("Please provide all the ingredients you are allergic with {Give only a Space between each Allergy} : ")

    for i in allergy.split():
        lst_allergy.append(i.lower())
    recipe_df, recommended_recipes_df = recommend_recipes_with_allergies(age, weight, height, gender, health_issues, activeness, goal, food_type, lst_allergy, activity_multiplier, per_meal)

elif yes_no == "No":
    recipe_df, recommended_recipes_df = recommend_recipes_without_allergies(age, weight, height, gender, health_issues, activeness, goal, food_type, activity_multiplier, per_meal)
    pass

# """Let's get some Recipe's Recommendation"""
# start_time = timer(None)
# recipe_df, recommended_recipes_df = recommend_recipes(age, weight, height, gender, health_issues, activeness, goal, food_type, activity_multiplier, per_meal)
# timer(start_time)

USER INPUTS:

Age : 22
Weight (in Kg) : 75
Height (in cm) : 174
Gender {Male / Female} : Male
Any Health Issues {Diabetes / Hypertension / Hypotension / None} : None
Activeness Level {Sedentary / Lightly Active / Moderately Active / Very Active / Extra Active} : Sedentary
Lose Weight / Maintain Weight / Gain Weight : Lose Weight
Veg / Non-Veg / Any : Any
Are you Allergic to any kind of Food or Ingredients {Yes / No} : Yes
Please provide all the ingredients you are allergic with {Give only a Space between each Allergy} : Butter Strawberry Corn

Process 1: Loading Dataset from MongoDB...

Information on Recipe Data after Loading from MongoDB :
Number of Recipes loaded : 1000
Number of Features in Recipe Data : 17

 Time taken: 0 hours 0 minutes and 2.61 seconds.

Process 2: Cleaning & Pre-processing Dataset...

Information on Recipe Data after Cleaning :
Number of Recipes loaded - 1000
Number of Features in Recipe Data - 17

 Time taken: 0 hours 0 minutes and 13.56 seconds.

Process 3: C

In [11]:
recommended_recipes_df

Unnamed: 0,_id,id,title,type,calories,carbohydrateContent,fiberContent,sugarContent,fatContent,saturatedFatContent,proteinContent,cholesterolContent,sodiumContent,ingredients,instructions,url,image_url
0,60e214669f6805b90e52631c,00011fc1f9,Lentils Vegetable Soup,1.0,178.1,16.9,6.0,4.1,10.6,1.5,5.5,0.0,51.7,1 cup lentils 12 onion (finely chopped) 1 toma...,"saute the onions, carrots, celery with olive o...",http://www.food.com/recipe/lentils-vegetable-s...,https://delightfulvegans.com/wp-content/upload...
1,60e214669f6805b90e526326,00029df38f,Praline Kisses,1.0,48.0,5.8,0.4,5.2,2.8,0.2,0.5,0.0,10.3,"1 egg white, room temperature 18 teaspoon salt...",preheat oven to 325 degrees farenheit. grease ...,http://www.food.com/recipe/praline-kisses-189697,https://img-global.cpcdn.com/recipes/7a0ca7685...
2,60e214669f6805b90e526336,00054e238b,Half-Time Sunshine Bars,1.0,123.6,21.5,1.6,6.5,3.5,0.5,2.1,9.3,67.8,"1 12 cups old fashioned oats 1 cup flour, all-...",heat oven to 375 degrees and grease a 13x9-inc...,http://www.food.com/recipe/half-time-sunshine-...,https://bakeorbreak.com/wp-content/uploads/201...
3,60e214669f6805b90e52634c,00089c3010,Easy Italian-Style Chili,0.0,298.0,25.7,5.4,3.0,12.2,4.6,20.5,51.4,299.9,1 lb ground beef (80% lean) 1 cup chopped onio...,brown meat and onion in large saucepan; drain....,http://www.food.com/recipe/easy-italian-style-...,https://champagne-tastes.com/wp-content/upload...
4,60e214669f6805b90e52635b,0009dae0d8,Zucchini Carrot Spice Muffins,1.0,232.0,33.3,0.9,18.7,9.9,1.6,2.9,26.4,221.3,2 cups sugar 3 eggs 1 cup oil 2 teaspoons vani...,preheat oven to 350*. squeeze excess moisture ...,http://www.food.com/recipe/zucchini-carrot-spi...,https://images-na.ssl-images-amazon.com/images...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,60e214669f6805b90e5266d3,007ef0f9a3,Honey Whipped Cream,1.0,128.1,7.4,0.0,6.6,11.0,6.8,0.6,40.8,11.7,1 cup heavy whipping cream 3 tablespoons honey...,beat whipping cream until it starts to thicken...,http://www.food.com/recipe/honey-whipped-cream...,https://goldengrain-bakery.com/wp-content/uplo...
306,60e214669f6805b90e5266f6,0083031ddf,Blueberry Trifle,1.0,455.0,71.0,1.3,53.0,16.5,10.2,8.4,54.8,509.3,"1 (8 ounce) package cream cheese, softened 12 ...","mix together cream cheese, and powdered sugar....",http://www.food.com/recipe/blueberry-trifle-30...,https://cdn.greatlifepublishing.net/wp-content...
307,60e214669f6805b90e5266f3,0082a58df7,Deceptively Delicious Vegan Chocolate Cookies,1.0,84.8,14.8,0.7,9.1,2.9,1.1,1.0,0.0,52.0,14 cup applesauce 2 tablespoons oil 34 cup sug...,"cream applesauce, oil, sugar and 1 t water. mi...",http://www.food.com/recipe/deceptively-delicio...,https://shortgirltallorder.com/wp-content/uplo...
308,60e214669f6805b90e5266ec,0081edad7d,Champagne St Moritz,1.0,44.3,1.6,0.0,1.3,0.0,0.0,0.1,0.0,0.4,1 ounce gin 1 ounce apricot brandy 1 ounce ora...,"shake gin, brandy and orange juice with ice. s...",http://www.food.com/recipe/champagne-st-moritz...,https://upload.travelawaits.com/ta/uploads/202...


In [12]:
recommended_recipes_df.type.value_counts()

1.0    234
0.0     76
Name: type, dtype: int64

In [13]:
recipe_df

Unnamed: 0,_id,id,title,type,calories,carbohydrateContent,fiberContent,sugarContent,fatContent,saturatedFatContent,proteinContent,cholesterolContent,sodiumContent,ingredients,instructions,url,image_url
0,60e214669f6805b90e526327,0002e15d76,Party Dog Appetizers,0.0,718.5,19.2,1.0,12.7,54.9,18.7,36.2,108.3,2925.6,"1 onion, finely chopped 3 tablespoons oil 1 cu...",grease a 13 x 9-inch baking dish. in a large s...,http://www.food.com/recipe/party-dog-appetizer...,http://www.thecomfortofcooking.com/wp-content/...
1,60e214669f6805b90e52631c,00011fc1f9,Lentils Vegetable Soup,1.0,178.1,16.9,6.0,4.1,10.6,1.5,5.5,0.0,51.7,1 cup lentils 12 onion (finely chopped) 1 toma...,"saute the onions, carrots, celery with olive o...",http://www.food.com/recipe/lentils-vegetable-s...,https://delightfulvegans.com/wp-content/upload...
2,60e214669f6805b90e526326,00029df38f,Praline Kisses,1.0,48.0,5.8,0.4,5.2,2.8,0.2,0.5,0.0,10.3,"1 egg white, room temperature 18 teaspoon salt...",preheat oven to 325 degrees farenheit. grease ...,http://www.food.com/recipe/praline-kisses-189697,https://img-global.cpcdn.com/recipes/7a0ca7685...
3,60e214669f6805b90e526336,00054e238b,Half-Time Sunshine Bars,1.0,123.6,21.5,1.6,6.5,3.5,0.5,2.1,9.3,67.8,"1 12 cups old fashioned oats 1 cup flour, all-...",heat oven to 375 degrees and grease a 13x9-inc...,http://www.food.com/recipe/half-time-sunshine-...,https://bakeorbreak.com/wp-content/uploads/201...
4,60e214669f6805b90e52634c,00089c3010,Easy Italian-Style Chili,0.0,298.0,25.7,5.4,3.0,12.2,4.6,20.5,51.4,299.9,1 lb ground beef (80% lean) 1 cup chopped onio...,brown meat and onion in large saucepan; drain....,http://www.food.com/recipe/easy-italian-style-...,https://champagne-tastes.com/wp-content/upload...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,60e214669f6805b90e5266f9,00834d6bff,Three-layer Brownies,1.0,385.0,48.5,0.8,37.3,19.4,5.0,5.1,20.8,185.3,1 (19 1/2 ounce) box dark chocolate brownie mi...,prepare the dark chocolate brownie mix as dire...,http://www.food.com/recipe/three-layer-brownie...,https://bromabakery.com/wp-content/uploads/201...
996,60e214669f6805b90e5266d0,007e9cfd78,Vegetable Rolls,0.0,53.1,8.5,0.6,1.2,1.5,0.7,1.6,12.7,121.5,"1 carrot, grated 1 zucchini, grated 12 small r...","combine vegetables, barbecue sauce and stuffin...",http://www.food.com/recipe/vegetable-rolls-336161,https://www.tamarindnthyme.com/wp-content/uplo...
997,60e214669f6805b90e5266ea,0081bb2b38,Creamy Potato Leek Soup,0.0,386.2,44.3,5.6,4.2,18.6,10.8,11.8,61.1,1231.8,"12 cup sliced leek, washed well 2 cans chicken...",bring broth to a boil and reduce to medium. ad...,http://www.food.com/recipe/creamy-potato-leek-...,https://www.bakerita.com/wp-content/uploads/20...
998,60e214669f6805b90e5266ec,0081edad7d,Champagne St Moritz,1.0,44.3,1.6,0.0,1.3,0.0,0.0,0.1,0.0,0.4,1 ounce gin 1 ounce apricot brandy 1 ounce ora...,"shake gin, brandy and orange juice with ice. s...",http://www.food.com/recipe/champagne-st-moritz...,https://upload.travelawaits.com/ta/uploads/202...


***|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|***

#### Shubham's Code

In [14]:
"""Cleaning & Pre-processing Nutrientional Values in Dataset as some are object & some are int/float in type."""

# col_lst = ['calories', 'fatContent', 'saturatedFatContent', 'cholesterolContent', 'sodiumContent', 'carbohydrateContent', 'fiberContent', 'sugarContent', 'proteinContent']

# lst_calories = []
# lst_fatContent = []
# lst_saturatedFatContent = []
# lst_cholesterolContent = []
# lst_sodiumContent = []
# lst_carbohydrateContent = []
# lst_fiberContent = []
# lst_sugarContent = []
# lst_proteinContent = []


# for i in col_lst:
#     print("\n{}\n".format(i))
#     for a in tqdm(recipe_df[i]):
#         try:
#             if type(a) == str:
#                 x = re.sub("[a-z]*", "", a)
#                 locals()["lst_{}".format(i)].append(float(x))
#             else:
#                 locals()["lst_{}".format(i)].append(a)
#         except Exception as e:
#             print("Error: ", e)
#     print("{}".format(i), len(locals()["lst_{}".format(i)]))

'Cleaning & Pre-processing Nutrientional Values in Dataset as some are object & some are int/float in type.'

In [15]:
"""Converting Nutrient's Lists into Series & Re-ordering Features"""

# df = recipe_df.copy()
# df1 = recipe_df.copy()
# full_col_lst = ['_id', 'id', 'title', 'calories', 'carbohydrateContent', 'fiberContent', 'sugarContent', 'fatContent', 'saturatedFatContent', 'proteinContent', 'cholesterolContent', 'sodiumContent', 'ingredients', 'instructions', 'url', 'image_url']

# col_lst = ['calories', 'fatContent', 'saturatedFatContent', 'cholesterolContent', 'sodiumContent', 'carbohydrateContent', 'fiberContent', 'sugarContent', 'proteinContent']

# df = df.drop(full_col_lst, axis=1)

# for index, i in enumerate(full_col_lst):
#     if i in col_lst:
#         locals()["{}_series".format(i)] = pd.Series(locals()["lst_{}".format(i)], name=i)
#         df.insert(index, i, locals()["{}_series".format(i)])        
# #         print("{} - {}".format(i, len(locals()["lst_{}".format(i)])))
#     else:
# #         locals()["{}_series".format(i)] = pd.Series(list(recipe_df[i]), name=i)
#         locals()["{}_series".format(i)] = df1.pop(i)
#         df.insert(index, i, locals()["{}_series".format(i)])
# #         print("{} - {}".format(i, len(locals()["{}_series".format(i)])))
# df

"Converting Nutrient's Lists into Series & Re-ordering Features"

***|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|***
#### Ankur's Code

In [16]:
"""Code for Ingredients & Instructions Pre-processing"""

# for i in range(len(recipe_df.ingredients)):
#     lst = []
#     ing = recipe_df.ingredients[i]
#     for ingred in ing:
#         ingredients_text = ingred['text']
#         lst.append(ingredients_text)
#         l = ' '.join(lst)
#         recipe_df.ingredients[i] = str(l.lower())

# for i in range(len(recipe_df.instructions)):
#     instructions = []
#     ins = recipe_df.instructions[i]
#     for instruc in ins:
#         instructions_text = instruc['text']
#         instructions.append(instructions_text)
#         instructions_single_string = ' '.join(instructions)
#         recipe_df.instructions[i] = str(instructions_single_string.lower())

"""Code for Filtering Recipes based on Veg or Non-Veg"""

# non_veg = ['Chicken', 'Turkey', 'Ham', 'Pork Chops', 'Beef', 'Lamb', 'Moose Meat', 'Venison', 'Caribou', 'Elk', 'Buffalo', 'Bear Meat', 'Veal', 'Salmon', 'Trout', 'Tilapia', 'Mahi Mahi', 'Cod', 'Halibut', 'Pike', 'Mackerel', 'Sea Bass', 'Tuna', 'Shark', 'Swordfish', 'Catfish', 'Carp', 'Flounder', 'Red Snapper', 'Perch', 'Sturgeon', 'Striped Bass', 'Barramundi', 'Red Mullet', 'Whale Meat', 'Anchovy', 'Sardines', 'Herring', 'Eel', 'Octopus', 'Squid', 'Cuttlefish', 'Pufferfish', 'Oysters', 'Clams', 'Mussels', 'Cockles', 'Scallops', 'Shrimp', 'Prawns', 'Lobster', 'Crab', 'Crayfish', 'Conch', 'Percebes', 'Goose Barnacles', 'Abalone', 'Escargot', 'Whelks', 'Frog Legs', 'Snake', 'Bacon', 'Duck', 'Cornish Game Hen', 'Goose', 'Grouse', 'Pheasant', 'Quail', 'Squab', 'Pigeon', 'Guineafowl', 'Guinea Fowl', 'Partridge', 'Snipe', 'Swan', 'Goat', 'Rabbit', 'Horse Meat', 'Antelope Meat', 'Emu', 'Kangaroo', 'Ostrich', 'Guinea Pig', 'Crickets', 'Tripe', 'Mutton', 'Bison', 'Alligator', 'Crocodile', 'Seal Meat', 'Dolphin', 'Alpaca Meat', 'Turtle Meat', 'Capybara', 'Possum Meat', 'Donkey Meat', 'Wild Boar', 'Squirrel', 'Hare', 'Wolf Meat', 'Zebra Meat', 'Beondegi', 'Steamed Silkworm Pupae', 'Sago Grubs', 'Chocolate Covered Ants', 'Llama Meat', 'Yak Meat', 'Impala Meat', 'Wildebeest Meat', 'Dog Meat', 'Cat Meat', 'Salt-Cured Meat', 'Salted Meat', 'Head Cheese', 'Sausage', 'Smoked Meat', 'Steak Tartare', 'Turducken', 'Barbecue', 'Ribs', 'Ground Meat', 'Bone Marrow', 'Pork Belly', 'Foie Gras', 'Balut', 'Shashlik Kebab', 'Haggis', 'Liver', 'Tongue', 'Svio', 'Boiled Sheep Head', 'Fish Head', 'Pigs Feet', 'Chicken Feet', 'Jellyfish', 'Sea Cucumber', 'Geoduck', 'Saumagen', 'Stuffed Pig Stomach', 'Beef Tendon', 'Stew', 'Shawarma', 'Meatballs', 'Pulled Pork', 'Fried Chicken', 'Chicharones', 'Rocky Mountain Oysters', 'Gizzard', 'Hot Dogs', 'Hamburger Patties', 'Spam', 'Deli Meat', 'Jerky', 'Cutlet', 'Meat Loaf', 'Scrapple', 'Pot Roast', 'Brisket', 'Kobe Beef', 'Oxtail', 'Sashimi', 'Poke', 'Dried Fish', 'Satay', 'Veggie Meat']
# non_veg = [x.lower() for x in non_veg]

# lst = list(recipe_df.ingredients)

# recipe_df["type"] = np.nan

# for i in range(len(lst)):
#     count = 0
#     x = lst[i].split(" ")
#     for a in x:
#         if a in non_veg:
#             recipe_df["type"][i] = 1
#             break
#         else:
#             count += 1
#             if count == len(x):
#                 recipe_df["type"][i] = 0
#                 break

"""Code for Filtering Recipes based on Allergies"""

# # lst_allergy = []
# # for i in range(int(input("Numbers : "))):
# #     x = input("allergy : ")
# #     lst_allergy.append(x.lower())
# allergy = input("Please provide all the ingredients you are allergic with : ")

# lst_allergy = []
# for i in allergy.split():
#     lst_allergy.append(i)

# lst_allergy

# df = recipe_df.copy()

# stopwords_lst = ['cups', 'cup', 'tablespoons', 'tablespoon', 'box', 'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"]
# # lst_ingredients = list()
# unique_ingredients_lst = list()
# lemmatizer = WordNetLemmatizer()

# for i in df.ingredients.values:
#     i = re.sub('[^A-Za-z]+', ' ', i)  # removes punctuations
#     i = " ".join(lemmatizer.lemmatize(a) for a in i.split() if a not in stopwords_lst)
#     i = re.sub(' +', ' ', i)  # To remove extra spaces
#     non_repetitive_ingredients = list()
    
#     for ing in i.split():
#         if ing not in non_repetitive_ingredients:
#             non_repetitive_ingredients.append(ing)
#     unique_ingredients_lst.append(non_repetitive_ingredients)


# def index_of_food(food_list):
#     remove_food = []
#     for index, i in enumerate(unique_ingredients_lst):
#         for j in i:
#             if j in lst_allergy:
#                 remove_food.append(index)
#     return remove_food

# junk = index_of_food(lst_allergy)

# desired_indices = [i for i in range(len(unique_ingredients_lst)) if i not in junk]

# recipe_df = df.loc[desired_indices]
# recipe_df

'Code for Filtering Recipes based on Allergies'

***|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|***