# Model 1: Model building using food_data_db_v2, with data loaded from the SQL database

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import warnings

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder

# for postgres SQL database
import psycopg2

# Filter and ignore warnings
warnings.filterwarnings("ignore")

### Configuration

In [None]:
import logging
import os

VERSION = "v0.1.0"
RELEASE_DATE = "16 Jun 2023"

##### DATABASE RELATED #####
# Credentials and the cloud host can be supplied through environment variables
# so that real secrets never have to be written into the notebook. When a
# variable is not set, the previous hard-coded value is used as the default.
DB_CONFIG_LOCAL = {
    "dbname": "food_db",
    "host": "localhost",
    "port": 5432,
    "username": os.environ.get("FOOD_DB_LOCAL_USER", "postgres"),
    "password": os.environ.get("FOOD_DB_LOCAL_PASSWORD", "postgres"),
}

DB_CONFIG_CLOUD = {
    "dbname": "food_db",
    "host": os.environ.get("FOOD_DB_CLOUD_HOST", ""),
    "port": 5432,
    "username": os.environ.get("FOOD_DB_CLOUD_USER", "postgres"),
    "password": os.environ.get("FOOD_DB_CLOUD_PASSWORD", ""),
}

# select which database (local or cloud)
DB_CONFIG = DB_CONFIG_LOCAL

### Database Functions

In [None]:
class DatabaseAccess:
    """Open and own a psycopg2 connection built from a configuration dict.

    The instance can be used as a context manager so the connection is
    released when the ``with`` block exits::

        with DatabaseAccess(DB_CONFIG) as db:
            conn = db.getConnection()

    Parameters
    ----------
    db_config : dict
        Must provide the keys ``dbname``, ``username``, ``password``,
        ``host`` and ``port``; they are passed to ``psycopg2.connect``.
    """

    def __init__(self, db_config):
        self.conn = psycopg2.connect(
                            database=db_config['dbname'],
                            user=db_config['username'],
                            password=db_config['password'],
                            host=db_config['host'],
                            port=db_config['port'])

    def getConnection(self):
        """Return the connection opened in ``__init__``."""
        return self.conn

    def close(self):
        """Close the connection if it is still open; safe to call repeatedly."""
        conn = getattr(self, "conn", None)
        # `closed` is falsy while the connection is open.
        if conn is not None and not getattr(conn, "closed", False):
            conn.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never swallow an exception raised inside the `with` block.
        return False

In [None]:
# Open the database connection used by the queries in this notebook.
gbl_db_conn = DatabaseAccess(db_config=DB_CONFIG).getConnection()

In [None]:
# Load every row and column of the food_data table into a DataFrame.
df_food_data_sql = pd.read_sql_query(
    sql='select * from "food_data"',
    con=gbl_db_conn,
)

In [None]:
# Preview the first rows of the frame loaded from the food_data table.
df_food_data_sql.head()

Unnamed: 0,food_id,food_name,food_description,ingredients,ingredients_clean,nutrition,veg_or_non_veg,allergies,cuisine,course,diet,state,region,seasons,dietary_restrictions,data_source,created_time
0,137739,arriba baked winter squash mexican style,autumn is my favorite time of year to cook! th...,"[winter squash, mexican seasoning, mixed spice...","[winter squash, mexican seasoning, mixed spice...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",veg,"[honey, milk, lactose, olive, squash, dairy]",,,,,,[autumn],"[low fat, low lactose]",raw_recipes,2023-06-17 15:32:27.077620
1,31490,a bit different breakfast pizza,this recipe calls for the crust to be prebaked...,"[prepared pizza crust, sausage patty, eggs, mi...","[prepared pizza crust, sausage patty, egg, mil...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",non-veg,"[gluten, milk, lactose, dairy]",,,,,,[],"[low carb, low caffeine]",raw_recipes,2023-06-17 15:32:27.083003
2,112140,all in the kitchen chili,this modified version of 'mom's' chili was a h...,"[ground beef, yellow onions, diced tomatoes, t...","[ground beef, yellow onion, diced tomato, toma...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",non-veg,"[legume, ltp, milk, lactose, nightshade, dairy]",,,,,,"[autumn, winter]",[],raw_recipes,2023-06-17 15:32:27.083932
3,59389,alouette potatoes,"this is a super easy, great tasting, make ahea...","[spreadable cheese with garlic and herbs, new ...","[spreadable cheese with garlic and herb, new p...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",veg,"[hypersensitivity, milk, lactose, olive, potat...",,,,,,[],[],raw_recipes,2023-06-17 15:32:27.084828
4,44061,amish tomato ketchup for canning,my dh's amish mother raised him on this recipe...,"[tomato juice, apple cider vinegar, sugar, sal...","[tomato juice, apple cider vinegar, sugar, sal...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",veg,"[nightshade, oral, sugar]",,,,,,[],[low fat],raw_recipes,2023-06-17 15:32:27.085716


In [None]:
# List the column names available in the loaded frame.
df_food_data_sql.columns

Index(['food_id', 'food_name', 'food_description', 'ingredients',
       'ingredients_clean', 'nutrition', 'veg_or_non_veg', 'allergies',
       'cuisine', 'course', 'diet', 'state', 'region', 'seasons',
       'dietary_restrictions', 'data_source', 'created_time'],
      dtype='object')

## Model with top 5 recommended foods based on user input

In [None]:
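# NOTE: this whole cell is a commented-out earlier draft (hard filters, top 5) kept for reference only.
# It refers to columns (ingredients_str_lemmatized, seasonal_preference, name) that are not among
# the columns selected into model_data further down in this notebook.
#import pandas as pd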
#from sklearn.feature_extraction.text import TfidfVectorizer
#from sklearn.metrics.pairwise import linear_kernel

#def recommend_food(user_input):
    # Filter based on veg_or_non_veg preference
 #   model_data_filtered = model_data[model_data["veg_or_non_veg"] == user_input['veg_or_non_veg']]
  #  print(f"After veg/non-veg filter: {len(model_data_filtered)} dishes left")

    # Filter out any foods that contain ingredients user is allergic to
   # for allergen in user_input['allergy']:
    #    model_data_filtered = model_data_filtered[~model_data_filtered['ingredients_str_lemmatized'].str.contains(allergen)]
     # print(f"After removing dishes containing {allergen}: {len(model_data_filtered)} dishes left")

    # Filter based on dietary restrictions
    # if user_input['dietary_restrictions'] != "no dietary restriction":
      #  model_data_filtered = model_data_filtered[model_data_filtered["dietary_restrictions"] == user_input['dietary_restrictions']]
       # print(f"After dietary restriction filter: {len(model_data_filtered)} dishes left")

    # Filter based on seasonal preference
   # model_data_filtered = model_data_filtered[model_data_filtered["seasonal_preference"] == user_input['seasonal_preference']]
   # print(f"After seasonal preference filter: {len(model_data_filtered)} dishes left")

    # Check if there's anything left
   # if len(model_data_filtered) == 0:
    #    print("No dishes left after filtering. Please try different preferences.")
     #   return None, None

    # Use TF-IDF to vectorize the ingredients_str_lemmatized
    #tfidf = TfidfVectorizer(stop_words='english')
    #model_data_filtered['ingredients_str_lemmatized'] = model_data_filtered['ingredients_str_lemmatized'].fillna('')
    #tfidf_matrix = tfidf.fit_transform(model_data_filtered['ingredients_str_lemmatized'])

    # Vectorize user's ingredients preference
    #user_pref_vector = tfidf.transform([user_input['ingredients_str_lemmatized']])

    # Calculate cosine similarity
    #cosine_similarities = linear_kernel(user_pref_vector, tfidf_matrix).flatten()

    # Get top 5 most similar items
    #top_5_indices = cosine_similarities.argsort()[:-6:-1]

    # Get corresponding cosine similarity scores
    #top_5_scores = cosine_similarities[top_5_indices]

    # Return names of the dishes and their cosine similarity scores
    #return model_data_filtered.iloc[top_5_indices]['name'], top_5_scores

# User inputs
#veg_or_non_veg = input("Please enter your preference (veg/non-veg): ")
#seasonal_preference = input("Please enter your seasonal preference: ")
#allergy = input("Please enter your allergies (separated by comma): ").split(", ")
#dietary_restrictions = input("Please enter your dietary restrictions: ")
#ingredients_str_lemmatized = input("Please enter your preferred ingredients: ")

#user_input = {
 #   'veg_or_non_veg': veg_or_non_veg,
  #  'seasonal_preference': seasonal_preference,
   # 'allergy': allergy,
   # 'dietary_restrictions': dietary_restrictions,
   # 'ingredients_str_lemmatized': ingredients_str_lemmatized
#}

#recommendations, scores = recommend_food(user_input)
#print("\nTop 5 food recommendations for you:")
#for i in range(5):
 #   print(f"{i+1}. {recommendations.iloc[i]} with a cosine similarity of {scores[i]}")


In [None]:
# Columns the recommender needs from the full food table.
columns = ["food_id", "food_name", "seasons", "ingredients_clean", "allergies", "dietary_restrictions", "veg_or_non_veg"]
# Take an explicit copy: later cells assign new column values on model_data,
# and doing that on a slice of df_food_data_sql triggers SettingWithCopyWarning
# and leaves it unclear which frame is modified.
model_data = df_food_data_sql[columns].copy()

In [None]:
# Preview the columns selected for the model.
model_data.head()

Unnamed: 0,food_id,food_name,seasons,ingredients_clean,allergies,dietary_restrictions,veg_or_non_veg
0,137739,arriba baked winter squash mexican style,[autumn],"[winter squash, mexican seasoning, mixed spice...","[honey, milk, lactose, olive, squash, dairy]","[low fat, low lactose]",veg
1,31490,a bit different breakfast pizza,[],"[prepared pizza crust, sausage patty, egg, mil...","[gluten, milk, lactose, dairy]","[low carb, low caffeine]",non-veg
2,112140,all in the kitchen chili,"[autumn, winter]","[ground beef, yellow onion, diced tomato, toma...","[legume, ltp, milk, lactose, nightshade, dairy]",[],non-veg
3,59389,alouette potatoes,[],"[spreadable cheese with garlic and herb, new p...","[hypersensitivity, milk, lactose, olive, potat...",[],veg
4,44061,amish tomato ketchup for canning,[],"[tomato juice, apple cider vinegar, sugar, sal...","[nightshade, oral, sugar]",[low fat],veg


In [None]:
# The model expects each list-valued column as one comma-separated string.
def _join_list(value):
    """Join a list of tokens with ", ".

    Strings are returned unchanged so that re-running this cell does not
    split an already-joined value into single characters; None/NaN become "".
    """
    if value is None or (isinstance(value, float) and value != value):
        return ""
    if isinstance(value, str):
        return value
    return ", ".join(map(str, value))


# Each column is rebuilt from ITSELF. Previously "seasons" was rebuilt from the
# already-joined "ingredients_clean" string, which produced values such as
# "w, i, n, t, e, r, ..." instead of the season names.
model_data = model_data.assign(
    ingredients_clean=model_data["ingredients_clean"].apply(_join_list),
    dietary_restrictions=model_data["dietary_restrictions"].apply(_join_list),
    seasons=model_data["seasons"].apply(_join_list),
)
model_data.head()

Unnamed: 0,food_id,food_name,seasons,ingredients_clean,allergies,dietary_restrictions,veg_or_non_veg
0,137739,arriba baked winter squash mexican style,"w, i, n, t, e, r, , s, q, u, a, s, h, ,, , m...","winter squash, mexican seasoning, mixed spice,...","[honey, milk, lactose, olive, squash, dairy]","low fat, low lactose",veg
1,31490,a bit different breakfast pizza,"p, r, e, p, a, r, e, d, , p, i, z, z, a, , c...","prepared pizza crust, sausage patty, egg, milk...","[gluten, milk, lactose, dairy]","low carb, low caffeine",non-veg
2,112140,all in the kitchen chili,"g, r, o, u, n, d, , b, e, e, f, ,, , y, e, l...","ground beef, yellow onion, diced tomato, tomat...","[legume, ltp, milk, lactose, nightshade, dairy]",,non-veg
3,59389,alouette potatoes,"s, p, r, e, a, d, a, b, l, e, , c, h, e, e, s...","spreadable cheese with garlic and herb, new po...","[hypersensitivity, milk, lactose, olive, potat...",,veg
4,44061,amish tomato ketchup for canning,"t, o, m, a, t, o, , j, u, i, c, e, ,, , a, p...","tomato juice, apple cider vinegar, sugar, salt...","[nightshade, oral, sugar]",low fat,veg


## Making the model flexible with scores in case of varied user preferences

In [None]:
def _split_tokens(value):
    """Turn a list or a comma-separated string into a set of lowercase tokens."""
    if value is None:
        return set()
    if isinstance(value, str):
        parts = value.split(",")
    else:
        try:
            parts = list(value)
        except TypeError:
            # e.g. NaN or another scalar: nothing to match against
            return set()
    return {str(part).strip().lower() for part in parts if str(part).strip()}


def recommend_food(user_input, top_n=10):
    """Rank the dishes in the global ``model_data`` for one user.

    Each dish gets a score made of:
      * +1   if its veg/non-veg flag equals the user's preference,
      * -0.5 for every user allergen found in its ingredient text,
      * +1   if it carries all of the user's dietary restrictions,
      * +1   if it is in at least one of the user's preferred seasons,
      * plus the TF-IDF similarity between its ingredients and the user's
        preferred ingredients.

    Parameters
    ----------
    user_input : dict
        Keys read: ``veg_or_non_veg`` (str), ``allergy`` (iterable of str),
        ``dietary_restrictions`` (str, ``"no dietary restriction"`` disables
        the bonus), ``seasonal_preference`` (list or comma-separated str) and
        ``ingredients_str_lemmatized`` (str).
    top_n : int, default 10
        Maximum number of dishes to return.

    Returns
    -------
    (DataFrame, ndarray)
        The rows of ``model_data`` with the highest combined score (best
        first) and, for each of them, its ingredient similarity only (NOT the
        combined score).
    """
    top_n = max(int(top_n), 0)

    # Nothing to rank; also avoids fitting TF-IDF on an empty corpus.
    if len(model_data) == 0:
        return model_data.iloc[[]], np.array([])

    # Create a score for each dish
    scores = np.zeros(len(model_data))

    # Give higher score to dishes that match veg_or_non_veg preference
    veg_pref = str(user_input['veg_or_non_veg']).strip().lower()
    veg_values = model_data["veg_or_non_veg"].astype(str).str.strip().str.lower()
    scores[(veg_values == veg_pref).to_numpy()] += 1

    # Local, NaN-free copy of the ingredient text: the global frame is not
    # modified, and missing values cannot break the substring masks below.
    ingredients = model_data['ingredients_clean'].fillna('').astype(str)
    ingredients_lower = ingredients.str.lower()

    # Give lower score to dishes that contain ingredients user is allergic to
    for allergen in user_input['allergy']:
        allergen = str(allergen).strip().lower()
        if not allergen:
            # An empty allergen would match (and penalise) every dish.
            continue
        # regex=False: user text is a literal substring, not a pattern.
        hit = ingredients_lower.str.contains(allergen, regex=False).to_numpy(dtype=bool)
        scores[hit] -= 0.5

    # Give higher score to dishes that match dietary restrictions
    diet_raw = str(user_input['dietary_restrictions']).strip().lower()
    wanted_diets = _split_tokens(diet_raw)
    if diet_raw != "no dietary restriction" and wanted_diets:
        # A dish may list several restrictions; it matches when it carries
        # every restriction the user asked for.
        diet_hit = model_data["dietary_restrictions"].apply(
            lambda value: wanted_diets <= _split_tokens(value)
        ).to_numpy(dtype=bool)
        scores[diet_hit] += 1

    # Give higher score to dishes that match seasonal preference
    wanted_seasons = _split_tokens(user_input['seasonal_preference'])
    if wanted_seasons:
        # A dish may be tagged with several seasons; one overlap is enough.
        season_hit = model_data["seasons"].apply(
            lambda value: bool(wanted_seasons & _split_tokens(value))
        ).to_numpy(dtype=bool)
        scores[season_hit] += 1

    # Use TF-IDF to vectorize the dishes' ingredient text
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(ingredients)

    # Vectorize user's ingredients preference
    user_pref_vector = tfidf.transform([user_input['ingredients_str_lemmatized']])

    # Calculate cosine similarity and add to scores
    cosine_similarities = linear_kernel(user_pref_vector, tfidf_matrix).flatten()
    scores += cosine_similarities

    # Positions of the top_n highest scoring dishes, best first
    top_indices = scores.argsort()[::-1][:top_n]

    # Ingredient similarity of those dishes (the combined score is not returned)
    top_scores = cosine_similarities[top_indices]

    # Return the selected dishes and their ingredient similarities
    return model_data.iloc[top_indices], top_scores

In [None]:
def _parse_csv(raw):
    """Split a comma-separated answer into trimmed, non-empty items."""
    return [item.strip() for item in raw.split(",") if item.strip()]


# User inputs
veg_or_non_veg = input("Please enter your preference (veg/non-veg): ").strip().lower()
# Accept "a,b" as well as "a, b"; an empty answer yields [] instead of [''].
seasonal_preference = _parse_csv(input("Please enter your seasonal preference (separated by comma): "))
allergy = _parse_csv(input("Please enter your allergies (separated by comma): "))
dietary_restrictions = input("Please enter your dietary restrictions: ").strip().lower()
ingredients_str_lemmatized = input("Please enter your preferred ingredients: ").strip()

user_input = {
    'veg_or_non_veg': veg_or_non_veg,
    'seasonal_preference': seasonal_preference,
    'allergy': allergy,
    'dietary_restrictions': dietary_restrictions,
    'ingredients_str_lemmatized': ingredients_str_lemmatized
}

recommendations, scores = recommend_food(user_input)

print(f"\nList of food-ids: {list(recommendations['food_id'])}")

print("\nTop ten food recommendations for you:")
# Iterate over what was actually returned: fewer than ten rows may come back.
for i in range(len(recommendations)):
    dish = recommendations.iloc[i]
    print(f"{i+1}. {dish['food_id']}, {dish['food_name']} with a cosine similarity of {scores[i]}")

Please enter your preference (veg/non-veg): veg
Please enter your seasonal preference (separated by comma): winter
Please enter your allergies (separated by comma): nut
Please enter your dietary restrictions: milk
Please enter your preferred ingredients: rice
List of food-ids: [250026, 335754, 164741, 339010, 600444, 39269, 234759, 700047, 16465, 339009]

Top ten food recommendations for you:
1. 250026, no fail foolproof   rice with a cosine similarity of 0.8159473867022633
2. 335754, perfect rice 2 cups with a cosine similarity of 0.8159473867022633
3. 164741, pressure cooker rice with a cosine similarity of 0.7684255749626663
4. 339010, himalayan red rice with a cosine similarity of 0.7684255749626663
5. 600444, Rice   Steamed Rice  with a cosine similarity of 0.7684255749626663
6. 39269, mirj s foolproof microwave rice   perfect every time with a cosine similarity of 0.7684255749626663
7. 234759, simple microwave cooked rice with a cosine similarity of 0.7332038062911125
8. 700047, 

In [None]:
# Show the preferences dict that was passed to recommend_food.
user_input

{'veg_or_non_veg': 'veg',
 'seasonal_preference': ['winter'],
 'allergy': ['nut'],
 'dietary_restrictions': 'milk',
 'ingredients_str_lemmatized': 'rice'}

In [None]:
# Number of rows returned by recommend_food.
len(recommendations)

10

In [None]:
pip freezecl

alabaster @ file:///home/ktietz/src/ci/alabaster_1611921544520/work
anaconda-client==1.11.0
anaconda-navigator==2.3.1
anaconda-project @ file:///C:/Windows/TEMP/abs_91fu4tfkih/croots/recipe/anaconda-project_1660339890874/work
aniso8601==9.0.1
anyio @ file:///C:/ci/anyio_1644481921011/work/dist
appdirs==1.4.4
argon2-cffi @ file:///opt/conda/conda-bld/argon2-cffi_1645000214183/work
argon2-cffi-bindings @ file:///C:/ci/argon2-cffi-bindings_1644551690056/work
arrow @ file:///opt/conda/conda-bld/arrow_1649166651673/work
astroid @ file:///C:/Windows/TEMP/abs_b0dtxgpicv/croots/recipe/astroid_1659023126745/work
astropy @ file:///C:/ci/astropy_1657719656942/work
atomicwrites==1.4.0
attrs @ file:///opt/conda/conda-bld/attrs_1642510447205/work
Automat @ file:///tmp/build/80754af9/automat_1600298431173/work
autopep8 @ file:///opt/conda/conda-bld/autopep8_1650463822033/work
Babel @ file:///tmp/build/80754af9/babel_1620871417480/work
backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work
