In [None]:
# import all common necessary packages
import pandas as pd
import numpy as np
import json
import re
import ast

# for postgres SQL database
import psycopg2

import warnings
warnings.filterwarnings('ignore')

## Common utility functions for
- creating a unique ingredient corpus from the dataset
- tagging each food item with seasons

### Configuration

In [None]:
import logging
import os

VERSION = "v0.1.0"
RELEASE_DATE = "16 Jun 2023"

##### DATABASE RELATED #####
# Settings for the database on localhost. The password can be supplied through
# the FOOD_DB_LOCAL_PASSWORD environment variable; the previous literal value
# is kept as the fallback so existing setups keep working.
DB_CONFIG_LOCAL = {
    "dbname": "food_db",
    "host": "localhost",
    "port": 5432,
    "username": "postgres",
    "password": os.environ.get("FOOD_DB_LOCAL_PASSWORD", "postgres")
}

# Settings for the cloud-hosted database. Host and password default to the
# empty placeholders used before and can be filled in from the environment
# (FOOD_DB_CLOUD_HOST / FOOD_DB_CLOUD_PASSWORD) instead of editing the notebook.
DB_CONFIG_CLOUD = {
    "dbname": "food_db",
    "host": os.environ.get("FOOD_DB_CLOUD_HOST", ""),
    "port": 5432,
    "username": "postgres",
    "password": os.environ.get("FOOD_DB_CLOUD_PASSWORD", "")
}

# select which database (local or cloud)
DB_CONFIG = DB_CONFIG_LOCAL

### Database Functions

In [None]:
class DatabaseAccess:
    """Open a psycopg2 connection from a settings mapping and hand it out on request."""

    def __init__(self, db_config):
        """Connect using the ``dbname``, ``username``, ``password``, ``host`` and
        ``port`` entries of ``db_config``."""
        # Translate the notebook's config keys into psycopg2.connect keywords.
        connect_kwargs = dict(
            database=db_config['dbname'],
            user=db_config['username'],
            password=db_config['password'],
            host=db_config['host'],
            port=db_config['port'],
        )
        self.conn = psycopg2.connect(**connect_kwargs)

    def getConnection(self):
        """Return the connection that was opened in ``__init__``."""
        connection = self.conn
        return connection

In [None]:
# create database connection
# Connects with the selected DB_CONFIG and keeps the connection in the
# notebook-wide name gbl_db_conn, which the following cells pass to pandas.
gbl_db_conn = DatabaseAccess(DB_CONFIG).getConnection()

### Read food_data from Database

In [None]:
# read food_data from db
# Loads every row and column of the "food_data" table into a DataFrame
# through the open gbl_db_conn connection.
df_food_data_sql = pd.read_sql_query('select * from "food_data"',con=gbl_db_conn)

In [None]:
# Report the (rows, columns) of the table that was just loaded.
print("df_food_data_sql shape: ", df_food_data_sql.shape)

df_food_data_sql shape:  (238763, 16)


In [None]:
# Preview the first rows of the loaded food data.
df_food_data_sql.head()

Unnamed: 0.1,Unnamed: 0,food_id,food_name,food_description,ingredients,nutrition,veg_or_non_veg,allergies,cuisine,course,diet,state,region,seasons,data_source,created_time
0,0,87867,greek yogurt cake with raisin syrup,this is a fabulous greek cake. you wont regret...,"['raisins', 'brandy', 'ouzo', 'lemon juice', '...","['3665.2', '142.0', '1533.0', '83.0', '115.0',...",non-veg,"['citrus', 'honey', 'milk', 'lactose', 'poultr...",,,,,,[],raw_recipes,2023-06-17 00:43:04.231724
1,1,507638,greek yogurt chicken,hubby brought this recipe home from work.,"['chicken breasts', 'parmesan cheese', 'breadc...","['358.2', '27.0', '3.0', '15.0', '73.0', '30.0...",non-veg,"['milk', 'lactose', 'poultry', 'dairy']",,,,,,[],raw_recipes,2023-06-17 00:43:04.232004
2,2,422389,greek yogurt dessert with honey and strawberries,healthy and low fat.,"['greek yogurt', 'honey', 'strawberries', 'nuts']","['11.5', '0.0', '7.0', '0.0', '0.0', '0.0', '0...",veg,"['honey', 'dairy']",,,,,,[],raw_recipes,2023-06-17 00:43:04.232257
3,3,427998,greek yogurt pancakes,a recipe posted for zwt - greece. from the pi...,"['greek yogurt', 'all-purpose flour', 'sugar',...","['32.6', '1.0', '4.0', '5.0', '3.0', '1.0', '1...",non-veg,"['gluten', 'poultry', 'sugar', 'dairy']",,,,,,[],raw_recipes,2023-06-17 00:43:04.232513
4,4,293735,greek yogurt parfait,"if you're not worried about going vegan, the s...","['soy yogurt', 'vanilla extract', 'honey', 'cl...","['310.8', '6.0', '215.0', '0.0', '11.0', '2.0'...",veg,"['honey', 'soy', 'dairy']",,,,,,[],raw_recipes,2023-06-17 00:43:04.232770


## Find unique ingredients keywords

In [None]:
# build the unique vocabulary
# Collect every distinct ingredient string across all recipes. A plain loop is
# used because the work is a side effect on the set, not a per-row result.
INGR_CORPUS = set()
for ingredient_list in df_food_data_sql["ingredients"].dropna():  # skip NULL rows
    if isinstance(ingredient_list, str):
        # The column can arrive as the text form of a list (e.g. "['a', 'b']");
        # parse it first, otherwise set.update would add single characters.
        ingredient_list = ast.literal_eval(ingredient_list)
    INGR_CORPUS.update(ingredient_list)

In [None]:
# remove any empty string element
# discard() is a no-op when "" is absent, whereas remove() would raise KeyError.
INGR_CORPUS.discard("")

In [None]:
# Lower-case every keyword, collapse entries that differed only by case, and
# sort the result so the serial numbers assigned from list position are the
# same on every run (iteration order of a set of strings is not stable across
# interpreter sessions).
INGR_CORPUS = sorted({word.lower() for word in INGR_CORPUS})

In [None]:
# Pair each keyword with its position in INGR_CORPUS; the position becomes "sno".
df_ingr_keywords = pd.DataFrame(list(enumerate(INGR_CORPUS)), columns=["sno", "keywords"])

In [None]:
# Report the (rows, columns) of the keyword table, then preview its first rows.
print("df_ingr_keywords shape: ", df_ingr_keywords.shape)
df_ingr_keywords.head()

df_ingr_keywords shape:  (28734, 2)


Unnamed: 0,sno,keywords
0,0,tiny pasta
1,1,chocolate ice cream cones
2,2,बड़ा चम्मच हरा धनिया काट
3,3,sour apple liqueur
4,4,पानी वेजिटेबल स्टॉक


In [None]:
# Write the keyword table to CSV without the DataFrame index.
# NOTE(review): to_csv does not create missing folders - confirm that
# ./data/processed/ exists before running this cell.
df_ingr_keywords.to_csv("./data/processed/ingredients_keywords.csv", index=False)

#### Save ingredient keywords into the database

In [None]:
# create database connection
# Rebinds gbl_db_conn to a new connection; the insert helper defined below
# reads this global.
# NOTE(review): the connection opened by the earlier cell is not explicitly
# closed before this rebinding - confirm that is intended.
gbl_db_conn = DatabaseAccess(DB_CONFIG).getConnection()

def ingredients_keywords_save_to_db(item):
    """Insert one keyword row into ``public.ingredients_keywords`` and commit it.

    Intended to be used with ``DataFrame.apply(..., axis=1)``.

    Parameters:
        item: object exposing ``sno`` and ``keywords`` attributes
            (for example one row of ``df_ingr_keywords``).

    Returns:
        None. Failures are reported with ``print`` and do not propagate, so
        one bad row does not stop the remaining rows from being processed.
    """
    global gbl_db_conn
    # Pre-bind so the finally clause is safe even when cursor() itself fails.
    cur = None
    try:
        cur = gbl_db_conn.cursor()
        cur.execute("INSERT INTO public.ingredients_keywords (sno, keywords) \
                                                VALUES (%s, %s)", \
                                                (item.sno, item.keywords))
        gbl_db_conn.commit()
    except (Exception, psycopg2.Error) as error:
        print("Error inserting data: ", error)
        # A failed statement leaves the transaction aborted; without a rollback
        # every later insert on this connection would be rejected as well.
        try:
            gbl_db_conn.rollback()
        except psycopg2.Error as rollback_error:
            print("Error rolling back transaction: ", rollback_error)
    finally:
        if cur is not None:
            cur.close()

    return

def print_data(item):
    """Print the ``keywords`` attribute of ``item`` to standard output."""
    keywords = item.keywords
    print(keywords)

In [None]:
# store all data into DB
#res = df_ingr_keywords.apply(ingredients_keywords_save_to_db, axis=1)

In [None]:
# read db and validate stored dataset
# Re-reads the whole "ingredients_keywords" table so its shape and first rows
# can be compared with df_ingr_keywords.
df_ingr_keywords_sql = pd.read_sql_query('select * from "ingredients_keywords"',con=gbl_db_conn)
print(df_ingr_keywords_sql.shape)
df_ingr_keywords_sql.head()

(28734, 3)


Unnamed: 0,sno,keywords,created_time
0,0,tiny pasta,2023-06-17 00:52:19.602177
1,1,chocolate ice cream cones,2023-06-17 00:52:19.611537
2,2,बड़ा चम्मच हरा धनिया काट,2023-06-17 00:52:19.612320
3,3,sour apple liqueur,2023-06-17 00:52:19.612907
4,4,पानी वेजिटेबल स्टॉक,2023-06-17 00:52:19.613693


In [None]:
# Close the connection used by the ingredient keyword cells.
gbl_db_conn.close()

## Tag seasons

In [None]:
import re

# Season -> trigger words, in the order seasons are reported. A word may be
# listed under more than one season (e.g. "cold"). Built once instead of on
# every call, since the function is applied to each row of the dataset.
_SEASON_KEYWORDS = {
    "spring": ("spring", "bloom", "flowers", "warm"),
    "summer": ("summer", "hot", "sun", "beach"),
    "autumn": ("autumn", "fall", "leaves", "cold", "harvest", "rainy"),
    "winter": ("winter", "snow", "christmas", "cold", "freeze"),
}


def detect_season(text):
    """Return the seasons whose trigger words occur as whole words in ``text``.

    Matching is case-insensitive and token based: "sun" matches the word
    "sun" but not "sunny".

    Parameters:
        text: free text to scan. Any non-string value (``None``, ``NaN``, ...)
            is treated as having no text.

    Returns:
        list[str]: matching season names in the fixed order spring, summer,
        autumn, winter; an empty list when nothing matches.
    """
    if not isinstance(text, str):
        return []

    # Replace each run of non-word characters with a space so punctuation
    # never sticks to a token.
    cleaned_text = re.sub(r'\W+', ' ', text)
    # A set gives constant-time membership checks for every keyword.
    tokens = set(cleaned_text.lower().split())

    return [
        season
        for season, keywords in _SEASON_KEYWORDS.items()
        if not tokens.isdisjoint(keywords)
    ]

# Tag every recipe with the seasons mentioned in its 'food_description'.
# Missing descriptions are treated as empty text for matching only: the
# 'food_description' column itself is left untouched, so NULL values are not
# rewritten as the literal strings "None"/"nan" before the frame is saved.
df_food_data_sql['seasons'] = (
    df_food_data_sql['food_description']
    .fillna("")
    .astype(str)
    .apply(detect_season)
)

In [None]:
# Preview the first rows after the 'seasons' column has been filled in.
df_food_data_sql.head()

Unnamed: 0,food_id,food_name,food_description,ingredients,nutrition,veg_or_non_veg,allergies,cuisine,course,diet,state,region,seasons,data_source,created_time
0,87867,greek yogurt cake with raisin syrup,this is a fabulous greek cake. you wont regret...,"[raisins, brandy, ouzo, lemon juice, water, su...","[3665.2, 142.0, 1533.0, 83.0, 115.0, 252.0, 20...",non-veg,"[citrus, honey, milk, lactose, poultry, sugar,...",,,,,,[],raw_recipes,2023-06-17 00:43:04.231724
1,507638,greek yogurt chicken,hubby brought this recipe home from work.,"[chicken breasts, parmesan cheese, breadcrumbs...","[358.2, 27.0, 3.0, 15.0, 73.0, 30.0, 3.0]",non-veg,"[milk, lactose, poultry, dairy]",,,,,,[],raw_recipes,2023-06-17 00:43:04.232004
2,422389,greek yogurt dessert with honey and strawberries,healthy and low fat.,"[greek yogurt, honey, strawberries, nuts]","[11.5, 0.0, 7.0, 0.0, 0.0, 0.0, 0.0]",veg,"[honey, dairy]",,,,,,[],raw_recipes,2023-06-17 00:43:04.232257
3,427998,greek yogurt pancakes,a recipe posted for zwt - greece. from the pi...,"[greek yogurt, all-purpose flour, sugar, bakin...","[32.6, 1.0, 4.0, 5.0, 3.0, 1.0, 1.0]",non-veg,"[gluten, poultry, sugar, dairy]",,,,,,[],raw_recipes,2023-06-17 00:43:04.232513
4,293735,greek yogurt parfait,"if you're not worried about going vegan, the s...","[soy yogurt, vanilla extract, honey, clementin...","[310.8, 6.0, 215.0, 0.0, 11.0, 2.0, 23.0]",veg,"[honey, soy, dairy]",,,,,,[],raw_recipes,2023-06-17 00:43:04.232770


In [None]:
# Show the (rows, columns) of the tagged frame.
df_food_data_sql.shape

(238763, 15)

In [None]:
# index=False keeps the positional index out of the file, matching how
# ingredients_keywords.csv is written and avoiding a stray "Unnamed: 0"
# column when the CSV is loaded again.
df_food_data_sql.to_csv("food_data_seasons.csv", index=False)

In [None]:
# List the column names; the insert helper in the next section reads a subset of them.
df_food_data_sql.columns

Index(['food_id', 'food_name', 'food_description', 'ingredients', 'nutrition',
       'veg_or_non_veg', 'allergies', 'cuisine', 'course', 'diet', 'state',
       'region', 'seasons', 'data_source', 'created_time'],
      dtype='object')

In [None]:
# create database connection
# Rebinds gbl_db_conn to a new connection; food_data_save_to_db below reads
# this global.
gbl_db_conn = DatabaseAccess(DB_CONFIG).getConnection()

def food_data_save_to_db(item):
    """Insert one food record into ``public.food_data`` and commit it.

    Intended to be used with ``DataFrame.apply(..., axis=1)``.

    Parameters:
        item: object exposing the attributes ``food_id``, ``food_name``,
            ``food_description``, ``ingredients``, ``nutrition``,
            ``veg_or_non_veg``, ``allergies``, ``cuisine``, ``course``,
            ``diet``, ``state``, ``region``, ``seasons`` and ``data_source``
            (for example one row of ``df_food_data_sql``).

    Returns:
        None. Failures are reported with ``print`` and do not propagate, so
        one bad row does not stop the remaining rows from being processed.
    """
    global gbl_db_conn
    # Pre-bind so the finally clause is safe even when cursor() itself fails.
    cur = None
    try:
        cur = gbl_db_conn.cursor()
        cur.execute("INSERT INTO public.food_data (food_id, food_name, food_description, ingredients, nutrition, veg_or_non_veg, allergies, cuisine, course, diet, state, region, seasons, data_source) \
                                                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", \
                                                (item.food_id, item.food_name, item.food_description, item.ingredients, item.nutrition, item.veg_or_non_veg, item.allergies, item.cuisine, item.course, item.diet, item.state, item.region, item.seasons, item.data_source))
        gbl_db_conn.commit()
    except (Exception, psycopg2.Error) as error:
        print("Error inserting data: ", error)
        # A failed statement leaves the transaction aborted; without a rollback
        # every later insert on this connection would be rejected as well.
        try:
            gbl_db_conn.rollback()
        except psycopg2.Error as rollback_error:
            print("Error rolling back transaction: ", rollback_error)
    finally:
        if cur is not None:
            cur.close()

    return

In [None]:
# store all data into DB
# Calls food_data_save_to_db once per row (axis=1). The helper returns nothing,
# so `res` carries no useful data; the call is made for its inserts.
res = df_food_data_sql.apply(food_data_save_to_db, axis=1)

In [None]:
# read db and validate stored dataset
# Re-reads the whole "food_data" table so its shape and first rows can be
# compared with df_food_data_sql.
df_food_data_sql1 = pd.read_sql_query('select * from "food_data"',con=gbl_db_conn)
print(df_food_data_sql1.shape)
df_food_data_sql1.head()

(238763, 15)


Unnamed: 0,food_id,food_name,food_description,ingredients,nutrition,veg_or_non_veg,allergies,cuisine,course,diet,state,region,seasons,data_source,created_time
0,87867,greek yogurt cake with raisin syrup,this is a fabulous greek cake. you wont regret...,"[raisins, brandy, ouzo, lemon juice, water, su...","[3665.2, 142.0, 1533.0, 83.0, 115.0, 252.0, 20...",non-veg,"[citrus, honey, milk, lactose, poultry, sugar,...",,,,,,{},raw_recipes,2023-06-17 12:08:04.545607
1,507638,greek yogurt chicken,hubby brought this recipe home from work.,"[chicken breasts, parmesan cheese, breadcrumbs...","[358.2, 27.0, 3.0, 15.0, 73.0, 30.0, 3.0]",non-veg,"[milk, lactose, poultry, dairy]",,,,,,{},raw_recipes,2023-06-17 12:08:04.559112
2,422389,greek yogurt dessert with honey and strawberries,healthy and low fat.,"[greek yogurt, honey, strawberries, nuts]","[11.5, 0.0, 7.0, 0.0, 0.0, 0.0, 0.0]",veg,"[honey, dairy]",,,,,,{},raw_recipes,2023-06-17 12:08:04.560659
3,427998,greek yogurt pancakes,a recipe posted for zwt - greece. from the pi...,"[greek yogurt, all-purpose flour, sugar, bakin...","[32.6, 1.0, 4.0, 5.0, 3.0, 1.0, 1.0]",non-veg,"[gluten, poultry, sugar, dairy]",,,,,,{},raw_recipes,2023-06-17 12:08:04.561593
4,293735,greek yogurt parfait,"if you're not worried about going vegan, the s...","[soy yogurt, vanilla extract, honey, clementin...","[310.8, 6.0, 215.0, 0.0, 11.0, 2.0, 23.0]",veg,"[honey, soy, dairy]",,,,,,{},raw_recipes,2023-06-17 12:08:04.562680


In [None]:
# Close the connection used by the food_data insert and validation cells.
gbl_db_conn.close()