In [1]:
pip install openai==0.28

Note: you may need to restart the kernel to use updated packages.


In [2]:
import openai
import json
import pandas as pd
from config import API_KEY  # Import the API key from the config file

# Authenticate the openai client with the key imported from the local `config`
# module, so the secret itself is not written into this notebook.
openai.api_key = API_KEY

In [3]:
def get_response(prompt, model="gpt-3.5-turbo", max_tokens=150, temperature=0.7):
    """Send a single-turn chat prompt to OpenAI and return the cleaned reply text.

    Args:
        prompt: Text sent as the only ``user`` message.
        model: Chat model name passed to ``openai.ChatCompletion.create``.
        max_tokens: Upper bound on reply length, forwarded to the API.
        temperature: Sampling temperature forwarded to the API. Defaults to
            0.7, the value that used to be hard-coded here.

    Returns:
        The reply with surrounding whitespace removed, everything up to and
        including the last ``Output:`` marker dropped, and wrapping double
        quotes stripped. If anything raises, the string
        ``"An error occurred: <exception>"`` is returned instead of raising.
    """
    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        cleaned_response = response.choices[0].message['content'].strip()
        # Match on "Output:" without requiring a trailing space: the model may
        # answer `Output:"name"` as well as `Output: "name"`.
        if "Output:" in cleaned_response:
            cleaned_response = cleaned_response.split("Output:")[-1].strip()
        # Strip wrapping quotes whether or not a marker was present, so a bare
        # `"name"` reply is cleaned the same way as a prefixed one.
        return cleaned_response.strip('"').strip()
    except Exception as e:
        # Deliberate best-effort: callers receive a string either way. Note that
        # the error text is indistinguishable from a real answer unless the
        # caller checks for the "An error occurred:" prefix.
        return f"An error occurred: {e}"

def simplify_ingredient(ingredient):
    """Ask the chat model to reduce one raw ingredient string to its main name.

    A fixed few-shot instruction block is followed by a single
    ``Input: "<ingredient>"`` line, and the combined prompt is passed to
    ``get_response``.

    Args:
        ingredient: Raw ingredient text, e.g. ``"(3 tbsp.) olive oil"``.

    Returns:
        Whatever ``get_response`` returns for the assembled prompt.
    """
    # Instruction text sent verbatim on every call; only the Input line varies.
    instructions = """
You are an ingredient identifier, please do the following tasks:

Task: Simplify the given ingredient names into single keywords by ignoring quantities, units, and descriptive words. Extract only the main ingredient names.

Examples:

1. "(10 oz.) tomato sauce" simplifies to "tomato sauce"
2. "(10 oz.) frozen chopped spinach, thawed and squeezed dry" simplifies to "spinach"
3. "(14 oz.) sweetened condensed milk" simplifies to "milk"
4. "(2 cups) shredded cheddar cheese" simplifies to "cheddar cheese"
5. "(1/4 cup) finely chopped onions" simplifies to "onions"
6. "(3 tbsp.) olive oil" simplifies to "olive oil"
7. "(1 lb.) ground beef" is "ground beef"
8. "(1 lb.) grilled chicken breasts" simplifies to "chicken breasts"
9. "(1 lb.) chicken livers" simplifies to "chicken livers"
10. "zatarain\u2019s jambalaya mix" simplifies to "jambalaya mix"

Instructions:
1. Read the provided ingredient name.
2. Remove any quantity, unit, and unnecessary descriptive words.
3. Extract and provide the main ingredient name, ensuring to retain the meat type and specific part descriptions.

Input and Output Format:
Input: "(quantity unit) ingredient description"
Output: "main ingredient"

Here are some ingredients to simplify:
    """

    input_line = '\nInput: "{}"'.format(ingredient)
    return get_response(instructions + input_line)

def post_process_response(response):
    """Strip a leading ``Output:`` marker and wrapping double quotes from a reply.

    Only a marker at the start of the text is removed; any later occurrence of
    ``Output:`` inside the reply is left untouched. Surrounding whitespace is
    ignored when looking for the marker.

    Args:
        response: Reply text from the model (a string).

    Returns:
        The reply without the leading marker, wrapping double quotes, or
        surrounding whitespace.
    """
    prefix = "Output:"
    cleaned = response.strip()
    if cleaned.startswith(prefix):
        # Slice instead of str.replace so only the leading marker is dropped.
        cleaned = cleaned[len(prefix):].strip()
    # The model may wrap the name in quotes; remove them.
    return cleaned.strip('"').strip()

In [4]:
if __name__ == "__main__":
    # Load dataset from JSON file. The encoding is given explicitly so that
    # non-ASCII ingredient names are read the same way on every platform
    # (assumes the file is UTF-8 -- TODO confirm for this dataset).
    with open('../data/Recipe-Ingredients-Dataset/sample.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Iterate through the dataset and simplify ingredients (one API call each).
    # get_response reports failures as "An error occurred: ..." strings rather
    # than raising, so count them here instead of letting them pass silently.
    failed_calls = 0
    for entry in data:
        simplified_ingredients = [
            post_process_response(simplify_ingredient(ingredient))
            for ingredient in entry['ingredients']
        ]
        failed_calls += sum(
            name.startswith("An error occurred:") for name in simplified_ingredients
        )
        entry['simplified_ingredients'] = simplified_ingredients

    # Save the updated dataset to a new JSON file
    with open('../data/simplified_ingredients_train.json', 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)

    if failed_calls:
        print(f"Warning: {failed_calls} ingredient(s) could not be simplified; "
              "their entries contain the error message instead.")
    print("Simplified ingredients have been processed and saved.")

Simplified ingredients have been processed and saved.


In [5]:
# Helper: read a JSON file into a pandas object
def load_json(file_path):
    """Parse the JSON file at ``file_path`` with ``pd.read_json`` and return the result."""
    loaded = pd.read_json(file_path)
    return loaded

# Read back the dataset that the simplification cell wrote to disk
data = load_json('../data/simplified_ingredients_train.json')

# Turn the frame into one dict per row, then flatten those records into a table
recipe_records = data.to_dict(orient='records')
cuisine_df = pd.json_normalize(recipe_records)

In [9]:
# Preview the first rows to compare `ingredients` with `simplified_ingredients`
cuisine_df.head()

Unnamed: 0,id,cuisine,ingredients,simplified_ingredients
0,10259,greek,"[romaine lettuce, black olives, grape tomatoes...","[lettuce, olives, tomatoes, garlic, pepper, on..."
1,25693,southern_us,"[plain flour, ground pepper, salt, tomatoes, g...","[flour, pepper, salt, tomatoes, black pepper, ..."
2,20130,filipino,"[eggs, pepper, salt, mayonaise, cooking oil, g...","[eggs, pepper, salt, mayonaise, oil, chilies, ..."
3,22213,indian,"[water, vegetable oil, wheat, salt]","[water, oil, wheat, salt]"
4,13162,indian,"[black pepper, shallots, cornflour, cayenne pe...","[black pepper, shallots, cornflour, pepper, on..."
