In [1]:
from user_data import openai_key
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import os
import pandas as pd

In [2]:

os.environ['OPENAI_API_KEY'] = openai_key


In [14]:
# Prompt for Classification
prompt_classification = """
Using the given ingredients of a recipe and analysing them, You need to just return MAXIMUM ONE WORD and nothing else whether it is 'NonVeg' or 'Veg' or 'Vegan'. Ingredients: {ingredients}
"""

In [15]:
# role for model
prefix_messages = [{"role": "system", "content": "You are a accurate data annotator and classifier for Food Recipe"}]


In [16]:
prompt_template = PromptTemplate(
    input_variables=['ingredients'],
    template=prompt_classification
)

# gpt-3.5-turbo-instruct
llm = OpenAI(model='gpt-3.5-turbo-instruct',temperature='0')

# gpt-3.5-turbo
#llm = ChatOpenAI(model_name='gpt-3.5-turbo', 
#             temperature=0.1,
#             #prefix_messages=prefix_messages, max_tokens=256
#             )


chain = LLMChain(
    llm=llm,
    prompt=prompt_template,
    verbose=True
)

In [17]:
ingredients = "['1 c. firmly packed brown sugar', '1/2 c. evaporated milk', '1/2 tsp. vanilla', '1/2 c. broken nuts (pecans)', '2 Tbsp. butter or margarine', '3 1/2 c. bite size shredded rice biscuits']"

In [18]:
chain.run(ingredients)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Using the given ingredients of a recipe and analysing them, You need to just return MAXIMUM ONE WORD and nothing else whether it is 'NonVeg' or 'Veg' or 'Vegan'. Ingredients: ['1 c. firmly packed brown sugar', '1/2 c. evaporated milk', '1/2 tsp. vanilla', '1/2 c. broken nuts (pecans)', '2 Tbsp. butter or margarine', '3 1/2 c. bite size shredded rice biscuits']
[0m

[1m> Finished chain.[0m


'\nVegan'

Classification Testing on Data

In [19]:
import pandas as pd

In [20]:
df = pd.read_csv("recipe_top_1000_rows.csv")

In [21]:
df2 = df.head(50)

In [22]:
df2['GPT_35_turbo_OP'] = df2.ingredients.apply(lambda x: chain.run(x))



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Using the given ingredients of a recipe and analysing them, You need to just return MAXIMUM ONE WORD and nothing else whether it is 'NonVeg' or 'Veg' or 'Vegan'. Ingredients: ["1 c. firmly packed brown sugar", "1/2 c. evaporated milk", "1/2 tsp. vanilla", "1/2 c. broken nuts (pecans)", "2 Tbsp. butter or margarine", "3 1/2 c. bite size shredded rice biscuits"]
[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Using the given ingredients of a recipe and analysing them, You need to just return MAXIMUM ONE WORD and nothing else whether it is 'NonVeg' or 'Veg' or 'Vegan'. Ingredients: ["1 small jar chipped beef, cut up", "4 boned chicken breasts", "1 can cream of mushroom soup", "1 carton sour cream"]
[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Using the given ingredients of a 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['GPT_35_turbo_OP'] = df2.ingredients.apply(lambda x: chain.run(x))


In [None]:
# Nutrition Task
prompt_nutrition = "Using the given ingredients of a recipe, You need to just return nutritional information enclosed in double quotations containing comma separated values. Ingredients: "

In [None]:
prompt_template = PromptTemplate(
    input_variables=['ingredients'],
    template=prompt_classification
)

# gpt-3.5-turbo-instruct
llm = OpenAI(model='gpt-3.5-turbo-instruct',temperature='0')

# gpt-3.5-turbo
#llm = ChatOpenAI(model_name='gpt-3.5-turbo', 
#             temperature=0.1,
#             #prefix_messages=prefix_messages, max_tokens=256
#             )


chain = LLMChain(
    llm=llm,
    prompt=prompt_template,
    verbose=True
)

In [None]:
df2['nutritional_data'] = df2.ingredients.apply(lambda x: chain.run(x))

In [23]:
df2.to_csv("GPT_35_turbo_raw_output_v5_50.csv")

In [12]:
df2.head()

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,link,source,NER,site,GPT_35_turbo_OP
0,0,No-Bake Nut Cookies,"[""1 c. firmly packed brown sugar"", ""1/2 c. eva...","[""In a heavy 2-quart saucepan, mix brown sugar...",www.cookbooks.com/Recipe-Details.aspx?id=44874,Gathered,"[""bite size shredded rice biscuits"", ""vanilla""...",www.cookbooks.com,\nVegan
1,1,Jewell Ball'S Chicken,"[""1 small jar chipped beef, cut up"", ""4 boned ...","[""Place chipped beef on bottom of baking dish....",www.cookbooks.com/Recipe-Details.aspx?id=699419,Gathered,"[""cream of mushroom soup"", ""beef"", ""sour cream...",www.cookbooks.com,\nNonVeg
2,2,Creamy Corn,"[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg...","[""In a slow cooker, combine all ingredients. C...",www.cookbooks.com/Recipe-Details.aspx?id=10570,Gathered,"[""frozen corn"", ""pepper"", ""cream cheese"", ""gar...",www.cookbooks.com,\nNonVeg
3,3,Chicken Funny,"[""1 large whole chicken"", ""2 (10 1/2 oz.) cans...","[""Boil and debone chicken."", ""Put bite size pi...",www.cookbooks.com/Recipe-Details.aspx?id=897570,Gathered,"[""chicken gravy"", ""cream of mushroom soup"", ""c...",www.cookbooks.com,\nNonVeg
4,4,Reeses Cups(Candy),"[""1 c. peanut butter"", ""3/4 c. graham cracker ...","[""Combine first four ingredients and press in ...",www.cookbooks.com/Recipe-Details.aspx?id=659239,Gathered,"[""graham cracker crumbs"", ""powdered sugar"", ""p...",www.cookbooks.com,\nNonVeg
