In [1]:
from user_data import openai_key
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import os
import pandas as pd

In [2]:

os.environ['OPENAI_API_KEY'] = openai_key


In [3]:
# Prompt for Classification
prompt_classification = """
Using the given ingredients of a recipe and analysing them, You need to just return MAXIMUM ONE WORD and nothing else whether it is 'NonVeg' or 'Veg' or 'Vegan'. Ingredients: {ingredients}
"""

In [4]:
# role for model
prefix_messages = [{"role": "system", "content": "You are a accurate data annotator and classifier for Food Recipe"}]


In [9]:
prompt_template = PromptTemplate(
    input_variables=['ingredients'],
    template=prompt_classification
)

# gpt-3.5-turbo-instruct
#llm = OpenAI(model='gpt-3.5-turbo-instruct',temperature='0')

# gpt-3.5-turbo
llm = ChatOpenAI(model_name='gpt-4-1106-preview', 
             temperature=0.6,
             #prefix_messages=prefix_messages, max_tokens=256
             )


chain = LLMChain(
    llm=llm,
    prompt=prompt_template,
    verbose=True
)

In [10]:
ingredients = "['1 c. firmly packed brown sugar', '1/2 c. evaporated milk', '1/2 tsp. vanilla', '1/2 c. broken nuts (pecans)', '2 Tbsp. butter or margarine', '3 1/2 c. bite size shredded rice biscuits']"

In [11]:
chain.run(ingredients)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Using the given ingredients of a recipe and analysing them, You need to just return MAXIMUM ONE WORD and nothing else whether it is 'NonVeg' or 'Veg' or 'Vegan'. Ingredients: ['1 c. firmly packed brown sugar', '1/2 c. evaporated milk', '1/2 tsp. vanilla', '1/2 c. broken nuts (pecans)', '2 Tbsp. butter or margarine', '3 1/2 c. bite size shredded rice biscuits']
[0m

[1m> Finished chain.[0m


'Veg'

Classification Testing on Data

In [12]:
import pandas as pd

In [13]:
df = pd.read_csv("recipe_top_1000_rows.csv")

In [15]:
df2 = df.head(100)

In [16]:
df2['GPT_4_turbo_OP'] = df2.ingredients.apply(lambda x: chain.run(x))



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Using the given ingredients of a recipe and analysing them, You need to just return MAXIMUM ONE WORD and nothing else whether it is 'NonVeg' or 'Veg' or 'Vegan'. Ingredients: ["1 c. firmly packed brown sugar", "1/2 c. evaporated milk", "1/2 tsp. vanilla", "1/2 c. broken nuts (pecans)", "2 Tbsp. butter or margarine", "3 1/2 c. bite size shredded rice biscuits"]
[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Using the given ingredients of a recipe and analysing them, You need to just return MAXIMUM ONE WORD and nothing else whether it is 'NonVeg' or 'Veg' or 'Vegan'. Ingredients: ["1 small jar chipped beef, cut up", "4 boned chicken breasts", "1 can cream of mushroom soup", "1 carton sour cream"]
[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Using the given ingredients of a 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['GPT_4_turbo_OP'] = df2.ingredients.apply(lambda x: chain.run(x))


In [17]:
df2.to_csv("GPT_4_turbo_raw_output_v1_100.csv")

In [18]:
df3 = df.iloc[100:501].copy()

In [19]:
df3['GPT_4_turbo_OP'] = df3.ingredients.apply(lambda x: chain.run(x))



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Using the given ingredients of a recipe and analysing them, You need to just return MAXIMUM ONE WORD and nothing else whether it is 'NonVeg' or 'Veg' or 'Vegan'. Ingredients: ["1 c. flour", "1 tsp. soda", "1 tsp. salt", "1 Tbsp. sugar", "1 egg", "3 Tbsp. margarine, melted", "1 c. buttermilk"]
[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Using the given ingredients of a recipe and analysing them, You need to just return MAXIMUM ONE WORD and nothing else whether it is 'NonVeg' or 'Veg' or 'Vegan'. Ingredients: ["1/4 c. butter", "1/4 lb. mushrooms, sliced", "1 garlic clove, minced", "1 medium zucchini, sliced thinly", "1/4 c. chopped onion", "1 c. diced, cooked ham", "4 eggs", "2 c. Ricotta cheese", "1 c. shredded Monterey Jack", "1 (10 oz.) pkg. frozen spinach, thawed and drained", "1/2 tsp. dill weed", "salt and pepper"]
[0m

[1m> Finis

In [20]:
df5 = pd.concat([df2,df3],axis=0)

In [21]:
df5.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 501 entries, 0 to 500
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Unnamed: 0      501 non-null    int64 
 1   title           501 non-null    object
 2   ingredients     501 non-null    object
 3   directions      501 non-null    object
 4   link            501 non-null    object
 5   source          501 non-null    object
 6   NER             501 non-null    object
 7   site            501 non-null    object
 8   GPT_4_turbo_OP  501 non-null    object
dtypes: int64(1), object(8)
memory usage: 35.4+ KB


In [None]:
df5.to_csv("GPT-4-TURBO_raw_output_v2.csv")