In [39]:
from dotenv import load_dotenv
import json
from openai import OpenAI
import os
import pandas as pd

In [2]:
# Load the environment variables from the .env file
load_dotenv()

# Retrieve the OpenAI API key
api_key = os.getenv('OPENAI_API_KEY')

# Use it (e.g., in an OpenAI API call)
import openai
openai.api_key = api_key
client = OpenAI()


In [3]:
df = pd.read_csv('../data/plants_data.csv')
documents = df.to_dict(orient='records')

In [29]:
prompt_template = """
You are simulating a user interacting with our Indoor Plant Knowledge Assistant.
Based on the given plant record, create 5 complete, specific questions the user might ask about the plant. Only create questions for categories that contain information different than "No data available".
The questions must:

* Be relevant to the details in the record.
* Be clear and self-contained (not too short).
* Use as few exact words from the record as possible while keeping the meaning.

Plant record format:

plant name: {name}
summary: {summary}
cultivation: {cultivation}
toxicity: {toxicity}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [30]:
prompt = prompt_template.format(**documents[0])

In [31]:
print(prompt)

You are simulating a user interacting with our Indoor Plant Knowledge Assistant.
Based on the given plant record, create 5 complete, specific questions the user might ask about the plant. Only create questions for categories that contain information different than "No data available".
The questions must:

* Be relevant to the details in the record.
* Be clear and self-contained (not too short).
* Use as few exact words from the record as possible while keeping the meaning.

Plant record format:

plant name: Adelonema wallisii
summary: Adelonema wallisii (synonym Homalomena wallisii) is a species of aroid plant (family Araceae) native to Venezuela, Colombia, and Panama.


cultivation: No data available
toxicity: No data available

Provide the output in parsable JSON without using code blocks:

{"questions": ["question1", "question2", ..., "question5"]}


In [32]:
def llm(prompt):
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [33]:
questions = llm(prompt)

In [34]:
questions

'{"questions": ["What regions are native habitats for the Adelonema wallisii species?", "Can you tell me what family the Adelonema wallisii belongs to?", "Is there another name that Adelonema wallisii is known by?", "In which countries can I find Adelonema wallisii growing naturally?", "What type of plant is Adelonema wallisii classified as within its family?"]}'

In [35]:
def generate_questions(doc):
    prompt = prompt_template.format(**doc)

    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}]
    )

    json_response = response.choices[0].message.content
    return json_response

In [36]:
from tqdm.auto import tqdm

In [37]:
results = {}

In [48]:
documents

[{'id': 0,
  'name': 'Adelonema wallisii',
  'summary': 'Adelonema wallisii (synonym Homalomena wallisii) is a species of aroid plant (family Araceae) native to Venezuela, Colombia, and Panama.\n\n',
  'cultivation': 'No data available',
  'toxicity': 'No data available'},
 {'id': 1,
  'name': 'Adenium obesum',
  'summary': 'Adenium obesum, more commonly known as a desert rose, is a poisonous species of flowering plant belonging to the tribe Nerieae of the subfamily Apocynoideae of the dogbane family, Apocynaceae. It is native to the Sahel regions south of the Sahara (from Mauritania and Senegal to Sudan), tropical and subtropical eastern and southern Africa, as well as the Arabian Peninsula. Other names for the flower include Sabi star, kudu, mock azalea, and impala lily. Adenium obesum is a popular houseplant and bonsai in temperate regions.\n\n',
  'cultivation': "Adenium obesum is a popular houseplant and bonsai in temperate regions. It requires a sunny location and a minimum indoo

In [None]:
for doc in tqdm(documents): 
    if doc['id'] > 57:
        doc_id = doc['id']
        if doc_id in results:
            continue

        questions_raw = generate_questions(doc)
        questions = json.loads(questions_raw)
        results[doc_id] = questions['questions']

  0%|          | 0/197 [00:00<?, ?it/s]

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-XuePFf5vqiEA52dZbYdbKTzO on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}

In [50]:
results

{0: ['What regions is the Adelonema wallisii originally found in?',
  'Can you explain what family the Adelonema wallisii belongs to?',
  'What is a synonym for the plant species Adelonema wallisii?',
  'Is the Adelonema wallisii considered an aroid plant?',
  'In which countries can I find the Adelonema wallisii growing naturally?'],
 1: ['What specific conditions are necessary for Adenium obesum to thrive in terms of sunlight and temperature?',
  'Can you explain the different methods available for propagating Adenium obesum, including any differences in results from seeds and cuttings?',
  'How can I care for my Adenium obesum during the winter months to ensure its health?',
  'Why is Adenium obesum considered popular for bonsai cultivation?',
  "What is the significance of the Royal Horticultural Society's Award of Garden Merit for Adenium obesum?"],
 2: ['What are the ideal growing conditions for Adiantum aethiopicum in indoor settings?',
  'How can I propagate the common maidenha

In [None]:
final_results = []

for doc_id, questions in results.items():
    for q in questions:
        final_results.append((doc_id, q))