In [2]:

%load_ext autoreload
%autoreload 2

In [3]:

from farmbase_agent_toolkit.api import FarmbaseAPI

api = FarmbaseAPI(secret_key="fdsfds", context=None)

get_tool_defs([api.create_farm, api.create_field, api.do_nothing])

[{'type': 'function',
  'function': {'name': 'create_farm',
   'description': 'This tool creates a new farm in Farmbase',
   'parameters': {'properties': {'name': {'type': 'string'},
     'location': {'type': 'string'}},
    'required': ['name', 'location'],
    'type': 'object'}}},
 {'type': 'function',
  'function': {'name': 'create_field',
   'description': 'This tool creates a new field in Farmbase',
   'parameters': {'$defs': {'LineString': {'description': 'LineString Model',
      'properties': {'bbox': {'anyOf': [{'maxItems': 4,
          'minItems': 4,
          'prefixItems': [{'type': 'number'},
           {'type': 'number'},
           {'type': 'number'},
           {'type': 'number'}],
          'type': 'array'},
         {'maxItems': 6,
          'minItems': 6,
          'prefixItems': [{'type': 'number'},
           {'type': 'number'},
           {'type': 'number'},
           {'type': 'number'},
           {'type': 'number'},
           {'type': 'number'}],
          'ty

In [2]:
import inspect


def function_to_schema(func) -> dict:
    type_map = {
        str: "string",
        int: "integer",
        float: "number",
        bool: "boolean",
        list: "array",
        dict: "object",
        type(None): "null",
    }

    try:
        signature = inspect.signature(func)
    except ValueError as e:
        raise ValueError(
            f"Failed to get signature for function {func.__name__}: {str(e)}"
        )

    parameters = {}
    for param in signature.parameters.values():
        try:
            param_type = type_map.get(param.annotation, "string")
        except KeyError as e:
            raise KeyError(
                f"Unknown type annotation {param.annotation} for parameter {param.name}: {str(e)}"
            )
        parameters[param.name] = {"type": param_type}

    required = [
        param.name
        for param in signature.parameters.values()
        if param.default == inspect._empty
    ]

    return {
        "type": "function",
        "name": func.__name__,
        "description": (func.__doc__ or "").strip(),
        "parameters": {
            "type": "object",
            "properties": parameters,
            "required": required,
        },
    }

In [6]:
from pprint import pprint
from dotenv import load_dotenv, find_dotenv
from openai import OpenAI

load_dotenv(find_dotenv())


# Example usage
def some_function(
        parameter1: int,  # Some description
        parameter2: tuple[int, int] = (1, 2),  # p2 description
):
    """
    some_function docstring
    """
    pass


def sample_function(param_1, param_2, the_third_one: int, some_optional="John Doe"):
    """
    I can tell you about the black boot
    """
    print("Hello, world")


# schema =  function_to_schema(sample_function)


tools = [sample_function]
tool_schemas = [function_to_schema(tool) for tool in tools]

pprint(tool_schemas)

client = OpenAI()

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Look up the black boot."}],
    # input=[{"role": "user", "content": "What is the weather like in Paris today?"}],
    tools=tool_schemas
)

# print(response.output)
message = response.choices[0].message

message.tool_calls[0].function
# strict=True : BadRequestError: Error code: 400 - {'error': {'message': "Invalid schema for function 'some_function': In context=('properties', 'parameter2'), 'minItems' is not permitted.", 'type': 'invalid_request_error', 'param': 'tools[0].parameters', 'code': 'invalid_function_parameters'}}

# strict=False : BadRequestError: Error code: 400 - {'error': {'message': "Invalid schema for function 'some_function': In context=('properties', 'parameter2'), array schema missing items.", 'type': 'invalid_request_error', 'param': 'tools[0].parameters', 'code': 'invalid_function_parameters'}}

[{'function': {'description': 'I can tell you about the black boot',
               'name': 'sample_function',
               'parameters': {'properties': {'param_1': {'type': 'string'},
                                             'param_2': {'type': 'string'},
                                             'some_optional': {'type': 'string'},
                                             'the_third_one': {'type': 'integer'}},
                              'required': ['param_1',
                                           'param_2',
                                           'the_third_one'],
                              'type': 'object'}},
  'type': 'function'}]


Function(arguments='{"param_1":"black","param_2":"boot","the_third_one":1}', name='sample_function')

In [8]:
from openai import OpenAI

client = OpenAI()

tools = [{'description': 'gets the count of animals and size of the farm for a user',
          'name': 'count_and_size',
          'parameters': {'$defs': {'User': {'additionalProperties': False,
                                            'properties': {'age': {'title': 'Age',
                                                                   'type': 'integer'},
                                                           'name': {'title': 'Name',
                                                                    'type': 'string'}},
                                            'required': ['name', 'age'],
                                            'title': 'User',
                                            'type': 'object'}},
                         'additionalProperties': False,
                         'properties': {'count': {'title': 'Count', 'type': 'integer'},
                                        'size': {'anyOf': [{'type': 'number'},
                                                           {'type': 'null'}],
                                                 'title': 'Size'},
                                        'user': {'$ref': '#/$defs/User'}},
                         'required': ['count', 'size', 'user'],
                         'title': 'simple_method_ParameterModel',
                         'type': 'object'},
          'strict': True,
          'type': 'function'}]

response = client.responses.create(
    model="gpt-4o",
    input=[{"role": "user", "content": "Get the count of animals for user Peter?"}],
    tools=tools
)

print(response.output)

[ResponseFunctionToolCall(arguments='{"count":0,"size":null,"user":{"age":0,"name":"Peter"}}', call_id='call_ngGmjwCYTQ4kH5wy8pQ5C55P', name='count_and_size', type='function_call', id='fc_67dd57dcc5d081928deddad7162ca8ce0e2824a3081bcd99', status='completed')]


In [27]:


tools = [{'description': 'This tool creates a new field in Farmbase\n',
          'name': 'create_field',
          'parameters': {'$defs': {'Position2D': {'items': {'type': 'number'},
                                                  'type': 'array'}},
                         'additionalProperties': False,
                         'properties': {'boundary': {'description': 'the boundary of '
                                                                    'the field as a '
                                                                    'list of [long, '
                                                                    'lat] coordinates.',
                                                     'items': {'$ref': '#/$defs/Position2D'},
                                                     'title': 'Boundary',
                                                     'type': 'array'},
                                        'farm_id': {'description': 'The ID of the farm '
                                                                   'that the field '
                                                                   'belongs to.',
                                                    'title': 'Farm Id',
                                                    'type': 'string'},
                                        'name': {'description': 'the name of the '
                                                                'field.',
                                                 'title': 'Name',
                                                 'type': 'string'}},
                         'required': ['farm_id', 'name', 'boundary'],
                         'title': 'create_field_ParameterModel',
                         'type': 'object'},
          'strict': True,
          'type': 'function'}]

response = client.responses.create(
    model="gpt-4o",
    input=[{"role": "user", "content": "register my new field"}],
    tools=tools
)

print(response.output)

[ResponseOutputMessage(id='msg_67dd6918c3988192b42399145a311c7700f46f408b974f40', content=[ResponseOutputText(annotations=[], text="Could you please provide the details for the new field? I'll need the following information:\n\n1. **Name** of the field.\n2. **Boundary** coordinates (a list of `[longitude, latitude]` pairs defining the field's boundary).\n3. **Farm ID** that the field belongs to.", type='output_text')], role='assistant', status='completed', type='message')]


In [17]:

from collections import namedtuple
from dotenv import load_dotenv

load_dotenv("/Users/markns/workspace/farmwise/.env")

from pydantic import BaseModel

from agents import Agent, Runner, function_tool


class Weather(BaseModel):
    city: tuple[float, float]
    temperature_range: str
    conditions: str


LatLong = namedtuple('LatLong', "lat long")


@function_tool
def get_weather(lat_lon: tuple[float, float]) -> Weather:
    print("[debug] get_weather called")
    return Weather(city=lat_long, temperature_range="14-20C", conditions="Sunny with wind.")


agent = Agent(
    name="Hello world",
    instructions="You are a helpful agent.",
    tools=[get_weather],
)


async def main():
    result = await Runner.run(agent, input="What's the weather in Tokyo?")
    print(result.final_output)
    # The weather in Tokyo is sunny.


await main()

Error getting response: Error code: 400 - {'error': {'message': "Invalid schema for function 'get_weather': In context=('properties', 'lat_lon'), 'minItems' is not permitted.", 'type': 'invalid_request_error', 'param': 'tools[0].parameters', 'code': 'invalid_function_parameters'}}. (request_id: req_ae8166f28d94d39f86ae7cc7dc686d96)


BadRequestError: Error code: 400 - {'error': {'message': "Invalid schema for function 'get_weather': In context=('properties', 'lat_lon'), 'minItems' is not permitted.", 'type': 'invalid_request_error', 'param': 'tools[0].parameters', 'code': 'invalid_function_parameters'}}

In [14]:

from dotenv import load_dotenv, find_dotenv
from openai import OpenAI

load_dotenv(find_dotenv())

client = OpenAI()

response = client.responses.create(
    model="gpt-4o",  # or another supported model
    input="Please search the internet and give me information about the Maize variety named 'DroughtTEGO WE1101'. ",
    tools=[
        {
            "type": "web_search"
        }
    ]
)

In [12]:
response.output[0].content[0].text

'\'DroughtTEGO WE1101\' is a variety of maize developed to withstand drought conditions. Here are some key points about it:\n\n1. **Development**: This maize variety was developed as part of efforts to improve agricultural resilience in regions prone to water scarcity, particularly in Sub-Saharan Africa.\n\n2. **TEGO Series**: Part of the TEGO series, which stands for "Tolerant Efficient Germplasm Options," this variety is designed to offer farmers solutions that are both high-yielding and drought-tolerant.\n\n3. **Adaptation**: \'DroughtTEGO WE1101\' is tailored for areas with erratic rainfall patterns, helping to stabilize maize production even during dry spells.\n\n4. **Benefits**:\n   - **Yield Stability**: Offers consistent yields compared to traditional maize varieties under drought conditions.\n   - **Nutritional Value**: Retains the nutritional value intrinsic to maize.\n\n5. **Implementation**: Widely adopted in areas with limited water resources, it has been a significant par

In [15]:
response.output[0].content[0].text

AttributeError: 'ResponseFunctionWebSearch' object has no attribute 'content'

In [17]:


response.output_text

'DroughtTEGO™ WE1101 is a drought-tolerant maize hybrid developed under the Water Efficient Maize for Africa (WEMA) project, aimed at enhancing food security in regions prone to drought. Released in Kenya in June 2013, this variety offers several notable features:\n\n- **Maturity Period**: WE1101 matures in approximately 125–135 days (4–5 months), depending on the growing area. ([paperzz.com](https://paperzz.com/doc/8963971/droughttego%E2%84%A2-we1101---wema---african-agricultural-technology?utm_source=openai))\n\n- **Adaptation**: It is suited for low to medium altitude areas ranging from 1,000 to 1,600 meters. ([paperzz.com](https://paperzz.com/doc/8963971/droughttego%E2%84%A2-we1101---wema---african-agricultural-technology?utm_source=openai))\n\n- **Grain Characteristics**: The variety produces white grains with a flint to dent-like texture and has good husk cover, protecting the grain from damage by birds, maize weevils, and grain rotting due to water seepage. ([paperzz.com](https:

In [64]:

import pandas as pd

crops = pd.read_csv("apps/farmbase/data/kalro/crops.csv")
# filter_ = ((crops.crop == 'Maize') & (crops.variety == 'KDH41411 (Ukamez-6)'))
filter_ = (crops.crop == 'Macadamia')

print(crops[filter_])
for category, crop, variety in crops[filter_].itertuples(index=False):
    print(f"{category}, {crop}, {variety}")

    variety_sanitized = variety.replace("/", " ")
    filename = f"apps/farmbase/data/varieties/{crop}/{variety_sanitized}.json"

    # if os.path.exists(filename):
    #     continue

    # focusing on the agronomic characteristics of the variety such as disease/pest resistance, optimal growing conditions,
    # yield potential, maturity period and any other special characteristics of the variety
    response = client.responses.create(
        model="gpt-4o",
        input=f"""what do you know about the cashew variety A81""",
        # tools=[{
        #     "type": "web_search_preview",
        #     "search_context_size": "high",
        # }],
        # tool_choice={"type": 'web_search_preview'},
    )
    text = response.output_text
    # sources = set([a.url for a in response.output[1].content[0].annotations])
    print(response.output_text)
    # print(sources)
    break
    # os.makedirs(os.path.dirname(filename), exist_ok=True)
    # with open(filename, "w") as f:
    #     print(json.dumps(dict(output_text=response.output_text, sources=list(sources)), indent=2), file=f)

     category       crop variety
1496     Nuts  Macadamia  MRG-20
1497     Nuts  Macadamia  KRG-15
1498     Nuts  Macadamia   EMB-1
1499     Nuts  Macadamia   KMB-3
Nuts, Macadamia, MRG-20
The cashew variety A81 is a high-yielding clone developed for improved agricultural output. It is known for producing larger nuts and has been favored in various producing regions due to its robust yield and quality. As with many cashew varieties, A81 may also have been selected for disease resistance and adaptability to different growing conditions. If you’re considering cultivating this variety, contacting local agricultural extensions or cashew associations could provide more detailed, region-specific insights.


In [65]:
!uv add google-genai

[2K[2mResolved [1m213 packages[0m [2min 1.96s[0m[0m                                       [0m
[2K[2mPrepared [1m4 packages[0m [2min 505ms[0m[0m                                             [37m⠋[0m [2mPreparing packages...[0m (0/0)                                                   
[2K[2mInstalled [1m5 packages[0m [2min 11ms[0m[0m                                [0m
 [32m+[39m [1mgoogle-auth[0m[2m==2.39.0[0m
 [32m+[39m [1mgoogle-genai[0m[2m==1.11.0[0m
 [32m+[39m [1mpyasn1[0m[2m==0.6.1[0m
 [32m+[39m [1mpyasn1-modules[0m[2m==0.4.2[0m
 [32m+[39m [1mrsa[0m[2m==4.9.1[0m


In [107]:

import pathlib
import json
import os
import pandas as pd

from google import genai
from google.genai.types import Tool, GenerateContentConfig, GoogleSearch

client = genai.Client(api_key="AIzaSyBVgdfzIIqoHU9-AcjJf5xoZ1d_PQkqnxE")

google_search_tool = Tool(
    google_search=GoogleSearch()
)

crops = pd.read_csv("apps/farmbase/data/kalro/crops.csv")
# filter_ = ((crops.crop == 'Maize') & (crops.variety == 'DH01'))
# filter_ = ((crops.crop == 'Macadamia') & (crops.variety == 'EMB-1'))
filter_ = (crops.crop != 'Maize')
# filter_ = ()

# print(crops[filter_])
for category, crop, variety in crops[filter_].itertuples(index=False):
    print(f"{category}, {crop}, {variety}")

    variety_sanitized = variety.replace("/", " ")
    filename = f"apps/farmbase/data/varieties/{crop}/{variety_sanitized}.json"

    if os.path.exists(filename):
        continue

    # filepath = pathlib.Path('apps/farmbase/data/kalro/NATIONAL CROP VARIETY LIST 2025 EDITION.pdf')

    prompt = f"""You are a helpful agronomic advisor. Please provide a report on the {crop} crop variety {variety}.
                 - Focus on the agronomic characteristics of the variety such as disease resistance,
                 pest resistance, optimal growing conditions, yield potential, maturity period and any
                 other special characteristics of the variety.
                 - Only include information specific to the variety.
                 - Do not include general agronomic information about growing the crop.
                 """

    response = client.models.generate_content(
        model="gemini-2.0-flash",

        contents=[
            # Part.from_bytes(
            #     data=filepath.read_bytes(),
            #     mime_type='application/pdf',
            # ),
            prompt],
        config=GenerateContentConfig(
            tools=[google_search_tool],
            response_modalities=["TEXT"],
        )
    )
    # print(response.text)
    sources = []
    if response.candidates[0].grounding_metadata.grounding_chunks:
        sources = [(c.web.title, c.web.uri) for c in response.candidates[0].grounding_metadata.grounding_chunks]
    # print([s[0] for s in sources])
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w") as f:
        print(json.dumps(dict(output_text=response.text, sources=sources), indent=2), file=f)


Cereals, Barley, CERISE LAUREL
Cereals, Barley, BAHATI
Cereals, Barley, NGAO
Cereals, Barley, Bima
Cereals, Barley, KARNE
Cereals, Barley, PEBLICAN
Cereals, Barley, COCKTAIL
Cereals, Barley, GRACE
Cereals, Barley, ALICIANA
Cereals, Barley, QUENCH
Cereals, Barley, NFC TIPPLE
Cereals, Barley, Planet
Cereals, Barley, Sabini
Cereals, Barley, Tumaini
Cereals, Barley, Ahadi
Cereals, Finger millet, KAK-WIMBI 1
Cereals, Finger millet, KAK-WIMBI 2
Cereals, Finger millet, KAK-WIMBI 3
Cereals, Finger millet, Kat/FM I
Cereals, Finger millet, MSU FM 60D
Cereals, Finger millet, EUFM-401
Cereals, Finger millet, EUFM-503
Cereals, Finger millet, Snapping finger millet green
Cereals, Finger millet, P224
Cereals, Finger millet, MARIDADI
Cereals, Finger millet, EUFM-502
Cereals, Finger millet, Nakuru/FMI
Cereals, Sorghum, SC Smile
Cereals, Sorghum, ADV7450
Cereals, Sorghum, Kamani (KM 32-1)
Cereals, Sorghum, Gadam
Cereals, Sorghum, KARI 16. Mtama 2
Cereals, Sorghum, 2K x 17
Cereals, Sorghum, KS -Sorg2
Cer

In [106]:
 import pdfplumber
import pandas as pd
import re
import os
import zipfile

PDF_PATH    = "apps/farmbase/data/kalro/NATIONAL CROP VARIETY LIST 2025 EDITION.pdf"
OUT_DIR     = "csv_exports"
ZIP_ARCHIVE = "csv_exports.zip"

os.makedirs(OUT_DIR, exist_ok=True)

# 1) Catch lines like "1.  NATIONAL COFFEE VARIETY LIST"
heading_re = re.compile(r'^\s*\d+\.\s+NATIONAL\s+([A-Z][A-Za-z &]+?)\s+VARIETY\s+LIST', re.MULTILINE)

crop_tables   = {}
current_crop  = None
last_header   = None  # for carrying header row across split tables

with pdfplumber.open(PDF_PATH) as pdf:
    for page in pdf.pages:
        txt = page.extract_text() or ""
        # detect a new crop section
        m = heading_re.search(txt)
        if m:
            current_crop = m.group(1).strip().title()
            crop_tables.setdefault(current_crop, [])
            last_header = None

        if not current_crop:
            continue

        for raw in page.extract_tables() or []:
            # clean up \uf0a7 bullets and whitespace
            cleaned = [[re.sub(r'\s+', ' ', (cell or '').replace('\uf0a7','')).strip()
                        for cell in row] for row in raw]
            # drop empty rows
            cleaned = [r for r in cleaned if any(c for c in r)]
            if not cleaned:
                continue

            # first non‐empty row is header if we haven't yet set it
            if last_header is None:
                last_header = cleaned[0]
                data_rows  = cleaned[1:]
            else:
                # if this block’s first row equals the header, skip it
                if cleaned[0] == last_header:
                    data_rows = cleaned[1:]
                else:
                    # otherwise assume header carried over
                    data_rows = cleaned

            # turn into DataFrame
            df = pd.DataFrame(data_rows, columns=last_header)
            crop_tables[current_crop].append(df)

# 2) Write one CSV per crop, concat with sort=False to avoid column‐order conflicts
for crop, dfs in crop_tables.items():
    if not dfs:
        continue
    try:
        big = pd.concat(dfs, ignore_index=True, sort=False)
    except Exception as e:
        print(f"⚠️  Error concatenating {crop}: {e}")
        big = dfs[0].copy()

    big.dropna(how='all', inplace=True)
    fname = f"{crop.replace(' ', '_')}.csv"
    path  = os.path.join(OUT_DIR, fname)
    big.to_csv(path, index=False)
    print(f"✔ Wrote {fname} ({big.shape[0]} rows × {big.shape[1]} cols)")

# 3) Zip them up
with zipfile.ZipFile(ZIP_ARCHIVE, 'w', zipfile.ZIP_DEFLATED) as zf:
    for f in os.listdir(OUT_DIR):
        zf.write(os.path.join(OUT_DIR, f), f)

print(f"\nDone! CSVs in ./{OUT_DIR}/ and archive ./{ZIP_ARCHIVE}")

CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, def

ValueError: 9 columns passed, passed data had 8 columns