# OAS->requests

In [None]:
#| default_exp oas_to_requests

In [2]:
#| hide
from nbdev.showdoc import *

In [3]:
#| export
import requests
from pprint import pprint
import json
import yaml
import copy
import re

## OAS inspection

In [4]:
!wget https://tie.digitraffic.fi/swagger/openapi.json

--2024-11-10 13:20:07--  https://tie.digitraffic.fi/swagger/openapi.json
Resolving tie.digitraffic.fi (tie.digitraffic.fi)... 108.156.22.13, 108.156.22.84, 108.156.22.113, ...
Connecting to tie.digitraffic.fi (tie.digitraffic.fi)|108.156.22.13|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 183198 (179K) [application/json]
Saving to: ‘openapi.json.1’


2024-11-10 13:20:07 (1.39 MB/s) - ‘openapi.json.1’ saved [183198/183198]



In [5]:
BASE_URL = "https://tie.digitraffic.fi"

# Load and parse the OAS file
with open("openapi.json") as f:
    spec_dict = json.load(f)
# oas = OpenAPI.parse_obj(spec_dict)
oas = spec_dict
oas

{'openapi': '3.0.1',
 'info': {'title': 'Digitraffic Road API',
  'description': '[OpenAPI document](/swagger/openapi.json) \n\nDigitraffic is a service operated by the [Fintraffic](https://www.fintraffic.fi) offering real time traffic information. Currently the service covers *road, marine and rail* traffic. More information can be found at the [Digitraffic website](https://www.digitraffic.fi/) \n\nThe service has a public Google-group [road.digitraffic.fi](https://groups.google.com/forum/#!forum/roaddigitrafficfi) for communication between developers, service administrators and Fintraffic. The discussion in the forum is mostly in Finnish, but you\'re welcome to communicate in English too. \n\n### General notes of the API\n* Many Digitraffic APIs use GeoJSON as data format. Definition of the GeoJSON format can be found at https://tools.ietf.org/html/rfc7946.\n* For dates and times [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format is used with "Zulu" zero offset from UTC unless

### Deep reference extraction

In order to generate tool schemas, we need to resolve and flatten the references to `components`.

In [6]:
#| export
def extract_refs(
        oas: dict  # The OpenAPI schema
    ) -> dict:  # The extracted references (flattened)
    refs = copy.deepcopy(oas)
    refs_list = set()
    refs_locations = {}
    refs_dependencies = {}

    # Traverse the components section of the spec
    for section, items in refs["components"].items():
        for item in items:
            refs_list.add(f"components/{section}/{item}")
            refs_locations[f"components/{section}/{item}"] = []
            refs_dependencies[f"components/{section}/{item}"] = set()
    
    # Initialize the clean_refs set
    clean_refs = refs_list.copy()

    # Traverse the spec and extract the references
    def traverse_location(obj, path=""):
        for key, value in obj.items():
            if key == "$ref":
                # Determine the root of the reference and remove it from the clean_refs set
                ref_root = "/".join(path.split("/")[:3])
                clean_refs.discard(ref_root)

                # Extract the sub reference and add the current path to the list of locations
                sub_ref = value[2:]
                refs_locations[sub_ref].append(path)

                # Add the sub reference to the dependencies of the current reference
                refs_dependencies[ref_root].add(sub_ref)

            elif isinstance(value, dict):
                # Recursively traverse the object
                traverse_location(value, f"{path}/{key}")

    traverse_location(refs["components"], "components")

    # Attach the reference objects to the locations
    def attach_clean_refs():
        for ref in clean_refs:
            # Extract the reference object
            ref_obj = refs
            for part in ref.split("/"):
                ref_obj = ref_obj[part]

            # Extract the locations where the reference is used
            locations = refs_locations[ref]

            # Attach the reference object to the locations
            for location in locations:
                location_parts = location.split("/")

                obj = refs
                prev = None
                for part in location_parts:
                    prev = obj
                    obj = obj[part]

                prev[location_parts[-1]] = ref_obj

            # Remove the reference from the dependencies
            for dependency in refs_dependencies:
                refs_dependencies[dependency].discard(ref)

    # Check if there are any clean references
    def check_clean_refs():
        clean_refs = set()
        for ref, dependencies in refs_dependencies.items():
            if len(dependencies) == 0:
                clean_refs.add(ref)
        return clean_refs
    
    # Iterate until all references are attached or no progress is made
    prev_nof_clean = None
    while len(clean_refs) < len(refs_list) and prev_nof_clean != len(clean_refs):
        prev_nof_clean = len(clean_refs)
        attach_clean_refs()
        clean_refs = check_clean_refs()

    # Flatten the references
    flatten_refs = {}
    for section, items in refs["components"].items():
        for item in items:
            flatten_refs[f"#/components/{section}/{item}"] = refs["components"][section][item]

    return flatten_refs

In [7]:
refs = extract_refs(oas)
pprint(refs)

{'#/components/schemas/Address': {'properties': {'addressLines': {'items': {'$ref': '#/components/schemas/AddressLine'},
                                                                  'type': 'array',
                                                                  'xml': {'name': 'addressLine'}},
                                                 'city': {'properties': {'values': {'properties': {'values': {'items': {'properties': {'lang': {'type': 'string',
                                                                                                                                                'xml': {'attribute': True}},
                                                                                                                                       'value': {'type': 'string'}},
                                                                                                                        'type': 'object'},
                                                          

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



'15:30:00',
                                                                           'properties': {'hour': {'format': 'int32',
                                                                                                   'type': 'integer'},
                                                                                          'minute': {'format': 'int32',
                                                                                                     'type': 'integer'},
                                                                                          'nano': {'format': 'int32',
                                                                                                   'type': 'integer'},
                                                                                          'second': {'format': 'int32',
                                                                                                     'type': 'integer'}},
                               

In [8]:
show_doc(extract_refs)

---

[source](https://github.com/ninjalabo/llmcam/blob/main/llmcam/oas_to_requests.py#L15){target="_blank" style="float:right; font-size:smaller"}

### extract_refs

>      extract_refs (oas:dict)

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| oas | dict | The OpenAPI schema |
| **Returns** | **dict** | **The extracted references (flattened)** |

### Toolbox schema

Extract important information about the functions and creates a GPT-compatible toolbox schema. The idea is to convert all necessary information for generating an API request to a parameter for GPT to provide. As such, the parameters of each function in this toolbox schema will include:

- `url`: URL to send requests to (type `string`, with `const` default value formed with a base URL and endpoint path) 
- `method`: HTTP method for each endpoint (type `string`, with `const` value)
- `path`: dictionary for path parameters that maps parameter names to schema
- `query`: dictionary for query parameters that maps parameter names to schema
- `body`: request body schema

In [9]:
#| export
def toolbox_schema(
        base_url: str,  # The base URL of the API
        oas: dict,  # The OpenAPI schema
    ) -> dict:  # The toolbox schema
    """Form the toolbox schema from the OpenAPI schema."""
    
    # Extract the references
    flatten_refs = extract_refs(oas)
    
    # Initialize the toolbox
    toolbox = []

    # Traverse the paths section of the spec
    for path, methods in oas["paths"].items():
        for method, info in methods.items():
            # Extract the function name
            name = info["operationId"] if "operationId" in info else \
                f"{method}{path.replace('/', '_').replace('{', 'by').replace('}', '').replace('-', '_')}"
            name = re.sub(r'[^a-zA-Z0-9_-]', '_', name)

            # Extract the function description
            description = info["description"] if "description" in info else info.get("summary", "")

            # Extract the function parameters
            parameters = {
                "type": "object",

                # Initialize with the constant properties - path and method of endpoint
                "properties": {
                    "url": {"type": "string", "const": base_url + path},
                    "method": {"type": "string", "const": method},
                },

                # Initialize the required properties
                "required": ["url", "method"]
            }

            # Extract endpoint parameters
            if "parameters" in info:
                for param in info["parameters"]:
                    location = param.get("in", "query")
                    if location not in parameters["properties"]:
                        parameters["properties"][location] = {
                            "type": "object",
                            "properties": {}
                        }
                    
                    if "schema" in param:
                        if "$ref" in param["schema"]:
                            ref = param["schema"]["$ref"]
                            parameters["properties"][location]["properties"][param["name"]] = flatten_refs[ref]
                        else:
                            parameters["properties"][location]["properties"][param["name"]] = param["schema"]
                    else:
                        parameters["properties"][location]["properties"][param["name"]] = {
                        "type": param["type"]
                    }
                    if location == "path":
                        parameters["properties"][location].setdefault("required", []).append(param["name"])
            
            # Extract the function body
            body = {}
            if "requestBody" in info and "content" in info["requestBody"] \
                    and "application/json" in info["requestBody"]["content"] \
                    and "schema" in info["requestBody"]["content"]["application/json"]:
                body = info["requestBody"]["content"]["application/json"]["schema"]
                if "$ref" in body:
                    ref = body["$ref"]
                    body = flatten_refs[ref]
                parameters["properties"]["body"] = body
                
            # Conclude the function information
            function = {
                "name": name,
                "description": description,
                "parameters": parameters
            }

            # Add the function to the toolbox
            toolbox.append(
                {
                    "type": "function",
                    "function": function
                }
            )
        
    return toolbox

In [10]:
show_doc(toolbox_schema)

---

[source](https://github.com/ninjalabo/llmcam/blob/main/llmcam/oas_to_requests.py#L105){target="_blank" style="float:right; font-size:smaller"}

### toolbox_schema

>      toolbox_schema (base_url:str, oas:dict)

*Form the toolbox schema from the OpenAPI schema.*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| base_url | str | The base URL of the API |
| oas | dict | The OpenAPI schema |
| **Returns** | **dict** | **The toolbox schema** |

In [11]:
tool_schema = toolbox_schema(BASE_URL, oas)
tool_schema

[{'type': 'function',
  'function': {'name': 'weatherDataHistory',
   'description': 'List the history of sensor values from the weather road station',
   'parameters': {'type': 'object',
    'properties': {'url': {'type': 'string',
      'const': 'https://tie.digitraffic.fi/api/beta/weather-history-data/{stationId}'},
     'method': {'type': 'string', 'const': 'get'},
     'path': {'type': 'object',
      'properties': {'stationId': {'type': 'integer', 'format': 'int64'}},
      'required': ['stationId']},
     'query': {'type': 'object',
      'properties': {'from': {'type': 'string', 'format': 'date-time'},
       'to': {'type': 'string', 'format': 'date-time'}}}},
    'required': ['url', 'method']}}},
 {'type': 'function',
  'function': {'name': 'weatherDataHistory_1',
   'description': 'List the history of sensor value from the weather road station',
   'parameters': {'type': 'object',
    'properties': {'url': {'type': 'string',
      'const': 'https://tie.digitraffic.fi/api/beta

## Executing requests with GPT

### Auxiliary function to generate requests

In [12]:
#| export
def generate_request(
    function_name: str,  # The name of the function
    url: str,  # The URL of the request
    method: str,  # The method of the request
    path: dict = {},  # The path parameters of the request
    query: dict = {},  # The query parameters of the request
    body: dict = {}  # The body of the request
) -> dict:  # The response of the request
    """Generate a request from the function name and parameters."""
    # Prepare the request
    headers = {
        "Content-Type": "application/json"
    }

    # Execute the request
    response = requests.request(
        method,
        url.format(**path),
        headers=headers,
        params=query,
        json=body if len(body) > 0 else None
    )

    # Return the response (either as JSON or text)
    try:
        return response.json()
    except:
        return {"message": response.text}

In [13]:
show_doc(generate_request)

---

[source](https://github.com/ninjalabo/llmcam/blob/main/llmcam/oas_to_requests.py#L193){target="_blank" style="float:right; font-size:smaller"}

### generate_request

>      generate_request (function_name:str, url:str, method:str, path:dict={},
>                        query:dict={}, body:dict={})

*Generate a request from the function name and parameters.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| function_name | str |  | The name of the function |
| url | str |  | The URL of the request |
| method | str |  | The method of the request |
| path | dict | {} | The path parameters of the request |
| query | dict | {} | The query parameters of the request |
| body | dict | {} | The body of the request |
| **Returns** | **dict** |  | **The response of the request** |

Test with usage from DigiTraffic:

In [14]:
generate_request(
    function_name="weathercamStations", 
    url="https://tie.digitraffic.fi/api/weathercam/v1/stations/{id}",
    method="GET",
    path={"id": "C01504"}
)

{'type': 'Feature',
 'id': 'C01504',
 'geometry': {'type': 'Point', 'coordinates': [24.235601, 60.536727, 0.0]},
 'properties': {'id': 'C01504',
  'name': 'vt2_Karkkila_Korpi',
  'cameraType': 'HIKVISION',
  'nearestWeatherStationId': 1052,
  'collectionStatus': 'GATHERING',
  'state': None,
  'dataUpdatedTime': '2024-11-10T03:27:00Z',
  'collectionInterval': 600,
  'names': {'fi': 'Tie 2 Karkkila, Kappeli',
   'sv': 'Väg 2 Högfors, Kappeli',
   'en': 'Road 2 Karkkila, Kappeli'},
  'roadAddress': {'roadNumber': 2,
   'roadSection': 13,
   'distanceFromRoadSectionStart': 3818,
   'carriageway': 'ONE_CARRIAGEWAY',
   'side': 'LEFT',
   'contractArea': '',
   'contractAreaCode': 344},
  'liviId': 'Livi1089298',
  'country': None,
  'startTime': '1995-06-01T00:00:00Z',
  'repairMaintenanceTime': None,
  'annualMaintenanceTime': None,
  'purpose': 'keli',
  'municipality': 'Karkkila',
  'municipalityCode': 224,
  'province': 'Uusimaa',
  'provinceCode': 1,
  'presets': [{'id': 'C0150401',
 

### GPT integration

In [15]:
import textwrap
from llmcam.fn_to_fc import complete
from openai import OpenAI

client = OpenAI()


In [16]:
SYSTEM_MESSAGE = """
You are a helpful assistant.
Respond to the following prompt by using function_call and then summarize actions.
Ask for clarification if a user request is ambiguous.
"""

# Maximum number of function calls allowed to prevent infinite or lengthy loops
MAX_CALLS = 5


def get_openai_response(functions, messages):
    return client.chat.completions.create(
        model="gpt-4",
        tools=functions,
        tool_choice="auto",  # "auto" means the model can pick between generating a message or calling a function.
        temperature=0,
        messages=messages,
    )


def process_user_instruction(functions, instruction):
    num_calls = 0
    messages = [
        {"content": SYSTEM_MESSAGE, "role": "system"},
        {"content": instruction, "role": "user"},
    ]

    while num_calls < MAX_CALLS:
        response = get_openai_response(functions, messages)
        message = response.choices[0].message
        print(message)
        try:
            print(f"\n>> Function call #: {num_calls + 1}\n")
            pp(message.tool_calls)
            messages.append(message)

            # For the sake of this example, we'll simply add a message to simulate success.
            # Normally, you'd want to call the function here, and append the results to messages.
            messages.append(
                {
                    "role": "tool",
                    "content": "success",
                    "tool_call_id": message.tool_calls[0].id,
                }
            )

            num_calls += 1
        except:
            print("\n>> Message:\n")
            print(message.content)
            break

    if num_calls >= MAX_CALLS:
        print(f"Reached max chained function calls: {MAX_CALLS}")


USER_INSTRUCTION = """
Instruction: Get the weather camera stations for the station with ID C01504.
"""

process_user_instruction(tool_schema, USER_INSTRUCTION)

BadRequestError: Error code: 400 - {'error': {'message': "Invalid schema for function 'forecastSections': False is not of type 'number'.", 'type': 'invalid_request_error', 'param': 'tools[27].function.parameters', 'code': 'invalid_function_parameters'}}

In [None]:
# Test the function
messages = [
    {
        "role":"system", 
        "content":"You are a helpful system administrator. Use the supplied tools to assist the user."    
    }
]
complete(
    messages, 
    "user", 
    "Can you get me information about the weather cameras stations?",
    tool_schema,
    aux_fn=generate_request
)

for message in messages:
    print(f">> {message['role'].capitalize()}:")
    try:
        print(textwrap.fill(message["content"], 100))
    except:
        print(message)

BadRequestError: Error code: 400 - {'error': {'message': "Invalid 'tools[7].function.name': string does not match pattern. Expected a string that matches the pattern '^[a-zA-Z0-9_-]+$'.", 'type': 'invalid_request_error', 'param': 'tools[7].function.name', 'code': 'invalid_value'}}

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()