In [66]:
import requests 
from typing import List, Dict, Any

# Base URL of the Geonorge download API's area code list; a dataset UUID is appended to it below.
url = 'https://nedlasting.geonorge.no/api/codelists/area/'
# UUID of the dataset used throughout this notebook (order responses further down
# report its metadataName as 'MatrikkelenBygning').
uuid = '24d7e9d1-87f6-45a0-b38e-3447f8d7f9a1'


def extract_names(results: List[Dict[str, Any]]) -> List[str]:
    """Collect the ``'name'`` value of every entry in ``results``, preserving order.

    Args:
        results: Dictionaries that each carry a ``'name'`` key.

    Returns:
        The names, one per entry; an empty list when ``results`` is empty.

    Raises:
        KeyError: If an entry has no ``'name'`` key.
    """
    return [entry['name'] for entry in results]

# Fetch the area code list for the dataset. `res` is kept at module level and is
# parsed with res.json() in a later cell.
res = requests.get(f'{url}{uuid}')
# extract_names(res.json())

In [86]:
from pydantic import BaseModel, UUID4
from typing import List, Optional
from typing import Optional, Type
from operator import itemgetter

from langchain.schema import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAI
from langchain_core.runnables import RunnableLambda, RunnableAssign
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from dotenv import load_dotenv
from langchain.tools import BaseTool, StructuredTool, tool
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import format_tool_to_openai_function
import json


# Load environment variables from a .env file two directories up (relative path).
# Presumably this supplies the OpenAI credentials used by ChatOpenAI below — verify.
load_dotenv('../../.env')


# Argument schema for AvailableFormatsAndProjectionsTool: which dataset to look up
# and which area entry to pick from its code list.
# NOTE: the class name contains a typo ("Projectsions"); it is referenced under this
# exact name elsewhere in the notebook, so it is left unchanged.
class AvailableFormatsAndProjectsionsInput(BaseModel):
    # Dataset identifier; the tool appends it to the area code-list URL.
    metadata_uuid: str = Field(
        description='Unique identifier for the item to download')
    # Area name; the tool compares it for exact equality with each entry's "name".
    area: str = Field(
        description="The 'kommune' or 'fylke' to be queried for available data formats and projections.")


class AvailableFormatsAndProjectionsTool(BaseTool):
    """Look up a single area's entry in a Geonorge dataset's area code list.

    The entry returned by the API for the matching area is handed back unchanged
    so that a later prompt can show the formats and projections it contains.
    """
    name = "available_formats_and_projections"
    description = "Useful for when you need to find available data formats and projections for a given geographical area."
    args_schema: Type[BaseModel] = AvailableFormatsAndProjectsionsInput

    def _run(self, metadata_uuid: str, area: str) -> Any:
        """Fetch the dataset's area code list and return the entry named ``area``.

        Args:
            metadata_uuid: Dataset identifier appended to the code-list URL.
            area: Area name; compared for exact equality with each entry's
                ``"name"`` value.

        Returns:
            The matching entry exactly as decoded from the JSON response, or a
            fixed explanatory string when no entry has that name.

        Raises:
            requests.HTTPError: If the API answers with an error status.
            requests.Timeout: If the API does not answer within the timeout.
        """
        res = requests.get(
            f'https://nedlasting.geonorge.no/api/codelists/area/{metadata_uuid}',
            timeout=30)
        # Surface HTTP failures directly instead of trying to iterate an error payload.
        res.raise_for_status()
        data = res.json()

        for item in data:
            if item.get("name") == area:
                return item

        return 'Could not find formats and projections for provided area.'

    async def _arun(self, metadata_uuid: str, area: str) -> str:
        """Asynchronous execution is not implemented for this tool."""
        raise NotImplementedError(
            "available_formats_and_projections does not support async")


# One geographic area inside an order line. The example values below are taken
# from the sample order request sent later in this notebook.
class Area(BaseModel):
    code: str  # e.g. "03"
    name: str  # e.g. "Oslo"
    type: str  # e.g. "fylke"


# A requested file format inside an order line.
class Format(BaseModel):
    name: str  # presumably a format name such as "GML 3.2.1" (seen in order responses) — confirm


# A requested projection inside an order line.
class Projection(BaseModel):
    code: str  # presumably a code such as "25832" (seen in order responses) — confirm


# One dataset request within an order: which dataset, for which areas, and in
# which formats and projections.
# NOTE(review): BaseModel is imported twice in this cell (pydantic, then
# langchain.pydantic_v1), so the later import is the one in effect here, while
# UUID4 comes from pydantic — confirm the two are compatible.
class OrderLine(BaseModel):
    metadataUuid: UUID4  # identifier of the dataset to order
    areas: List[Area]
    formats: List[Format]
    projections: List[Projection]


# Argument schema for DownloadGeonorgeDatasetTool. Its single field mirrors the
# "orderLines" key of the request body posted to the order endpoint.
class DownloadGeonorgeDatasetInput(BaseModel):
    orderLines: List[OrderLine] = Field(
        description="Information necessary for download.")


class DownloadGeonorgeDatasetTool(BaseTool):
    """Place a download order with Geonorge and fetch every file the order lists."""
    name = "download_geonorge_dataset"
    description = "Useful when you need to download a dataset from Geonorge."
    args_schema: Type[BaseModel] = DownloadGeonorgeDatasetInput

    def _run(self, orderLines: List[OrderLine]) -> str:
        """Post the order and download each file named in the response.

        Args:
            orderLines: Either a list of order lines (dicts or ``OrderLine``
                models), or an already assembled request body (a dict), which is
                what ``call_download_tool`` passes. A dict is posted unchanged;
                a list is wrapped as ``{"orderLines": [...]}`` because that is
                the body shape the order endpoint is sent elsewhere in this
                notebook.

        Returns:
            A short summary naming the files that were requested for download.

        Raises:
            requests.HTTPError: If the order endpoint answers with an error status.
            requests.Timeout: If the order endpoint does not answer in time.
        """
        if isinstance(orderLines, dict):
            payload = orderLines
        else:
            # Round-trip models through their JSON form so values such as UUIDs
            # become plain strings that `requests` can serialise.
            payload = {'orderLines': [
                line if isinstance(line, dict) else json.loads(line.json())
                for line in orderLines
            ]}

        res = requests.post(
            'https://nedlasting.geonorge.no/api/order', json=payload, timeout=60)
        res.raise_for_status()
        data = res.json()

        requested = []
        # A response without a 'files' key is treated as an order with no files.
        for file in data.get('files', []):
            download_file(file['downloadUrl'], file['name'])
            requested.append(file['name'])

        if not requested:
            return 'The order did not contain any files to download.'
        # download_file reports failures by printing, so this lists attempts only.
        return 'Requested download of: ' + ', '.join(requested)

    async def _arun(self, orderLines: List[OrderLine]) -> str:
        """Asynchronous execution is not implemented for this tool."""
        raise NotImplementedError(
            "download_geonorge_dataset does not support async")


# First prompt: gives the model the dataset UUID, the area names offered by the
# API and the user's requested area(s), and asks it to pick the matching ones.
# Template variables: uuid, available_areas, requested_areas.
prompt1 = ChatPromptTemplate.from_template("""Dataset UUID: {uuid}
                                           
These are the available areas to choose from: 
                                           
{available_areas}
                                           
Here is the area(s) requested by the user: 
                                           
{requested_areas}
                                           
Select corresponding area(s) from the available ones. 
""")


# Chat model for the first step. The formats-and-projections tool is exposed to
# it as an OpenAI function, and `tool_choice` is set to that tool's name.
# NOTE(review): confirm that a bare tool name is an accepted `tool_choice` value
# for the OpenAI API version in use.
formats_and_projections_model = ChatOpenAI(
    model="gpt-3.5-turbo-1106",
    temperature=0,
    streaming=True,
    model_kwargs={
        'tools': [{'type': 'function', 'function': format_tool_to_openai_function(
            AvailableFormatsAndProjectionsTool())}],
        'tool_choice': AvailableFormatsAndProjectionsTool.name
    }
)

# Chat model for the second step. It is configured like the first model, but
# with the download tool exposed and named in `tool_choice`.
# NOTE(review): confirm that a bare tool name is an accepted `tool_choice` value
# for the OpenAI API version in use.
downloading_model = ChatOpenAI(
    model="gpt-3.5-turbo-1106",
    temperature=0,
    streaming=True,
    model_kwargs={
        'tools': [{'type': 'function', 'function': format_tool_to_openai_function(
            DownloadGeonorgeDatasetTool())}],
        'tool_choice': DownloadGeonorgeDatasetTool.name
    }
)

# Depends on `res` and `extract_names` from the first cell: decode the area code
# list and keep only the area names, which are fed to prompt1 as available_areas.
data = res.json()
names = extract_names(data)


def call_formats_and_projections_tool(message):
    """Run the formats/projections lookup described by the message's first tool call.

    The JSON-encoded arguments of ``tool_calls[0]`` in
    ``message.additional_kwargs`` are decoded and passed as keyword arguments to
    a fresh ``AvailableFormatsAndProjectionsTool``; its result is returned as-is.
    """
    first_call = message.additional_kwargs['tool_calls'][0]
    raw_arguments = first_call['function']['arguments']
    tool = AvailableFormatsAndProjectionsTool()
    return tool._run(**json.loads(raw_arguments))


def call_download_tool(message):
    """Run the download tool with the arguments of the message's first tool call.

    The JSON-encoded arguments of ``tool_calls[0]`` in
    ``message.additional_kwargs`` are decoded and the resulting object is passed
    to ``DownloadGeonorgeDatasetTool._run`` as its single positional argument.
    """
    first_call = message.additional_kwargs['tool_calls'][0]
    raw_arguments = first_call['function']['arguments']
    tool = DownloadGeonorgeDatasetTool()
    return tool._run(json.loads(raw_arguments))


def download_file(url, filename):
    """Stream the resource at ``url`` into the local file ``filename``.

    Prints a confirmation when the file has been written. On any status other
    than 200 it prints the status code and writes nothing.

    Args:
        url: Address to download from.
        filename: Path of the file to create or overwrite.

    Raises:
        requests.Timeout: If the server stops responding for longer than the timeout.
    """
    # The context manager releases the streamed connection even if writing
    # fails part-way through; a streamed response is otherwise left open.
    with requests.get(url, stream=True, timeout=60) as response:
        if response.status_code != 200:
            print(f"Failed to download file: status code {response.status_code}")
            return
        with open(filename, 'wb') as file:
            # 64 KiB chunks keep memory bounded without the per-chunk overhead
            # of very small reads.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                file.write(chunk)
    print(f"Downloaded file: {filename}")

# Second prompt: shows the model the entry returned by the formats/projections
# lookup together with the dataset UUID, and tells it to use a single format.
# Template variables: result, uuid.
prompt2 = ChatPromptTemplate.from_template("""Here are available formats and projections that can be used for download: 
                                           
{result}
                                           
You are only allowed to use ONE format; it can be picked at random. GML format is most preferable. 
                                           
The UUID for the dataset is: {uuid}
""")

# Two-step pipeline:
#   1. prompt1 -> first model, whose tool call is executed to look up the
#      formats and projections for the selected area;
#   2. that result plus the dataset UUID -> prompt2 -> second model, whose tool
#      call is executed to place the order and download the files.
# The 'uuid' branch ignores its input and reads the module-level `uuid`.
chain = (
    prompt1
    | formats_and_projections_model
    | {
        'result': RunnableLambda(call_formats_and_projections_tool),
        'uuid': RunnableLambda(lambda _: uuid),
    }
    | prompt2
    | downloading_model
    | RunnableLambda(call_download_tool)
)

# Run the pipeline for the notebook's dataset. The requested area is spelled
# 'ozlo'; presumably a deliberate misspelling to check that the model still maps
# it to an available area (the captured output below shows files for Oslo).
chain.invoke({'uuid': uuid, 'available_areas': names,
              'requested_areas': ['ozlo']})

{'referenceNumber': '454bdd30-b8b0-4595-8481-bd5d458292d7', 'files': [{'downloadUrl': 'https://nedlasting.geonorge.no/api/download/order/454bdd30-b8b0-4595-8481-bd5d458292d7/6afde85b-0e1a-4522-9529-dc087163d502', 'name': 'Basisdata_03_Oslo_25832_MatrikkelenBygning_GML.zip', 'fileId': '6afde85b-0e1a-4522-9529-dc087163d502', 'metadataUuid': '24d7e9d1-87f6-45a0-b38e-3447f8d7f9a1', 'area': '03', 'projection': '25832', 'format': 'GML 3.2.1', 'status': 'ReadyForDownload', 'metadataName': 'MatrikkelenBygning', 'areaName': 'Oslo', 'projectionName': 'EUREF89 UTM sone 32, 2d'}], 'orderDate': '2024-01-18T15:54:57.3761629+01:00', 'downloadAsBundle': False, '_links': [{'href': 'https://nedlasting.geonorge.no/api/order/454bdd30-b8b0-4595-8481-bd5d458292d7', 'rel': 'self', 'templatedSpecified': False}, {'href': 'https://nedlasting.geonorge.no/api/order/454bdd30-b8b0-4595-8481-bd5d458292d7', 'rel': 'http://rel.geonorge.no/download/order/bundle', 'templatedSpecified': False}]}
Downloaded file: Basisdat

In [68]:
# Manual order request for comparison: one order line for Oslo with no formats
# or projections given. The captured response below lists files for more than
# one projection (25832 and 25833).
ya = requests.post(
    f'https://nedlasting.geonorge.no/api/order', json={
        "orderLines": [
            {
                "metadataUuid": "24d7e9d1-87f6-45a0-b38e-3447f8d7f9a1",
                "areas": [
                    {
                        "code": "03",
                        "name": "Oslo",
                        "type": "fylke"
                    }
                ]
            }
        ]
    })

# Display the decoded response body as the cell output.
ya.json()

{'referenceNumber': '4b26f52a-356c-4f9c-9316-e13c9a7efc72',
 'files': [{'downloadUrl': 'https://nedlasting.geonorge.no/api/download/order/4b26f52a-356c-4f9c-9316-e13c9a7efc72/04ecdfe5-883a-4d24-9b41-e95429955f17',
   'name': 'Basisdata_03_Oslo_25832_MatrikkelenBygning_GML.zip',
   'fileId': '04ecdfe5-883a-4d24-9b41-e95429955f17',
   'metadataUuid': '24d7e9d1-87f6-45a0-b38e-3447f8d7f9a1',
   'area': '03',
   'projection': '25832',
   'format': 'GML 3.2.1',
   'status': 'ReadyForDownload',
   'metadataName': 'MatrikkelenBygning',
   'areaName': 'Oslo',
   'projectionName': 'EUREF89 UTM sone 32, 2d'},
  {'downloadUrl': 'https://nedlasting.geonorge.no/api/download/order/4b26f52a-356c-4f9c-9316-e13c9a7efc72/1c41566a-acd7-43c6-aa5e-32cbd99340d7',
   'name': 'Basisdata_03_Oslo_25833_MatrikkelenBygning_GML.zip',
   'fileId': '1c41566a-acd7-43c6-aa5e-32cbd99340d7',
   'metadataUuid': '24d7e9d1-87f6-45a0-b38e-3447f8d7f9a1',
   'area': '03',
   'projection': '25833',
   'format': 'GML 3.2.1',
   

In [69]:
# Scratch check of argument decoding: parse a tool-call style JSON argument
# string into a dict and read one key back.
kw = json.loads(
    '{"metadata_uuid": "24d7e9d1-87f6-45a0-b38e-3447f8d7f9a1", "area": "Oslo"}')
fn = AvailableFormatsAndProjectionsTool()
# Disabled call. Note that `kw` is already a dict here, so it would have to be
# passed as fn._run(**kw) rather than through json.loads again.
# fn._run(**json.loads(kw))
kw['area']

'Oslo'

In [70]:
# Inspect the OpenAI function description generated from the tool; this is the
# structure placed under 'function' in the models' `tools` setting above.
format_tool_to_openai_function(AvailableFormatsAndProjectionsTool())

{'name': 'available_formats_and_projections',
 'description': 'Useful for when you need to find available data formats and projections for a given geographical area.',
 'parameters': {'title': 'AvailableFormatsAndProjectsionsInput',
  'type': 'object',
  'properties': {'metadata_uuid': {'title': 'Metadata Uuid',
    'description': 'Unique identifier for the item to download',
    'type': 'string'},
   'area': {'title': 'Area',
    'description': "The 'kommune' or 'fylke' to be queried for available data formats and projections.",
    'type': 'string'}},
  'required': ['metadata_uuid', 'area']}}

In [71]:
from langchain_core.runnables import RunnableLambda


def get_array(x):
    """Return the list ``[0, 1, ..., x - 1]`` (empty when ``x`` is not positive)."""
    return list(range(x))


def get_len(lst):
    """Print ``lst`` and return the number of items it contains."""
    print(lst)
    item_count = len(lst)
    return item_count


# Minimal RunnableLambda demo: wrap two plain functions and pipe the first one's
# output into the second. Note that this rebinds the name `chain`, replacing the
# download pipeline defined in an earlier cell.
chain = RunnableLambda(get_array) | RunnableLambda(get_len)
# get_array(10) yields a 10-element list, which get_len prints and measures.
chain.invoke(10)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


10