### Exercise 2 - AI Attachment Reviewer

This example will walk you through the following steps:

1. Download attachments from a Feature Service
2. Use an LLM to analyze the attachments (images & PDFs) and produce a text summary for each
3. Write that summary back to the feature for review in an ArcGIS Manager Instant Application


#### Import the needed Python libraries


In [None]:
import toml
import sys
import os
import shutil
import pandas as pd
from arcgis.gis import GIS
from langchain_openai import AzureChatOpenAI
from pydantic import BaseModel, Field
from getpass import getpass
from typing import Optional, Dict, List

# Absolute path of the directory one level above the current working directory
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Add the parent directory to the Python path so modules located there can be
# imported (presumably where the `chains` package imported below lives — confirm)
sys.path.append(parent_dir)

from chains.image_extractor import ImageExtractChain
from chains.pdf_extractor import PDFExtractChain

print("done importing")

#### Connect to ArcGIS Online

You can choose to connect to an Organization that uses a named user account or Single Sign On. Below are three options:

---

OPTION 1. Connect to any ArcGIS Online Organization using a named user account.

OPTION 2. If you have an **Esri Federal** account, connect using Single Sign On.

OPTION 3. Connect to any ArcGIS Online Organization using a Single Sign On account.

---

**Please run only one of the next three code cells.**


OPTION 1: Connect to any ArcGIS Online Organization using a named user account.

- Simply log in with your username and password when prompted


In [None]:
# Prompt for named-user credentials and sign in to ArcGIS Online.
# The prompt is passed positionally: the builtin input() does not accept
# keyword arguments (only the notebook kernel's replacement does), so the
# positional form works both inside and outside Jupyter.
username = input("Enter your Esri username: ")
# getpass hides the typed password instead of echoing it.
password = getpass(prompt="Enter your Esri password: ")
gis = GIS("https://www.arcgis.com", username, password)

# Show the connection as the cell output
gis

OPTION 2. If you have an **Esri Federal** account, connect using Single Sign On.


In [None]:
# URL of your ArcGIS Online organization
org_url = "https://esrifederal.maps.arcgis.com"

# client_id is the application id of the app registered through the org
client_id = "VEFjlNUX3GINnELq"
# Connect to the organization using the registered app's client id
gis = GIS(org_url, client_id=client_id)

# Show the connection as the cell output
gis

OPTION 3. Connect to any ArcGIS Online Organization using a Single Sign On account.

- Follow these instructions to create OAuth credentials in your ArcGIS Online account, https://developers.arcgis.com/documentation/security-and-authentication/app-authentication/tutorials/create-oauth-credentials-app-auth/.
- After creating, copy the Client Id and use it below in the `client_id` variable.


In [None]:
# URL of your ArcGIS Online organization, e.g. https://myOrg.maps.arcgis.com
# (replace the placeholder before running this cell)
org_url = "<Your ArcGIS Online Organization's URL>"

# client_id is the application id of the app registered through the org
# (replace the placeholder with the Client Id you created)
client_id = "<Your Client ID>"
# Connect to the organization using the registered app's client id
gis = GIS(org_url, client_id=client_id)

# Show the connection as the cell output
gis

#### Clone existing feature service with attachments into your own ArcGIS Online account.


In [None]:
# Clone the sample feature service (with its attachments) into your own content.
attachment_fs_item = gis.content.get("ac062cbb932b478f96075b4ae326447c")

# content.get() yields None when the item cannot be found or accessed; fail
# here with a clear message instead of an AttributeError on `.title` below.
if attachment_fs_item is None:
    raise RuntimeError(
        "Could not find or access the source feature service item "
        "ac062cbb932b478f96075b4ae326447c with the signed-in account."
    )

print(f"Cloning item: {attachment_fs_item.title}")
# copy_data=True copies the data rather than referencing the source service;
# search_existing_items=False forces a fresh clone on every run.
cloned_items = gis.content.clone_items(
    items=[attachment_fs_item], copy_data=True, search_existing_items=False
)

# cloned_item_id stays None when nothing was cloned; later cells read it.
cloned_item_id = None
if cloned_items:
    cloned_item = cloned_items[0]
    cloned_item_id = cloned_item.id
    # build the url to the cloned item's details page
    cloned_item_url = f"{gis.url}/home/item.html?id={cloned_item_id}"
    print(f"Successfully cloned item. Please review @ {cloned_item_url}.")

else:
    print("Failed to clone item")

#### Prepare the attachments download folder


In [None]:
# Get the cloned item. cloned_item_id is None when the clone step did not
# succeed, so stop with a clear message rather than failing on `.layers`.
if cloned_item_id is None:
    raise RuntimeError(
        "No cloned item id is available; the clone step must succeed before "
        "the attachments folder can be prepared."
    )

item = gis.content.get(cloned_item_id)
if item is None:
    raise RuntimeError(f"Could not load the cloned item {cloned_item_id}.")

# The first layer of the service is the one whose attachments are processed.
feature_layer = item.layers[0]

# Create a directory to save attachments; exist_ok avoids the separate
# existence check and makes re-running the cell harmless.
output_dir = "attachments"
os.makedirs(output_dir, exist_ok=True)

#### Download Feature Service Attachments

This helpful section of code was pulled from Rami's excellent sample here: https://github.com/ralouta/ArcGIS_Code_Repo/blob/main/src/scripts/Feature%20Service%20Management/download_attachments_fs.ipynb.


In [None]:
# Function to download attachments
def download_attachments(feature_layer, output_dir):
    """Download every attachment of every feature in a layer into one folder.

    Each attachment is saved directly inside ``output_dir`` under the name
    ``<globalid>__<attachment name>``. Attachments whose target file already
    exists are reported and not downloaded again.

    Args:
        feature_layer: Layer object providing ``query``, ``properties``
            (``objectIdField`` / ``globalIdField``) and ``attachments``
            (``get_list`` / ``download``) as used below.
        output_dir: Folder that receives the renamed attachment files.

    Raises:
        Any exception raised by the download call or the file move is
        propagated; the temporary download folder is removed first.
    """
    object_id_field = feature_layer.properties.objectIdField
    global_id_field = feature_layer.properties.globalIdField

    # Query all features
    features = feature_layer.query(where="1=1", out_fields="*").features

    for feature in features:
        object_id = feature.attributes[object_id_field]
        globalid = feature.attributes[global_id_field]

        for attachment in feature_layer.attachments.get_list(object_id):
            attachment_id = attachment["id"]
            attachment_name = attachment["name"]
            final_attachment_path = os.path.join(
                output_dir, f"{globalid}__{attachment_name}"
            )

            # Skip attachments that were already downloaded
            if os.path.exists(final_attachment_path):
                print(
                    f"Attachment {attachment_name} for feature {object_id} already exists as {final_attachment_path}"
                )
                continue

            # Download into a per-feature temporary directory first, so the
            # file can be renamed with the global id prefix afterwards.
            temp_dir = os.path.join(output_dir, f"temp_{object_id}")
            os.makedirs(temp_dir, exist_ok=True)
            try:
                feature_layer.attachments.download(
                    oid=object_id, attachment_id=attachment_id, save_path=temp_dir
                )

                # Move the attachment to the output directory with the new name
                shutil.move(
                    os.path.join(temp_dir, attachment_name), final_attachment_path
                )
            finally:
                # Always remove the temporary directory, even when the download
                # or move fails; a leftover temp folder inside output_dir would
                # otherwise be mistaken for attachments by later directory walks.
                shutil.rmtree(temp_dir, ignore_errors=True)

            print(
                f"Downloaded {attachment_name} for feature {object_id} as {final_attachment_path}"
            )


# Download the attachments of the cloned layer into the attachments folder.
# The message below is printed once the call returns without raising.
download_attachments(feature_layer, output_dir)
print("Attachments downloaded successfully")

#### Use an LLM to analyze the attachments


In [None]:
# Read the Azure OpenAI settings from the first entry of the `configs` list
# in config.toml (looked up relative to the current working directory).
azure_config = toml.load("config.toml")["configs"][0]
# Build the chat model client from those settings.
# NOTE(review): `model` and `model_name` are both set from the same config
# value — confirm whether both arguments are needed.
llm = AzureChatOpenAI(
    openai_api_version=azure_config["api_version"],
    azure_deployment=azure_config["deployment_name"],
    api_key=azure_config["api_key"],
    azure_endpoint=azure_config["api_endpoint"],
    model=azure_config["model_name"],
    model_name=azure_config["model_name"],
    temperature=0,
)
# Send a short test prompt; presumably a check that the configuration works.
# The reply is shown as the cell output.
llm.invoke("hi")

#### Setup the Output Model Structure


For all images, extract the following information:


In [None]:
class ImageInformation(BaseModel):
    """Information about an image."""

    # Output model handed to ImageExtractChain as `output_model`.
    # NOTE(review): the Field descriptions are presumably forwarded to the LLM
    # as part of the output schema, so their wording affects extraction —
    # confirm against ImageExtractChain.

    # Title of the image (the description previously said "PDF document",
    # copied from the PDF model).
    title: str = Field(description="Title of the image")
    image_description: str = Field(description="a short description of the image")
    # Numeric values read from a map, graph or table shown in the image.
    point_measurements: list[float] = Field(
        description="list of the numbers of measurements in the map or graph or table"
    )
    # Locations of the measurements listed in `point_measurements`.
    location_point_measurements: list[str] = Field(
        description="list of the locations of the measurements in the map or graph or table"
    )
    point_measurements_description: str = Field(
        description="explanation of the point measurements"
    )
    average_measurements: float = Field(
        description="average measurement of data in the image"
    )
    average_measurements_description: str = Field(
        description="explanation of the average measurement calculation"
    )
    # Optional; defaults to an empty string when not provided.
    country: Optional[str] = Field(
        "", description="country where the picture was taken"
    )
    main_objects: list[str] = Field(
        description="list of the main objects on the picture"
    )

For all PDFs, extract the following information:


In [None]:
# Nested model describing one graph; used as the value type of
# PDFInformation.graphs.
# NOTE(review): the class docstring and Field descriptions may be part of the
# schema sent to the LLM — treat them as runtime text, not just comments.
class GraphAttributes(BaseModel):
    """Attributes of a graph in the PDF document."""

    title: str = Field(description="Title of the graph")
    page_number: int = Field(description="Page number where the graph is located")
    # Optional; defaults to an empty string when not provided.
    description: Optional[str] = Field("", description="Description of the graph")


# Output model handed to PDFExtractChain as `output_model`.
# NOTE(review): the class docstring and Field descriptions may be part of the
# schema sent to the LLM — treat them as runtime text, not just comments.
class PDFInformation(BaseModel):
    """Information about a PDF document."""

    title: str = Field(description="Title of the PDF document")
    # Optional; defaults to an empty string when not provided.
    author: Optional[str] = Field("", description="Author of the PDF document")
    number_of_pages: int = Field(description="Number of pages in the PDF document")
    number_of_graphs: int = Field(description="Number of graphs in the PDF document")
    # Mapping of string keys to graph details; the meaning of the key is not
    # fixed here (presumably a graph name or index — confirm).
    graphs: Dict[str, GraphAttributes] = Field(description="Graphs in the PDF document")
    # Dates are kept as free-form strings; both default to an empty string.
    creation_date: Optional[str] = Field(
        "", description="Creation date of the PDF document"
    )
    modification_date: Optional[str] = Field(
        "", description="Last modification date of the PDF document"
    )
    keywords: List[str] = Field(
        description="List of keywords associated with the PDF document"
    )
    summary: str = Field(description="A short summary of the PDF document")
    main_topics: List[str] = Field(
        description="List of the main topics covered in the PDF document"
    )

#### Loop through each attachment and have the LLM analyze it and output the description to your feature service.


In [None]:
# Analyze every downloaded attachment with the LLM, collect the structured
# results in two dataframes, and write a text summary back to the "AIReview"
# field of the feature the attachment belongs to.

# Initialize dataframes to store results for pdf and image attachments
results_pdf = pd.DataFrame()
results_image = pd.DataFrame()

# Lower-case file extensions handled by each chain. Any other file type is
# skipped instead of being sent to the PDF chain.
image_extensions = (".png", ".jpg")
pdf_extensions = (".pdf",)

# Loop through attachments folder
for root, dirs, files in os.walk(output_dir):
    for file in files:
        # Downloaded attachments are named "<globalid>__<original name>".
        # A file without that separator has no global id to write back to.
        global_id, separator, _ = file.partition("__")
        if not separator:
            print(f"Skipping {file}: file name has no global id prefix")
            continue

        lowered_name = file.lower()
        is_image = lowered_name.endswith(image_extensions)
        is_pdf = lowered_name.endswith(pdf_extensions)
        if not (is_image or is_pdf):
            print(f"Skipping {file}: unsupported attachment type")
            continue

        print("=====================================")
        print(f"Processing attachment for global id: {global_id}")

        file_path = os.path.join(root, file)

        attachment_description = None
        if is_image:
            print("analyzing image attachment ...")
            iec_chain = ImageExtractChain(
                model=llm, image_path=file_path, output_model=ImageInformation
            )
            result = iec_chain.extract_from_image()
            # NOTE(review): vars(result) is the result object's own attribute
            # dict, so adding "globalid" also changes `result` before
            # format_for_attachment sees it — confirm this is intended.
            attributes = vars(result)
            attributes["globalid"] = global_id
            result_df = pd.DataFrame([attributes])
            results_image = pd.concat([results_image, result_df], ignore_index=True)
            attachment_description = iec_chain.format_for_attachment(result)
        else:
            print("analyzing pdf attachment ...")
            pdf_chain = PDFExtractChain(
                model=llm, pdf_path=file_path, output_model=PDFInformation
            )
            result = pdf_chain.extract_from_pdf()
            # NOTE(review): same in-place mutation of `result` as in the image
            # branch above.
            attributes = vars(result)
            attributes["globalid"] = global_id
            result_df = pd.DataFrame([attributes])
            results_pdf = pd.concat([results_pdf, result_df], ignore_index=True)
            attachment_description = pdf_chain.format_for_attachment(result)

        # Nothing to write back if the chain produced no description.
        if attachment_description is None:
            continue

        # Update payload: the feature is identified by its global id.
        feature = [
            {
                "attributes": {
                    feature_layer.properties.globalIdField: global_id,
                    "AIReview": attachment_description,
                }
            }
        ]

        print("Updating AI Review...")
        edit_response = feature_layer.edit_features(
            use_global_ids=True,
            updates=feature,
        )

        # edit_features reports per-feature outcomes under "updateResults";
        # only claim success when every reported update succeeded.
        update_results = edit_response.get("updateResults", [])
        if update_results and all(r.get("success") for r in update_results):
            print(f"Updated AI Review for global id: {global_id}")
        else:
            print(
                f"Failed to update AI Review for global id: {global_id}: {edit_response}"
            )
        print("=====================================")
        print("")

print("done analyzing attachments")

#### View the output of the images in a pandas dataframe


In [None]:
# Show the dataframe of image analysis results as the cell output
results_image

#### View the output of the pdfs in a pandas dataframe


In [None]:
# Show the dataframe of PDF analysis results as the cell output
results_pdf