In [1]:
# Configs and utility
import yaml 
import os
from datetime import datetime
from tqdm import tqdm 
import pandas as pd
import requests
import json
import uuid

# geospatial libraries
import geopandas as gpd
from shapely.geometry import Polygon, Point

# Deep learning library
from ultralytics import YOLO 
import cv2

# paths
import sys
sys.path.append("..")

# Custom functions
from src.utils import (
    pixel_to_gps,
    calculate_gsd,
    extract_gps_coordinates,
    extract_focal_length
)

In [2]:
def authenticate(portal_url, username, password):
    token_url = f"{portal_url}/sharing/rest/generateToken"
    payload = {
        "username": username,
        "password": password,
        "referer": "https://www.arcgis.com",
        "f": "json",
    }
    response = requests.post(token_url, data=payload)
    if response.status_code == 200:
        token = response.json().get("token")
        if token:
            return token
        else:
            raise Exception(
                "Failed to generate token: "
                + response.json().get("error", {}).get("message", "Unknown error")
            )
    else:
        response.raise_for_status()

In [3]:
def insert_features_to_arcgis(url, token, features):
    add_features_url = f"{url}/addFeatures"

    payload = {
        "features": json.dumps(features),
        "f": "json",
        "token": token,
        # "rollbackOnFailure": True,
    }

    response = requests.post(add_features_url, data=payload)
    return response.json()

In [4]:
def wrangling_geometry(coords):
    rings = []
    for coord in coords:
        ring = []
        for point in coord:
            point = list(point)
            ring.append(point)
        rings.append(ring)
    return {"rings": rings}

In [5]:
# Hardcoding the ID Vilnius drone camera parameters
SENSOR_WIDTH = 6.4
SENSOR_HEIGHT = 4.8

In [6]:
# Load environment variables from coffig file
current_dir = os.getcwd()

# Reading the configuration file
with open("configuration.yaml", "r") as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

# Extracting the YOLO model name
yolo_model_name = config["YOLO_MODEL"]
yolo_model_path = os.path.join("ml_models", yolo_model_name)
yolo_model = YOLO(yolo_model_path)

# Extracting the input connection string and container name
input_connection_string = config["INPUT_CONNECTION_STRING"]
input_container_name = config["INPUT_CONTAINER_NAME"]

portal_url = config["PORTAL_URL"]
username = config["USERNAME"]
password = config["PASSWORD"]
feature_service_url = config["URL"]

In [7]:
# Create input file paths
paths = [os.path.join(current_dir, "input", file) for file in os.listdir(os.path.join(current_dir, "input"))]

In [8]:
# Create placeholder for Sosnovskies
sosnovskies, probablities, classes = [], [], []

for path in tqdm(paths):
    # Reading and ploting the image
    img = cv2.imread(path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Predicting
    results = yolo_model.predict(img, conf=0.01)
    segments = results[0].masks

    # Get probabilities and classes
    probs = results[0].boxes.conf
    mask_classes = results[0].boxes.cls


    if segments is not None: 
        # Gathering all the needed information for the Sosnovskies
        img_w, img_h = img.shape[1], img.shape[0]
        focal_length = extract_focal_length(path)
        latitude, longitude, altitude, relative_altitude = extract_gps_coordinates(path)

        gsd_h, gsd_v = calculate_gsd(
            relative_altitude,
            img_w,
            img_h,
            SENSOR_WIDTH, 
            SENSOR_HEIGHT, 
            focal_length
            )

        if gsd_h is None or gsd_v is None:
            print(f"Skipping image {path} due to invalid GSD values.")
            continue

        # Creating the gps coordinates
        polygon_idx = 0
        
        for mask, prob, mask_cls in zip(segments, probs, mask_classes):

            points = mask.xy[0]
            for point in points:
                sosnovskies.append(
                    (
                        f"{path}",
                        polygon_idx,
                        pixel_to_gps(
                            point[0], 
                            point[1], 
                            img.shape[1], 
                            img.shape[0], 
                            latitude, 
                            longitude, 
                            gsd_h, 
                            gsd_v, 
                            relative_altitude
                        )
                    )
                )

            # Adding an additional last point as the first point
            sosnovskies.append(
                (
                    f"{path}",
                    polygon_idx,
                    pixel_to_gps(
                        points[0][0], 
                        points[0][1], 
                        img.shape[1], 
                        img.shape[0], 
                        latitude, 
                        longitude, 
                        gsd_h, 
                        gsd_v, 
                        relative_altitude
                    )
                )
            )

            # Incrementing
            polygon_idx += 1
            
            probablities.append(round(float(prob), 2))
            classes.append(int(mask_cls))

    # Clearning the image
    del img 

  0%|          | 0/4 [00:00<?, ?it/s]


0: 480x640 3 sosnovskis, 521.8ms
Speed: 6.9ms preprocess, 521.8ms inference, 12.6ms postprocess per image at shape (1, 3, 480, 640)


 25%|██▌       | 1/4 [00:02<00:08,  2.82s/it]


0: 480x640 2 sosnovskis, 352.9ms
Speed: 5.2ms preprocess, 352.9ms inference, 3.2ms postprocess per image at shape (1, 3, 480, 640)


 50%|█████     | 2/4 [00:03<00:03,  1.76s/it]


0: 480x640 5 sosnovskis, 351.2ms
Speed: 2.4ms preprocess, 351.2ms inference, 4.8ms postprocess per image at shape (1, 3, 480, 640)


 75%|███████▌  | 3/4 [00:04<00:01,  1.22s/it]


0: 480x640 (no detections), 381.7ms
Speed: 3.4ms preprocess, 381.7ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)


100%|██████████| 4/4 [00:05<00:00,  1.30s/it]


In [9]:
# Create dataframe from the Sosnovskies list
gps_points_df = pd.DataFrame(sosnovskies, columns=["image_path", "polygon_idx", "gps_coords"])
gps_points_df['lat'] = gps_points_df['gps_coords'].apply(lambda x: x[0])
gps_points_df['lon'] = gps_points_df['gps_coords'].apply(lambda x: x[1])
gps_points_df.drop(columns=['gps_coords'], inplace=True)

In [10]:
# Grouping the data by image path and polygon index
gdf = gps_points_df.groupby(['image_path', 'polygon_idx']).agg(list).reset_index()

gdf["prediction_prob"] = probablities
gdf["species"] = classes

# Creating the geometry column
gdf["geometry"] = gdf.apply(
    lambda row: Polygon(
        [Point(xy) for xy in zip(row["lon"], row["lat"])]
    ),
    axis=1,
)

gdf["begin_lifespan_version"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")


In [11]:
# Create a geodataframe
gdf = gpd.GeoDataFrame(gdf, geometry="geometry", crs="EPSG:4326")
gdf = gdf.to_crs("EPSG:3346")
gdf["population_size"] = round(gdf.geometry.area, 2)
gdf['created_at'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
gdf['updated_at'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Create a unique ID for each prediction UUID
gdf['prediction_id'] = gdf.apply(lambda x: str(uuid.uuid4()), axis=1)
gdf["species"] = classes

In [12]:
# Drop unnecessary columns
gdf = gdf.drop(columns=["lat", "lon", "polygon_idx"])
gdf = gdf[gdf["population_size"] > 0.00]
gdf.reset_index(drop=True, inplace=True)
gdf.head()

Unnamed: 0,image_path,prediction_prob,species,geometry,begin_lifespan_version,population_size,created_at,updated_at,prediction_id
0,c:\Projects\sosnovski-identifier\notebooks\inp...,0.01,0,"POLYGON ((576079.209 6068075.69, 576079.18 606...",2025-04-24 13:23:09,2.48,2025-04-24 13:23:09,2025-04-24 13:23:09,dd789579-0ae6-4cb8-8636-7d5e12b69f01
1,c:\Projects\sosnovski-identifier\notebooks\inp...,0.01,0,"POLYGON ((576083.602 6068084.682, 576083.574 6...",2025-04-24 13:23:09,2.32,2025-04-24 13:23:09,2025-04-24 13:23:09,65b0ceee-ec5b-486b-a6d5-ce7c536a12da
2,c:\Projects\sosnovski-identifier\notebooks\inp...,0.01,0,"POLYGON ((576079.209 6068075.69, 576079.18 606...",2025-04-24 13:23:09,2.56,2025-04-24 13:23:09,2025-04-24 13:23:09,6c40bd16-bc19-4e86-be18-98b18a22c22f
3,c:\Projects\sosnovski-identifier\notebooks\inp...,0.01,0,"POLYGON ((589961.561 6099139.393, 589961.527 6...",2025-04-24 13:23:09,1.55,2025-04-24 13:23:09,2025-04-24 13:23:09,cde8e159-c01a-455e-b7a0-778890e3aed7
4,c:\Projects\sosnovski-identifier\notebooks\inp...,0.01,0,"POLYGON ((589980.474 6099139.349, 589980.328 6...",2025-04-24 13:23:09,22.58,2025-04-24 13:23:09,2025-04-24 13:23:09,6e715c4e-ea93-47ed-9090-4417078d864a


In [13]:
# Plotting the geodataframe
gdf.explore()

In [14]:
# Transform the GeoDataFrame to a dictionary format suitable for ArcGIS
geojson_data = gdf.to_geo_dict(drop_id=True)

In [15]:
# Get features from the GeoJSON data
features = geojson_data.get("features", [])

In [16]:
# Token authentication
token = authenticate(portal_url=portal_url, username=username, password=password)

In [17]:
# Wrangling the features for ArcGIS
agol_features = []
for feature in features:
    coords = feature["geometry"]["coordinates"]
    rings = wrangling_geometry(coords)
    agol_feature = {
        "geometry": rings,
        "attributes": feature["properties"],
    }
    agol_features.append(agol_feature)

In [19]:
insert_features_to_arcgis(feature_service_url, token, agol_features)

{'addResults': [{'objectId': 1,
   'uniqueId': 1,
   'globalId': None,
   'success': True},
  {'objectId': 2, 'uniqueId': 2, 'globalId': None, 'success': True},
  {'objectId': 3, 'uniqueId': 3, 'globalId': None, 'success': True},
  {'objectId': 4, 'uniqueId': 4, 'globalId': None, 'success': True},
  {'objectId': 5, 'uniqueId': 5, 'globalId': None, 'success': True},
  {'objectId': 6, 'uniqueId': 6, 'globalId': None, 'success': True},
  {'objectId': 7, 'uniqueId': 7, 'globalId': None, 'success': True},
  {'objectId': 8, 'uniqueId': 8, 'globalId': None, 'success': True},
  {'objectId': 9, 'uniqueId': 9, 'globalId': None, 'success': True}]}