In [62]:
import os
import sys
from pathlib import Path

import boto3
import numpy as np
import pandas as pd
import requests
from mypy_boto3_s3 import S3Client

# Project checkout that every relative path below is resolved against.
# Set COW_DETECT_PROJECT_ROOT to run the notebook from a different checkout
# without editing this cell; the default is the original hard-coded location.
PROJECT_ROOT = Path(
    os.environ.get(
        "COW_DETECT_PROJECT_ROOT",
        "/Users/mrestrepo/git/_personal/proyecto-de-grado",
    )
)
os.chdir(PROJECT_ROOT)

# S3 bucket that the sampled image files are uploaded to.
bucket = "cow-detect-maia"

# Object-key layout inside the bucket: <region>/<flyover>/<file name>.
key_pattern = "{region}/{flyover}/{file_name}"
# Local directory (relative to the project root) holding the files to upload.
local_base = Path("data/patches-512-ol-160-m01/train")
# Number of rows drawn in each random sample.
sample_size = 20
# Base URL of the HTTP API that is queried for model info and predictions.
API_BASE_URL = "http://localhost:8000"

In [22]:
# Load the annotation table from train_points_gt.csv.
df = pd.read_csv("data/patches-512-ol-160-m01/train_points_gt.csv")

# For every (images, labels) pair, count the non-null "x" entries, order the
# pairs from the highest count to the lowest, and keep only pairs with more
# than 5 entries. Built as a single chain so `by_img` is bound exactly once.
by_img = (
    df.groupby(["images", "labels"])
    .agg(count=("x", "count"))
    .sort_values("count", ascending=False)
    .loc[lambda counts: counts["count"] > 5]
)
by_img.shape

(325, 1)

In [63]:
# Display the per-(images, labels) count table; only pairs with count > 5 remain.
by_img

Unnamed: 0_level_0,Unnamed: 1_level_0,count
images,labels,Unnamed: 2_level_1
L_09_05_16_DSC10034_95.JPG,2,22
L_09_05_16_DSC10034_96.JPG,2,22
L_07_05_16_DSC00414_33.JPG,2,22
L_11_05_16_DSC01346_104.JPG,2,20
L_11_05_16_DSC01346_103.JPG,2,19
...,...,...
0af7b1ea3a107e511353adbaba10c2e55a0bddf2_132.JPG,6,6
L_07_05_16_DSC00647_130.JPG,1,6
L_09_05_16_DSC10176_86.JPG,2,6
S_10_05_16_DSC00824_17.JPG,2,6


In [64]:
# AWS session bound to the "dvc-user" profile in us-east-1, and the S3 client
# created from it; the client is what the upload loop uses to push files.
session = boto3.Session(profile_name="dvc-user", region_name="us-east-1")
s3: S3Client = session.client(service_name="s3")

In [65]:
# Preview one random draw of `sample_size` rows. No random_state is passed
# here, so the rows displayed are not expected to repeat between runs.
by_img.sample(sample_size)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
images,labels,Unnamed: 2_level_1
L_07_05_16_DSC00414_50.JPG,2,10
L_07_05_16_DSC00646_11.JPG,1,7
L_11_05_16_DSC01397_28.JPG,3,8
192c864e5739ae7f51bd00a54598e38139405e7f_147.JPG,6,11
0bcb68603f1d04e6da66bac87e722b61cb2b6b69_95.JPG,6,10
045498edab1c4c373cb7eff56cdc4f57ab75e772_5.JPG,6,6
0df8cbe13350f5b20d9d4156577709895c315e85_8.JPG,6,6
L_11_05_16_DSC01346_104.JPG,2,20
272c2cae1374216920ef579815b1277c9b59f3f1_33.JPG,6,7
0df8cbe13350f5b20d9d4156577709895c315e85_24.JPG,6,8


In [67]:
# (connect, read) timeouts in seconds for the API calls, so a stalled server
# cannot hang the notebook forever. The /predict read timeout is generous
# because inference time is not known from this notebook.
# NOTE(review): tune PREDICT_TIMEOUT to the deployed model's real latency.
APP_INFO_TIMEOUT = (5, 30)
PREDICT_TIMEOUT = (5, 300)

# S3 key prefix ("region") to use for each model the API may report.
REGION_BY_MODEL = {"HerdNet": "Chad-HN", "FasterRCNNResNetFPN": "Chad-FRC"}
# Flyover identifiers; each one gets its own random sample of images.
FLYOVERS = ["2025-06-30", "2025-07-31", "2025-08-31", "2025-09-30", "2025-10-31"]

# Ask the running API which model it serves. Fail fast on an HTTP error
# instead of hitting a confusing JSON/KeyError while parsing an error page.
info_resp = requests.get(f"{API_BASE_URL}/app-info", timeout=APP_INFO_TIMEOUT)
info_resp.raise_for_status()
model_name = info_resp.json()["model_info"]["model_metadata"]["name"]
print(f"{model_name=}")

# Still a KeyError for an unknown model, but one that says what went wrong.
if model_name not in REGION_BY_MODEL:
    raise KeyError(
        f"No region configured for model {model_name!r}; "
        f"known models: {sorted(REGION_BY_MODEL)}"
    )
region = REGION_BY_MODEL[model_name]
print(f"{region=}")

# Seed the global NumPy RNG before sampling so that the sequence of
# per-flyover samples drawn below is repeatable from run to run.
np.random.seed(42)
for flyover in FLYOVERS:
    # reset_index() turns the (images, labels) index back into columns.
    sample = by_img.sample(sample_size).reset_index()
    for file_name in sample["images"]:
        print(f"{flyover=}, {file_name=}")
        key = key_pattern.format(region=region, flyover=flyover, file_name=file_name)

        # boto3 documents `Filename` as a str path, so convert the Path explicitly.
        s3.upload_file(str(local_base / file_name), bucket, key)

        # Ask the API to run a prediction on the object that was just uploaded.
        payload = {"s3_path": f"s3://{bucket}/{key}"}
        resp = requests.post(
            f"{API_BASE_URL}/predict", json=payload, timeout=PREDICT_TIMEOUT
        )
        # Only the status is reported; a non-200 does not stop the run.
        print(resp.status_code)

model_name='HerdNet'
region='Chad-HN'
flyover='2025-06-30', file_name='079557329c366472fd970ee021e37abe2e44c7ca_34.JPG'
200
flyover='2025-06-30', file_name='S_10_05_16_DSC00472_171.JPG'
200
flyover='2025-06-30', file_name='S_09_05_16_DSC00412_25.JPG'
200
flyover='2025-06-30', file_name='L_09_05_16_DSC10034_76.JPG'
200
flyover='2025-06-30', file_name='146396c157f95fd6b6c113e5118e97546d69a431_72.JPG'
200
flyover='2025-06-30', file_name='0fd5ba32f00698a140e080008c2af81b2ae9fa5c_152.JPG'
200
flyover='2025-06-30', file_name='026ae7f5cddac223d6e364b7c16779b271cb3198_144.JPG'
200
flyover='2025-06-30', file_name='09e1e0ddb23a9c3e5ac62b552a856111b5f11942_119.JPG'
200
flyover='2025-06-30', file_name='L_11_05_16_DSC01386_100.JPG'
200
flyover='2025-06-30', file_name='L_09_05_16_DSC10034_24.JPG'
200
flyover='2025-06-30', file_name='14aa6dc307cfb871cedebf75bf6d7ec6df7a2c00_21.JPG'
200
flyover='2025-06-30', file_name='S_10_05_16_DSC00472_157.JPG'
200
flyover='2025-06-30', file_name='00f062e946ccdb29a