# HRNet

#### Organizar archivo json a partir de xml de CVAT

In [None]:
import json
import numpy as np
from lxml import etree
from pathlib import Path
from tqdm import tqdm

# === CONFIGURE YOUR PATHS HERE ===
xml_input_path = Path("/data/Datasets/Fish/2022/2022_2023_General.xml")
output_path = Path("/data/Datasets/Fish/2022/fisk_8kpts.json")
padding = 10  # extra pixels added around each keypoint-derived bbox

# === PARSE THE XML EXPORT ===
with open(xml_input_path, "rb") as f:
    tree = etree.parse(f)

# Every <image> element found anywhere in the document.
images = tree.xpath("//image")

# === INITIALISE THE COCO STRUCTURE ===
# One "fish" category whose keypoints are named "1".."8"; no skeleton links.
coco_output = dict(
    images=[],
    annotations=[],
    categories=[
        dict(
            id=1,
            name="fish",
            keypoints=list(map(str, range(1, 9))),
            skeleton=[],
        )
    ],
)

# Running id handed to each emitted annotation (first one gets 1).
annotation_id = 1

def compute_bbox_from_keypoints(kpts, image_width, image_height, pad=10):
    """Return a padded ``[x, y, width, height]`` box around the visible keypoints.

    Args:
        kpts: Flat sequence of ``x, y, v`` triplets; a keypoint counts as
            visible when its ``v`` entry is greater than 0.
        image_width: Upper clamp for the right edge of the box.
        image_height: Upper clamp for the bottom edge of the box.
        pad: Margin added on every side before clamping.

    Returns:
        ``[0, 0, image_width, image_height]`` when no keypoint is visible,
        otherwise a list of four floats ``[x_min, y_min, width, height]``.
    """
    triplets = np.asarray(kpts).reshape(-1, 3)
    labelled = triplets[triplets[:, 2] > 0]

    # Nothing to enclose: fall back to the full image extent.
    if labelled.shape[0] == 0:
        return [0, 0, image_width, image_height]

    xs, ys = labelled[:, 0], labelled[:, 1]

    # Grow the tight box by ``pad`` and keep it inside the image.
    left = max(0, xs.min() - pad)
    top = max(0, ys.min() - pad)
    right = min(image_width, xs.max() + pad)
    bottom = min(image_height, ys.max() + pad)

    return [float(left), float(top), float(right - left), float(bottom - top)]

# === PROCESS IMAGES AND ANNOTATIONS ===
# For every <image> element: register the image, gather its <points> children
# whose label is a digit 1..8 into a flat [x, y, v] * 8 list, and emit one
# annotation per image that has at least one labelled keypoint.
for img_elem in tqdm(images):
    img_id = int(img_elem.attrib["id"])
    img_name = img_elem.attrib["name"]
    width = int(img_elem.attrib["width"])
    height = int(img_elem.attrib["height"])

    coco_output["images"].append({
        "id": img_id,
        "file_name": img_name,
        "width": width,
        "height": height
    })

    # Slots that never receive a label stay at (0, 0, 0).
    kpts = [0] * 8 * 3

    for point in img_elem.xpath("points"):
        label = point.attrib["label"]
        if label.isdigit() and 1 <= int(label) <= 8:
            idx = int(label) - 1
            x, y = map(float, point.attrib["points"].split(","))
            # Visibility flag 2 marks the slot as labelled.
            kpts[idx * 3: idx * 3 + 3] = [x, y, 2]

    # Count filled slots rather than XML elements: a label that appears twice
    # overwrites the same slot and must not be counted twice.
    num_keypoints = sum(1 for v in kpts[2::3] if v > 0)

    if num_keypoints > 0:
        bbox = compute_bbox_from_keypoints(kpts, width, height, padding)
        coco_output["annotations"].append({
            "id": annotation_id,
            "image_id": img_id,
            "category_id": 1,
            "keypoints": kpts,
            "num_keypoints": num_keypoints,
            "bbox": bbox,
            "area": bbox[2] * bbox[3],
            "iscrowd": 0
        })
        annotation_id += 1

# === SAVE JSON ===
with open(output_path, "w") as f:
    json.dump(coco_output, f, indent=2)

print(f"Listo: {output_path}")


#### Dividir datos: entrenamiento y validación

In [None]:
import json
import random
from pathlib import Path

# Read the complete annotation file produced by the conversion step
input_path = Path("/data/Datasets/Fish/2022/fisk_8kpts.json")
with open(input_path, "r") as f:
    data = json.load(f)

# Seeded shuffle of the unique image ids, then a 90 % / 10 % cut
random.seed(42)
image_ids = list({img["id"] for img in data["images"]})
random.shuffle(image_ids)
split_index = int(0.9 * len(image_ids))
train_ids, val_ids = set(image_ids[:split_index]), set(image_ids[split_index:])

# Restrict a COCO dictionary to a chosen set of image ids
def split_coco(data, selected_ids):
    """Return a COCO-style dict containing only the selected images.

    Args:
        data: Dict with ``"images"``, ``"annotations"`` and ``"categories"``
            entries; ``"info"`` and ``"licenses"`` are optional.
        selected_ids: Container of image ids to keep (membership-tested).

    Returns:
        A new dict whose ``"images"`` keep entries with ``id`` in
        ``selected_ids`` and whose ``"annotations"`` keep entries with
        ``image_id`` in ``selected_ids``. ``"categories"`` is passed through
        as the same object; missing ``"info"`` / ``"licenses"`` default to
        ``{}`` / ``[]``.
    """
    kept_images = [entry for entry in data["images"] if entry["id"] in selected_ids]
    kept_annotations = [
        entry for entry in data["annotations"] if entry["image_id"] in selected_ids
    ]

    subset = {}
    subset["info"] = data.get("info", {})
    subset["licenses"] = data.get("licenses", [])
    subset["images"] = kept_images
    subset["annotations"] = kept_annotations
    subset["categories"] = data["categories"]
    return subset

train_data = split_coco(data, train_ids)
val_data = split_coco(data, val_ids)

# Write each split to its own JSON file
train_path = "/data/Datasets/Fish/2022/fish8kpt_train.json"
val_path = "/data/Datasets/Fish/2022/fish8kpt_val.json"
for split_file, split_payload in ((train_path, train_data), (val_path, val_data)):
    with open(split_file, "w") as f:
        json.dump(split_payload, f, indent=2)

# Last expression of the cell: shows both output paths
train_path, val_path

# Training_HRNet

Modificar td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256_train.py en función de:

1. Número de keypoints
2. Otras modificaciones de parametros segun sea el caso: Faso 02

Nota: el .py fue modificado únicamente en el número de KP. Hay métricas de evaluación como PCKaccuracy y AUC que no son relevantes. Asimismo, hay data augmentation.

In [None]:
!python tools/train.py \
  /home/arumota_pupils/Josue/1PECES/MMPose/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256_train.py 
  
!export PYTHONPATH=$(pwd) && python tools/train.py configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256_train.py 

# Inferencia_HRNet

Inferencia considerando:

1. Una imagen
2. Varias imágenes
3. Métricas de evaluación: RMSE, MAE, R2 ...

In [None]:
# 1. Single image
# NOTE(review): the recorded run of this exact command further down in this
# notebook aborts with "unrecognized arguments: --out-img-root", so these
# flags do not match the demo script's CLI. The config path also lacks the
# "topdown_heatmap/" directory that the other cells use — TODO confirm.

!python demo/topdown_img_demo.py \
    /data/Datasets/Fish/FIB/FIB/inference/IMG_0129-Editar.jpg \
    configs/animal_2d_keypoint/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256.py \
    checkpoints/P3_fishes_hrnet_w32.pth \
    --out-img-root vis_results/ \
    --draw-heatmap

In [None]:
#2. Varias imagenes
!python demo/image_demo_hrnet.py \
  /data/Datasets/Fish/FIB/FIB/inference/IMG_0129-Editar.jpg \
  configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256.py \
  checkpoints/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256-76c3999f_20230519.pth \
  --out-file vis_results/fish_skeleton.jpg \
  --draw-heatmap \
  --show-kpt-idx \
  --device cuda


In [None]:
import os
import glob
import subprocess
import json

# Paths
input_folder = '/data/Datasets/Fish/2022/images/val_fish/'
output_folder = '/data/Datasets/Fish/2022/output_hrnet/'
config_path = 'configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256_train.py'
checkpoint_path = 'work_dirs/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256_train/epoch_600.pth'
merged_json_path = os.path.join(output_folder, 'all_keypoints_predicted.json')

os.makedirs(output_folder, exist_ok=True)

# Get image list
image_files = sorted(glob.glob(os.path.join(input_folder, '*.[jp][pn]g')))

# Accumulator for the per-image prediction payloads
all_predictions = []

# Run the demo script once per image and collect the JSON it leaves behind
for img_path in image_files:
    img_name = os.path.basename(img_path)
    out_base = os.path.join(output_folder, img_name)
    # Swap only the final extension. Chained str.replace would rewrite any
    # ".jpg"/".png" occurring earlier in the path, and for other extensions
    # matched by the glob it would leave out_json equal to the image path.
    out_json = os.path.splitext(out_base)[0] + '_keypoints.json'

    cmd = [
        "python", "demo/image_demo.py",
        img_path,
        config_path,
        checkpoint_path,
        "--out-file", out_base,
        "--draw-heatmap",
        "--show-kpt-idx"
    ]

    print(f"üîÅ Procesando {img_name}...")
    result = subprocess.run(cmd)

    # A failed run must not fall through: a JSON left by an earlier run would
    # otherwise be merged as if it were fresh.
    if result.returncode != 0:
        print(f"El demo falló para {img_name} (código {result.returncode}); se omite.")
        continue

    # Load the per-image JSON.
    # NOTE(review): assumes the demo script writes "<stem>_keypoints.json"
    # next to the output image — confirm against demo/image_demo.py.
    if os.path.exists(out_json):
        with open(out_json, 'r') as f:
            pred = json.load(f)
            all_predictions.append(pred)
    else:
        print(f"‚ö†Ô∏è No se encontr√≥ JSON para {img_name}")

# Write everything into a single merged file
with open(merged_json_path, 'w') as f:
    json.dump(all_predictions, f, indent=4)

print(f"\n‚úÖ JSON consolidado guardado en: {merged_json_path}")


#### Métricas de evaluación HRNet

In [None]:
import json
import numpy as np
import pandas as pd
from pathlib import Path

# Input paths
pred_file = Path("/data/Datasets/Fish/2022/output_hrnet/all_keypoints_predicted.json")
gt_file = Path("/data/Datasets/Fish/2022/fish8kpt_val.json")

# Load both JSON files
with pred_file.open("r") as f:
    pred_data = json.load(f)

with gt_file.open("r") as f:
    gt_data = json.load(f)

# Index ground truth by image file name; each value is a (K, 3) array of
# x, y, visibility rows (when an image has several annotations the last wins).
image_id_to_name = {img["id"]: img["file_name"] for img in gt_data["images"]}
annotations_by_name = {
    image_id_to_name[ann["image_id"]]: np.array(ann["keypoints"]).reshape(-1, 3)
    for ann in gt_data["annotations"]
}

# Build one row per (image, keypoint) pair
rows = []
for pred_item in pred_data:
    name = pred_item["image"]
    gt_kpts = annotations_by_name.get(name)
    if gt_kpts is None:
        print(f"‚ö†Ô∏è Imagen {name} no est√° en GT.")
        continue

    pred_kpts = pred_item["keypoints"]
    if len(pred_kpts) != len(gt_kpts):
        # Pair up what exists instead of raising IndexError on the longer side.
        print(f"{name}: {len(pred_kpts)} keypoints predichos vs {len(gt_kpts)} en GT.")

    for (gt_x, gt_y, gt_vis), pred_kpt in zip(gt_kpts, pred_kpts):
        # Visibility 0 means the keypoint was never labelled (coordinates are
        # placeholders), so a distance to it would be meaningless.
        if gt_vis == 0:
            continue

        pred_x, pred_y = pred_kpt["x"], pred_kpt["y"]
        score = pred_kpt["score"]
        error = np.linalg.norm([gt_x - pred_x, gt_y - pred_y])

        rows.append({
            "image": name,
            "keypoint": pred_kpt["name"],
            "gt_x": gt_x,
            "gt_y": gt_y,
            "pred_x": pred_x,
            "pred_y": pred_y,
            "score": score,
            "euclidean_error": error
        })

# Save CSV (create the target directory first; to_csv does not create it)
df_all = pd.DataFrame(rows)
output_csv = Path("/data/Datasets/Fish/2022/hrnet/keypoints_detailed_comparison.csv")
output_csv.parent.mkdir(parents=True, exist_ok=True)
df_all.to_csv(output_csv, index=False)
print("‚úÖ Archivo generado: keypoints_detailed_comparison.csv")


In [None]:
import json
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Paths
pred_file = Path("/data/Datasets/Fish/2022/output_hrnet/all_keypoints_predicted.json")
gt_file = Path("/data/Datasets/Fish/2022/fish8kpt_val.json")

# Load files
with pred_file.open("r") as f:
    pred_data = json.load(f)

with gt_file.open("r") as f:
    gt_data = json.load(f)

# Index annotations by image file name. The visibility column is kept so that
# unlabelled keypoints can be excluded below.
image_id_to_name = {img['id']: img['file_name'] for img in gt_data['images']}
annotations_by_name = {
    image_id_to_name[ann['image_id']]: np.array(ann['keypoints']).reshape(-1, 3)
    for ann in gt_data['annotations']
}

# Point-by-point comparison
rows = []
gt_all = []
pred_all = []

for item in pred_data:
    name = item["image"]
    pred_kpts = {kp["name"]: (kp["x"], kp["y"]) for kp in item["keypoints"]}
    gt_kpts = annotations_by_name.get(name)

    if gt_kpts is None:
        print(f"‚ö†Ô∏è Imagen {name} no encontrada en GT.")
        continue

    # Ground truth and predictions are paired purely by position.
    for gt_row, (kpt_name, pred_xy) in zip(gt_kpts, pred_kpts.items()):
        # Visibility 0 rows hold placeholder coordinates, not a real label;
        # scoring against them would distort every metric.
        if gt_row[2] == 0:
            continue

        gt_xy = gt_row[:2]
        error = np.linalg.norm(np.array(pred_xy) - np.array(gt_xy))
        rows.append({
            "image": name,
            "keypoint": kpt_name,
            "gt_x": gt_xy[0],
            "gt_y": gt_xy[1],
            "pred_x": pred_xy[0],
            "pred_y": pred_xy[1],
            "euclidean_error": error
        })
        gt_all.extend(gt_xy)
        pred_all.extend(pred_xy)

df = pd.DataFrame(rows)

def _xy_metrics(sub, r2_needs_two_rows=False):
    """Return MAE, RMSE and R2 between the gt and pred columns of ``sub``.

    Args:
        sub: DataFrame slice with ``gt_x``, ``gt_y``, ``pred_x``, ``pred_y``.
        r2_needs_two_rows: When true, R2 is reported as ``None`` for slices
            with fewer than two rows instead of being computed.

    Returns:
        Dict with keys ``"MAE"``, ``"RMSE"``, ``"R2"`` (in that order).
    """
    gt_xy = sub[["gt_x", "gt_y"]].values
    pred_xy = sub[["pred_x", "pred_y"]].values
    if r2_needs_two_rows and len(gt_xy) <= 1:
        r2 = None
    else:
        r2 = r2_score(gt_xy, pred_xy)
    return {
        "MAE": mean_absolute_error(gt_xy, pred_xy),
        "RMSE": np.sqrt(mean_squared_error(gt_xy, pred_xy)),
        "R2": r2,
    }


# All three CSVs go to the same directory; create it, since to_csv will not.
metrics_dir = Path("/data/Datasets/Fish/2022/hrnet")
metrics_dir.mkdir(parents=True, exist_ok=True)

# --- 1. Global metrics per keypoint ---
trait_metrics = [
    {"keypoint": kpt, **_xy_metrics(sub)}
    for kpt, sub in df.groupby("keypoint", sort=False)
]
df_trait = pd.DataFrame(trait_metrics)
df_trait.to_csv(metrics_dir / "metrics_per_keypoint.csv", index=False)

# --- 2. Metrics per image ---
image_metrics = [
    {"image": img, **_xy_metrics(sub)}
    for img, sub in df.groupby("image", sort=False)
]
df_image = pd.DataFrame(image_metrics)
df_image.to_csv(metrics_dir / "metrics_per_image.csv", index=False)

# --- 3. Metrics per keypoint and image ---
# R2 is left empty for groups with a single row.
trait_image_metrics = [
    {"image": img, "keypoint": kpt, **_xy_metrics(sub, r2_needs_two_rows=True)}
    for (img, kpt), sub in df.groupby(["image", "keypoint"])
]
df_trait_image = pd.DataFrame(trait_image_metrics)
df_trait_image.to_csv(metrics_dir / "metrics_per_keypoint_image.csv", index=False)

print("‚úÖ ¬°Listo! Se guardaron los tres archivos CSV:")
print("- metrics_per_keypoint.csv")
print("- metrics_per_image.csv")
print("- metrics_per_keypoint_image.csv")

# end

In [None]:
!python demo/topdown_img_demo.py \
    /data/Datasets/Fish/FIB/FIB/inference/IMG_0129-Editar.jpg \
    configs/animal_2d_keypoint/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256.py \
    checkpoints/P3_fishes_hrnet_w32.pth \
    --out-img-root vis_results/ \
    --draw-heatmap

usage: topdown_demo_with_mmdet.py [-h] [--input INPUT] [--show]
                                  [--output-root OUTPUT_ROOT]
                                  [--save-predictions] [--device DEVICE]
                                  [--det-cat-id DET_CAT_ID]
                                  [--bbox-thr BBOX_THR] [--nms-thr NMS_THR]
                                  [--kpt-thr KPT_THR] [--draw-heatmap]
                                  [--show-kpt-idx]
                                  [--skeleton-style {mmpose,openpose}]
                                  [--radius RADIUS] [--thickness THICKNESS]
                                  [--show-interval SHOW_INTERVAL]
                                  [--alpha ALPHA] [--draw-bbox]
                                  det_config det_checkpoint pose_config
                                  pose_checkpoint
topdown_demo_with_mmdet.py: error: unrecognized arguments: --out-img-root


In [8]:
!python demo/image_demo_hrnet.py \
  /data/Datasets/Fish/FIB/FIB/inference/IMG_0129-Editar.jpg \
  configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256.py \
  checkpoints/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256-76c3999f_20230519.pth \
  --out-file vis_results/fish_skeleton.jpg \
  --draw-heatmap \
  --show-kpt-idx \
  --device cuda


Loads checkpoint by local backend from path: checkpoints/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256-76c3999f_20230519.pth


DEBUG: Loaded model.dataset_meta keys = dict_keys(['dataset_name', 'num_keypoints', 'keypoint_id2name', 'keypoint_name2id', 'upper_body_ids', 'lower_body_ids', 'flip_indices', 'flip_pairs', 'keypoint_colors', 'num_skeleton_links', 'skeleton_links', 'skeleton_link_colors', 'dataset_keypoint_weights', 'sigmas', 'CLASSES']) 


04/11 06:22:45 - mmengine - [4m[97mINFO[0m - The output image has been saved at vis_results/fish_skeleton.jpg


## Training

In [11]:
!python tools/train.py \
  /home/arumota_pupils/Josue/1PECES/MMPose/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256_train.py 
  
!export PYTHONPATH=$(pwd) && python tools/train.py configs/animal_2d_keypoint/topdown_heatmap/ak/td-hm_hrnet-w32_8xb32-300e_animalkingdom_P3_fish-256x256_train.py 

05/04 19:33:58 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
    CUDA available: True
    MUSA available: False
    numpy_random_seed: 1043423584
    GPU 0: NVIDIA GeForce RTX 4090
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 12.1, V12.1.105
    GCC: gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
    PyTorch: 2.2.1
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2023.1-Product Build 20230303 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.3.2 (Git Hash 2dc95a2ad0841e29db8b22fbccaf3e5da7992b01)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 12.1
  - NVCC architecture flags: -gencode;arch=compute_50,co

# VitPose

## Training script

These commands must be copied into a terminal without the leading `!`.

In [None]:
!cd /home/arumota_pupils/Josue/1PECES/MMPose/mmpose
    
!python tools/train.py \
    /home/arumota_pupils/Josue/1PECES/MMPose/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-fish9_8xb32-100etrain.py

05/06 09:59:52 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]
    CUDA available: True
    MUSA available: False
    numpy_random_seed: 819089920
    GPU 0: NVIDIA GeForce RTX 4090
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 12.1, V12.1.105
    GCC: gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
    PyTorch: 2.2.1
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2023.1-Product Build 20230303 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.3.2 (Git Hash 2dc95a2ad0841e29db8b22fbccaf3e5da7992b01)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 12.1
  - NVCC architecture flags: -gencode;arch=compute_50,cod

## Inference over weights

Inference over a single image

In [None]:
# Positional arguments passed to the demo script, in order:
# 1. Path of the image to run inference on
# 2. Path of the config used for training
# 3. Path of the weights (checkpoint)

!python demo/image_demo.py \
    "/data/Datasets/Fish/2022/images/val_fish/DSC03012_10032023194229.jpg" \
    "configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-fish9_8xb32-100etrain.py" \
    "/data/Josue/weights/Fish/epoch_final_vit.pth" \
    --out-file "work_dirs/images/test_file.png" \
    --draw-heatmap \
    --show-kpt-idx

: 

Inference over the set of images found in the `input_folder` path

In [None]:
import os
import glob

# Define paths
input_folder = '/data/Datasets/Fish/2022/images/val_fish/'
output_folder = '/data/Datasets/Fish/2022/output/'
config_path = 'configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-fish9_8xb32-100etrain.py'
checkpoint_path = '/data/Josue/weights/Fish/epoch_final_vit.pth'

# Create output directory if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Get all image files (jpg/png/jpeg)
image_files = sorted(glob.glob(os.path.join(input_folder, '*.[jp][pn]g')))

# Loop through and run demo script
for img_path in image_files:
    img_name = os.path.basename(img_path)
    out_file = os.path.join(output_folder, img_name)

    !python demo/image_demo.py \
        "{img_path}" \
        "{config_path}" \
        "{checkpoint_path}" \
        --out-file "{out_file}" \
        --draw-heatmap \
        --show-kpt-idx

In [None]:
# Generate Predictions
!python tools/test.py \
    configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-fish9_8xb32-100etrain.py \
    /data/Josue/weights/Fish/epoch_600.pth \
    --dump /data/Josue/weights/Fish/predictions.pkl

Métricas

In [None]:
from mmpose.apis import inference_topdown, init_model
from mmengine.dataset import Compose
from mmcv import imread
import os
import glob
import json

: 

In [None]:
from mmpose.apis import inference_topdown, init_model
from mmengine.dataset import Compose
from mmcv import imread
import os
import glob
import json

# --- SETTINGS ---
config_path = 'configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-fish9_8xb32-100etrain.py'
checkpoint_path = '/data/Josue/weights/Fish/epoch_final_vit.pth'
input_folder = '/data/Datasets/Fish/2022/images/val_fish/'
output_json = '/data/Datasets/Fish/2022/predicted_keypoints.json'

# --- INITIALISE MODEL ---
model = init_model(config_path, checkpoint_path, device='cuda:0')

# --- PROCESS ALL IMAGES ---
image_files = sorted(glob.glob(os.path.join(input_folder, '*.[jp][pn]g')))
results = {}

for img_path in image_files:
    img = imread(img_path)

    # No detector is used: one box covering the whole image is assumed.
    # NOTE(review): relies on inference_topdown treating omitted bboxes as
    # "whole image" and returning data samples with `pred_instances`, as the
    # other cells of this notebook assume — confirm for the installed MMPose.
    pose_results = inference_topdown(model, img)

    if pose_results:
        instances = pose_results[0].pred_instances
        xy = instances.keypoints[0]             # first instance, (x, y) rows
        scores = instances.keypoint_scores[0]   # one score per keypoint
        # Plain Python floats so json.dump can serialise them as [x, y, score].
        results[os.path.basename(img_path)] = [
            [float(x), float(y), float(s)] for (x, y), s in zip(xy, scores)
        ]

# --- SAVE ---
with open(output_json, 'w') as f:
    json.dump(results, f, indent=4)

print(f"Keypoints guardados en: {output_json}")


In [None]:
import json
import random
import numpy as np
import matplotlib.pyplot as plt
from mmengine import load
from PIL import Image
from pathlib import Path

# Input files
val_json_path = Path("/data/Datasets/Fish/2022/fish8kpt_val.json")
results_path = Path("/data/Josue/weights/Fish/predictions.pkl")

# Read the COCO annotations and the dumped predictions
with open(val_json_path, "r") as f:
    val_data = json.load(f)
results = load(results_path)

# image id -> file name; images are looked up in the "images" folder that
# sits next to the annotation file
id_to_filename = dict(
    (entry["id"], entry["file_name"]) for entry in val_data["images"]
)
image_dir = val_json_path.parent / "images"

# Draw one prediction at random and open the image it refers to
sample = random.choice(results)
img_id = sample.metainfo['img_id']
img_name = id_to_filename[img_id]
img_path = image_dir / img_name
img = Image.open(img_path)

# Predicted keypoints; only the first instance is drawn below
kpts = sample.pred_instances.keypoints

# Plot
plt.figure(figsize=(8, 6))
plt.imshow(img)
plt.axis("off")

for x, y in kpts[0]:
    # Points falling outside the picture are not drawn.
    if not (0 <= x <= img.width and 0 <= y <= img.height):
        continue
    plt.scatter(x, y, c='red', s=40)
    plt.text(x + 2, y + 2, f"{int(x)},{int(y)}", fontsize=8, color='yellow')

plt.title(f"Predicted Keypoints ‚Äì {img_name}")
plt.show()


In [None]:
import json
import numpy as np
from lxml import etree
from pathlib import Path
from tqdm import tqdm

# === CONFIGURE YOUR PATHS HERE ===
xml_input_path = Path("/data/Datasets/Fish/2022/2022_2023_General.xml")
output_path = Path("/data/Datasets/Fish/2022/fisk_8kpts.json")
padding = 10  # extra pixels added around each keypoint-derived bbox

# === PARSE THE XML EXPORT ===
with open(xml_input_path, "rb") as f:
    tree = etree.parse(f)

# Every <image> element found anywhere in the document.
images = tree.xpath("//image")

# === INITIALISE THE COCO STRUCTURE ===
# One "fish" category whose keypoints are named "1".."8"; no skeleton links.
coco_output = dict(
    images=[],
    annotations=[],
    categories=[
        dict(
            id=1,
            name="fish",
            keypoints=list(map(str, range(1, 9))),
            skeleton=[],
        )
    ],
)

# Running id handed to each emitted annotation (first one gets 1).
annotation_id = 1

def compute_bbox_from_keypoints(kpts, image_width, image_height, pad=10):
    """Return a padded ``[x, y, width, height]`` box around the visible keypoints.

    Args:
        kpts: Flat sequence of ``x, y, v`` triplets; a keypoint counts as
            visible when its ``v`` entry is greater than 0.
        image_width: Upper clamp for the right edge of the box.
        image_height: Upper clamp for the bottom edge of the box.
        pad: Margin added on every side before clamping.

    Returns:
        ``[0, 0, image_width, image_height]`` when no keypoint is visible,
        otherwise a list of four floats ``[x_min, y_min, width, height]``.
    """
    triplets = np.asarray(kpts).reshape(-1, 3)
    labelled = triplets[triplets[:, 2] > 0]

    # Nothing to enclose: fall back to the full image extent.
    if labelled.shape[0] == 0:
        return [0, 0, image_width, image_height]

    xs, ys = labelled[:, 0], labelled[:, 1]

    # Grow the tight box by ``pad`` and keep it inside the image.
    left = max(0, xs.min() - pad)
    top = max(0, ys.min() - pad)
    right = min(image_width, xs.max() + pad)
    bottom = min(image_height, ys.max() + pad)

    return [float(left), float(top), float(right - left), float(bottom - top)]

# === PROCESS IMAGES AND ANNOTATIONS ===
# For every <image> element: register the image, gather its <points> children
# whose label is a digit 1..8 into a flat [x, y, v] * 8 list, and emit one
# annotation per image that has at least one labelled keypoint.
for img_elem in tqdm(images):
    img_id = int(img_elem.attrib["id"])
    img_name = img_elem.attrib["name"]
    width = int(img_elem.attrib["width"])
    height = int(img_elem.attrib["height"])

    coco_output["images"].append({
        "id": img_id,
        "file_name": img_name,
        "width": width,
        "height": height
    })

    # Slots that never receive a label stay at (0, 0, 0).
    kpts = [0] * 8 * 3

    for point in img_elem.xpath("points"):
        label = point.attrib["label"]
        if label.isdigit() and 1 <= int(label) <= 8:
            idx = int(label) - 1
            x, y = map(float, point.attrib["points"].split(","))
            # Visibility flag 2 marks the slot as labelled.
            kpts[idx * 3: idx * 3 + 3] = [x, y, 2]

    # Count filled slots rather than XML elements: a label that appears twice
    # overwrites the same slot and must not be counted twice.
    num_keypoints = sum(1 for v in kpts[2::3] if v > 0)

    if num_keypoints > 0:
        bbox = compute_bbox_from_keypoints(kpts, width, height, padding)
        coco_output["annotations"].append({
            "id": annotation_id,
            "image_id": img_id,
            "category_id": 1,
            "keypoints": kpts,
            "num_keypoints": num_keypoints,
            "bbox": bbox,
            "area": bbox[2] * bbox[3],
            "iscrowd": 0
        })
        annotation_id += 1

# === SAVE JSON ===
with open(output_path, "w") as f:
    json.dump(coco_output, f, indent=2)

print(f"Listo: {output_path}")


In [None]:
import json
import random
import numpy as np
import matplotlib.pyplot as plt
from mmengine import load
from PIL import Image
from pathlib import Path

# Load the validation annotation file
val_json_path = Path("/data/Datasets/Fish/2022/fish8kpt_val.json")
with open(val_json_path, "r") as f:
    val_data = json.load(f)

# Load the predictions. This must be the file written by `tools/test.py --dump`
# (predictions.pkl), not the model checkpoint: a ".pth" file holds weights and
# is not a format that `load` infers from the extension.
results = load("/data/Josue/weights/Fish/predictions.pkl")

# Map image_id to file name
id_to_filename = {img["id"]: img["file_name"] for img in val_data["images"]}
image_dir = Path(val_json_path).parent  # /data/Datasets/Fish/2022

# Take the first prediction whose image belongs to the val set
# NOTE(review): assumes each dumped item exposes `img_id` and
# `pred_instances` as attributes — confirm against the dump format.
val_ids = set(id_to_filename.keys())
sample = next(r for r in results if r.img_id in val_ids)

# Predicted keypoints and per-keypoint scores of the first instance.
# Keypoints are (x, y) pairs per instance, as in the other visualisation cell
# of this notebook, so the confidence comes from `keypoint_scores`.
kpts = sample.pred_instances.keypoints[0]
kpt_scores = sample.pred_instances.keypoint_scores[0]
img_path = image_dir / id_to_filename[sample.img_id]
img = Image.open(img_path)

# Show the image with its keypoints
plt.figure(figsize=(8, 6))
plt.imshow(img)
plt.axis("off")

for (x, y), v in zip(kpts, kpt_scores):
    if v > 0:
        plt.scatter(x, y, c='red', s=40)
        plt.text(x + 2, y + 2, f"{int(x)},{int(y)}", fontsize=8, color='yellow')

plt.title(f"Predicted Keypoints for Image ID: {sample.img_id}")
plt.show()


In [None]:
!python demo/image_demo.py \
  /data/Datasets/Fish/FIB/FIB/inference/IMG_0181-Editar.jpg \
  configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-fish9_8xb32-100etest.py \
  work_dirs/td-hm_ViTPose-fish9_8xb32-100etrain/epoch_600.pth \
  --out-file vis_results/vitpose_600epoch/fish_infer_0181.jpg \
  --draw-heatmap \
  --show-kpt-idx

# Formatting code

## Splitting into train/val with a 90/10 ratio

In [6]:
import json
import random
from pathlib import Path

# Read the complete annotation file produced by the conversion step
input_path = Path("/data/Datasets/Fish/2022/fisk_8kpts.json")
with open(input_path, "r") as f:
    data = json.load(f)

# Seeded shuffle of the unique image ids, then a 90 % / 10 % cut
random.seed(42)
image_ids = list({img["id"] for img in data["images"]})
random.shuffle(image_ids)
split_index = int(0.9 * len(image_ids))
train_ids, val_ids = set(image_ids[:split_index]), set(image_ids[split_index:])

# Restrict a COCO dictionary to a chosen set of image ids
def split_coco(data, selected_ids):
    """Return a COCO-style dict containing only the selected images.

    Args:
        data: Dict with ``"images"``, ``"annotations"`` and ``"categories"``
            entries; ``"info"`` and ``"licenses"`` are optional.
        selected_ids: Container of image ids to keep (membership-tested).

    Returns:
        A new dict whose ``"images"`` keep entries with ``id`` in
        ``selected_ids`` and whose ``"annotations"`` keep entries with
        ``image_id`` in ``selected_ids``. ``"categories"`` is passed through
        as the same object; missing ``"info"`` / ``"licenses"`` default to
        ``{}`` / ``[]``.
    """
    kept_images = [entry for entry in data["images"] if entry["id"] in selected_ids]
    kept_annotations = [
        entry for entry in data["annotations"] if entry["image_id"] in selected_ids
    ]

    subset = {}
    subset["info"] = data.get("info", {})
    subset["licenses"] = data.get("licenses", [])
    subset["images"] = kept_images
    subset["annotations"] = kept_annotations
    subset["categories"] = data["categories"]
    return subset

train_data = split_coco(data, train_ids)
val_data = split_coco(data, val_ids)

# Write each split to its own JSON file
train_path = "/data/Datasets/Fish/2022/fish8kpt_train.json"
val_path = "/data/Datasets/Fish/2022/fish8kpt_val.json"
for split_file, split_payload in ((train_path, train_data), (val_path, val_data)):
    with open(split_file, "w") as f:
        json.dump(split_payload, f, indent=2)

# Last expression of the cell: shows both output paths
train_path, val_path

('/data/Datasets/Fish/2022/fish8kpt_train.json',
 '/data/Datasets/Fish/2022/fish8kpt_val.json')