In [1]:
!pip install ultralytics

import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from ultralytics import YOLO
from scipy.spatial import distance
from tqdm import tqdm
from itertools import combinations
import os

# --- 1. Configura√ß√µes de Caminhos ---
MODEL_PATH = 'runs/pose/yolov8_cattle_pose/weights/best.pt'
DATASET_PATH = '../data/raw/dataset_classifica√ß√£o'
OUTPUT_DIR = '../data/processed'
OUTPUT_CSV = f'{OUTPUT_DIR}/biometria_v1.csv'

os.makedirs(OUTPUT_DIR, exist_ok=True)

# Carregar modelo treinado
model = YOLO(MODEL_PATH)

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [2]:
# --- 2. Fun√ß√µes Utilit√°rias de Geometria ---

def calcular_angulo_3p(p1, p2, p3):
    """Calcula o √¢ngulo interno no ponto p2 (Invariante √† rota√ß√£o)"""
    ba = p1 - p2
    bc = p3 - p2
    norm_ba = np.linalg.norm(ba)
    norm_bc = np.linalg.norm(bc)
    if norm_ba == 0 or norm_bc == 0: return 0
    cosine_angle = np.dot(ba, bc) / (norm_ba * norm_bc + 1e-6)
    return np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0)))

def extrair_geometria_completa(kp, ref_dist):
    """Gera 28 dist√¢ncias normalizadas + √¢ngulos + √°reas"""
    geo = {}
    
    # A. Todas as 28 dist√¢ncias poss√≠veis (C(8,2))
    # Normalizadas pela ref_dist para manter invari√¢ncia de escala
    for i, j in combinations(range(8), 2):
        geo[f'geo_dist_{i}_{j}'] = distance.euclidean(kp[i], kp[j]) / ref_dist
        
    # B. √Çngulos Estruturais
    geo['geo_angle_spine'] = calcular_angulo_3p(kp[0], kp[1], kp[5]) # Withers-Back-Tail
    geo['geo_angle_rump'] = calcular_angulo_3p(kp[2], kp[5], kp[3])  # Hook-Tail-Hook
    geo['geo_angle_pelvis'] = calcular_angulo_3p(kp[6], kp[5], kp[7]) # Pin-Tail-Pin
    
    # C. √Årea do Quadril (Normalizada)
    poly_hip = np.array([kp[2], kp[6], kp[7], kp[3]], dtype=np.int32)
    geo['geo_area_hip'] = cv2.contourArea(poly_hip) / (ref_dist**2)
    
    return geo

In [3]:
# --- 3. Loop Principal de Processamento ---

data_rows = []
pastas_animais = sorted([p for p in Path(DATASET_PATH).iterdir() if p.is_dir()])

for pasta in pastas_animais:
    print(f"üìÇ Processando ID: {pasta.name}")
    imagens = list(pasta.glob('*.jpg'))
    
    for img_p in tqdm(imagens):
        # Executar infer√™ncia de Pose
        results = model(img_p, verbose=False)[0]
        
        # Validar se o YOLO detectou os 8 keypoints necess√°rios
        if not results.keypoints or len(results.keypoints.xy[0]) < 8:
            continue
            
        kp = results.keypoints.xy[0].cpu().numpy()
        
        # Dist√¢ncia de Refer√™ncia (Withers [0] at√© Tail Head [5])
        # Essencial para normalizar o tamanho do animal independente da dist√¢ncia da c√¢mera
        ref_dist = distance.euclidean(kp[0], kp[5])
        if ref_dist < 1.0: 
            continue

        # Iniciar dicion√°rio da amostra
        feat = {
            'cow_id': pasta.name, 
            'file_name': img_p.name
        }

        # --- Extra√ß√£o Geom√©trica ---
        # Foco total na morfologia e propor√ß√µes corporais
        feat.update(extrair_geometria_completa(kp, ref_dist))
        
        data_rows.append(feat)

üìÇ Processando ID: 1106


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:02<00:00, 18.90it/s]


üìÇ Processando ID: 1122


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:00<00:00, 53.99it/s]


üìÇ Processando ID: 1221


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 41.14it/s]


üìÇ Processando ID: 1287


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 44.26it/s]


üìÇ Processando ID: 1288


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:00<00:00, 50.25it/s]


üìÇ Processando ID: 1294


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:00<00:00, 52.22it/s]


üìÇ Processando ID: 1321


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 44.20it/s]


üìÇ Processando ID: 1323


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 44.82it/s]


üìÇ Processando ID: 1325


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 40.92it/s]


üìÇ Processando ID: 1328


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 48.50it/s]


üìÇ Processando ID: 1351


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:00<00:00, 53.10it/s]


üìÇ Processando ID: 1356


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 40.02it/s]


üìÇ Processando ID: 1362


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 36.65it/s]


üìÇ Processando ID: 1377


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 47.58it/s]


üìÇ Processando ID: 1386


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 49.91it/s]


üìÇ Processando ID: 1391


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 49.12it/s]


üìÇ Processando ID: 1397


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 48.17it/s]


üìÇ Processando ID: 1399


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 44.53it/s]


üìÇ Processando ID: 1439


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 47.60it/s]


üìÇ Processando ID: 1440


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 45.53it/s]


üìÇ Processando ID: 1446


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 42.18it/s]


üìÇ Processando ID: 1456


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 34.78it/s]


üìÇ Processando ID: 1463


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 42.94it/s]


üìÇ Processando ID: 1464


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 40.14it/s]


üìÇ Processando ID: 1473


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 37.56it/s]


üìÇ Processando ID: 1491


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 46.93it/s]


üìÇ Processando ID: 1495


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 49.65it/s]


üìÇ Processando ID: 1531


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 37.94it/s]


üìÇ Processando ID: 1539


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:00<00:00, 54.68it/s]


üìÇ Processando ID: 1562


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:01<00:00, 38.39it/s]


In [4]:
# --- 4. Salvamento ---
df_final = pd.DataFrame(data_rows)
df_final.to_csv(OUTPUT_CSV, index=False)

print(f"\n‚úÖ Dataset Biom√©trico (Geometria) Finalizado!")
print(f"   - Total de imagens processadas: {len(df_final)}")
print(f"   - Total de Features Geom√©tricas: {df_final.shape[1] - 2}")
print(f"   - Arquivo salvo em: {OUTPUT_CSV}")


‚úÖ Dataset Biom√©trico (Geometria) Finalizado!
   - Total de imagens processadas: 1500
   - Total de Features Geom√©tricas: 32
   - Arquivo salvo em: ../data/processed/biometria_v1.csv


In [5]:
df_final.head(10)

Unnamed: 0,cow_id,file_name,geo_dist_0_1,geo_dist_0_2,geo_dist_0_3,geo_dist_0_4,geo_dist_0_5,geo_dist_0_6,geo_dist_0_7,geo_dist_1_2,...,geo_dist_4_5,geo_dist_4_6,geo_dist_4_7,geo_dist_5_6,geo_dist_5_7,geo_dist_6_7,geo_angle_spine,geo_angle_rump,geo_angle_pelvis,geo_area_hip
0,1106,20260107_214903_baia23_VIPWX.jpg,0.325471,0.765783,0.781461,0.828177,1.0,1.081689,1.056427,0.46601,...,0.172204,0.283435,0.231079,0.150356,0.077097,0.185962,177.307095,76.176303,105.077868,0.092166
1,1106,RLC1_00_20260115063543_baia10_RLC1.jpg,0.286742,0.755432,0.742035,0.803624,1.0,1.069528,1.070721,0.474801,...,0.201337,0.265996,0.31879,0.091218,0.159553,0.208808,175.472353,64.238606,109.722126,0.092245
2,1106,RLC1_00_20260102062643_baia4_RLC1.jpg,0.293877,0.786827,0.78572,0.838147,1.0,1.056137,1.049251,0.503778,...,0.162235,0.225187,0.233852,0.093027,0.109192,0.176119,178.546327,77.327432,120.92732,0.079485
3,1106,20260101_065012_baia19_IPC2.jpg,0.282255,0.751121,0.773503,0.817199,1.0,1.10284,1.085883,0.487965,...,0.184126,0.318477,0.271064,0.16595,0.10775,0.204564,177.725947,75.134301,94.324588,0.108833
4,1106,20260105_150019_baia16_IPC1.jpg,0.263838,0.747126,0.762859,0.80551,1.0,1.120284,1.094221,0.506401,...,0.19627,0.350954,0.293403,0.186023,0.12958,0.243645,175.495157,72.029399,99.512639,0.118681
5,1106,20260106_064606_baia23_VIPWX.jpg,0.297927,0.766633,0.775421,0.831913,1.0,1.060828,1.061913,0.489911,...,0.168332,0.252866,0.235516,0.123552,0.090349,0.178447,177.054327,75.906452,112.14368,0.090046
6,1106,RLC1_00_20260115062941_baia10_RLC1.jpg,0.305655,0.754468,0.738675,0.796426,1.0,1.073209,1.057477,0.451838,...,0.207956,0.277441,0.305044,0.101761,0.140343,0.205706,170.630959,63.158967,115.42599,0.090251
7,1106,RLC3_00_20260113214502_baia6_RLC3.jpg,0.316836,0.791402,0.793313,0.84843,1.0,1.058391,1.061165,0.492059,...,0.151731,0.224516,0.230555,0.110766,0.107737,0.18804,179.136559,77.833874,118.757941,0.079781
8,1106,20260101_064610_baia19_IPC2.jpg,0.301132,0.753302,0.768439,0.820876,1.0,1.090551,1.074713,0.480691,...,0.180208,0.302658,0.255091,0.157565,0.090158,0.186906,174.990848,75.18149,93.993602,0.103606
9,1106,RLC2_00_20260107064727_baia8_RLC2.jpg,0.297073,0.770586,0.771374,0.824313,1.0,1.05827,1.057291,0.491949,...,0.177292,0.242945,0.26252,0.114922,0.120798,0.210993,178.580889,76.00881,127.025364,0.093719
