In [1]:
!pip install scikit-image

import cv2
import os
import pandas as pd
import numpy as np
from skimage.feature import hog, local_binary_pattern

# Path configuration (all paths are relative, starting one level above the working directory)
# CSV loaded with pd.read_csv at the start of the feature-building cell.
PATH_INPUT_CSV = '../data/processed/biometria_v1.csv'
# Image root; each image is looked up as <PATH_IMAGES>/<cow_id>/<file_name>.
PATH_IMAGES = '../data/raw/dataset_classificação/'
# Destination CSV for the final feature table.
PATH_OUTPUT = '../data/processed/biometria_final.csv'

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [2]:
# 1. Razões Geométricas (Invariância de Escala)
def calcular_ratios(dist):
    """Build three scale-invariant ratios from a vector of distances.

    Parameters
    ----------
    dist : indexable of numbers
        Expected to hold the 28 Euclidean distances; only positions
        0, 1, 5 and 10 are read.

    Returns
    -------
    numpy.ndarray
        Three values, in this order:
        ``dist[0] / dist[1]``  (length vs. width),
        ``dist[5] / dist[0]``  (rump proportion),
        ``dist[10] / dist[5]`` (lateral symmetry).
        A small epsilon is added to every denominator so a zero distance
        does not cause a division by zero.
    """
    eps = 1e-6
    # (numerator index, denominator index) for each ratio, in output order
    index_pairs = ((0, 1), (5, 0), (10, 5))
    return np.array([dist[num] / (dist[den] + eps) for num, den in index_pairs])

# 2. HOG (Contorno Estrutural)
def extrair_hog(img_gray):
    """Return the HOG feature vector of a grayscale image.

    The image is first resized to 128x128 so every input yields a
    descriptor of the same length. HOG is computed with 9 orientation
    bins, 16x16-pixel cells and 2x2-cell blocks; no visualization image
    is requested, so only the feature vector is returned.
    """
    patch = cv2.resize(img_gray, (128, 128))
    hog_params = dict(
        orientations=9,
        pixels_per_cell=(16, 16),
        cells_per_block=(2, 2),
        visualize=False,
    )
    return hog(patch, **hog_params)

# 3. LBP (Textura Local do Pelo)
def extrair_lbp(img_gray):
    """Return a normalized 10-bin histogram of uniform LBP codes.

    The grayscale image is resized to 128x128, the "uniform" local binary
    pattern is computed with 8 neighbours at radius 1, and the resulting
    codes are counted into the unit-width bins [0, 1), ..., [9, 10].
    The counts are converted to float and divided by their sum (plus a
    tiny epsilon to avoid dividing by zero).
    """
    patch = cv2.resize(img_gray, (128, 128))
    codes = local_binary_pattern(patch, 8, 1, method="uniform")
    counts = np.histogram(codes.ravel(), bins=np.arange(0, 11), range=(0, 10))[0]
    counts = counts.astype("float")
    # Normalize so the histogram sums to (approximately) one
    return counts / (counts.sum() + 1e-7)

In [3]:
# Load the dataset produced by notebook 01
df_v1 = pd.read_csv(PATH_INPUT_CSV)
dataset_rows = []
# File names of records left out of the output, either because the image
# could not be read or because feature extraction raised an exception.
skipped_files = []

# Geometry columns are detected by name: any column whose name contains
# 'dist' or 'geo' (case-insensitive).
colunas_distancia = [c for c in df_v1.columns if 'dist' in c.lower() or 'geo' in c.lower()]

# calcular_ratios indexes into a 28-element distance vector. When fewer
# columns are found the missing positions are zero-filled further down,
# so warn the user up front.
if len(colunas_distancia) < 28:
    print(f"⚠️ Aviso: Encontrei apenas {len(colunas_distancia)} colunas de geometria.")
else:
    print(f"✅ Colunas de geometria detectadas: {len(colunas_distancia)}")

print(f"Processando {len(df_v1)} registros...")

for index, row in df_v1.iterrows():
    filename = row['file_name']
    cow_id = str(row['cow_id'])
    path_img = os.path.join(PATH_IMAGES, cow_id, filename)

    try:
        # cv2.imread does not raise on a missing or undecodable file; it
        # returns None. Report the skip instead of dropping the row silently.
        img = cv2.imread(path_img)
        if img is None:
            print(f"⚠️ Imagem não encontrada ou ilegível: {path_img}")
            skipped_files.append(filename)
            continue

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # --- Distances ---
        # Values of the geometry columns present in the CSV for this row
        dist_values = row[colunas_distancia].values.astype(float)

        # Copy them into a fixed 28-slot vector (zero-padded or truncated)
        # so calcular_ratios can always index positions 0, 1, 5 and 10.
        dist_full = np.zeros(28)
        limit = min(len(dist_values), 28)
        dist_full[:limit] = dist_values[:limit]

        # --- New features ---
        feat_ratios = calcular_ratios(dist_full)
        feat_hog = extrair_hog(gray)
        feat_lbp = extrair_lbp(gray)
        mean_val = cv2.mean(img)[:3]  # per-channel mean in B, G, R order

        # --- Assemble the output record ---
        registro = {
            'cow_id': cow_id,
            'file_name': filename,
            'img_color_mean_b': mean_val[0],
            'img_color_mean_g': mean_val[1],
            'img_color_mean_r': mean_val[2]
        }

        # Original geometry values, under the column names they had in the input CSV
        registro.update(zip(colunas_distancia, dist_values))

        # Ratios, HOG and LBP, one column per vector component
        registro.update({f'geo_ratio_{i}': v for i, v in enumerate(feat_ratios)})
        registro.update({f'img_hog_{i}': v for i, v in enumerate(feat_hog)})
        registro.update({f'img_lbp_{i}': v for i, v in enumerate(feat_lbp)})

        dataset_rows.append(registro)

    except Exception as e:
        # Best effort: one bad record must not abort the whole run
        print(f"❌ Erro no arquivo {filename}: {e}")
        skipped_files.append(filename)
        continue

df_final = pd.DataFrame(dataset_rows)
df_final.to_csv(PATH_OUTPUT, index=False)
if skipped_files:
    print(f"⚠️ {len(skipped_files)} de {len(df_v1)} registros foram ignorados.")
print(f"\n✅ Concluído! Shape: {df_final.shape}")

✅ Colunas de geometria detectadas: 32
Processando 1500 registros...

✅ Concluído! Shape: (1500, 1814)


In [4]:
# Preview the first 10 rows of the final feature table.
df_final.head(10)

Unnamed: 0,cow_id,file_name,img_color_mean_b,img_color_mean_g,img_color_mean_r,geo_dist_0_1,geo_dist_0_2,geo_dist_0_3,geo_dist_0_4,geo_dist_0_5,...,img_lbp_0,img_lbp_1,img_lbp_2,img_lbp_3,img_lbp_4,img_lbp_5,img_lbp_6,img_lbp_7,img_lbp_8,img_lbp_9
0,1106,20260107_214903_baia23_VIPWX.jpg,67.603021,67.603021,67.603021,0.325471,0.765783,0.781461,0.828177,1.0,...,0.048645,0.080383,0.052917,0.115295,0.173645,0.141907,0.073547,0.082825,0.113464,0.117371
1,1106,RLC1_00_20260115063543_baia10_RLC1.jpg,47.726391,47.726391,47.726391,0.286742,0.755432,0.742035,0.803624,1.0,...,0.118347,0.102661,0.056274,0.064697,0.069275,0.065735,0.062866,0.099609,0.143677,0.216858
2,1106,RLC1_00_20260102062643_baia4_RLC1.jpg,75.212367,75.212367,75.212367,0.293877,0.786827,0.78572,0.838147,1.0,...,0.109375,0.098816,0.061523,0.074036,0.083374,0.075134,0.065002,0.098145,0.130005,0.20459
3,1106,20260101_065012_baia19_IPC2.jpg,67.955146,67.955146,67.955146,0.282255,0.751121,0.773503,0.817199,1.0,...,0.038391,0.066101,0.050293,0.119751,0.19342,0.15686,0.07428,0.082214,0.116943,0.101746
4,1106,20260105_150019_baia16_IPC1.jpg,109.388381,109.388381,109.388381,0.263838,0.747126,0.762859,0.80551,1.0,...,0.035828,0.070129,0.055115,0.122864,0.224792,0.160645,0.080383,0.071594,0.083862,0.094788
5,1106,20260106_064606_baia23_VIPWX.jpg,111.447377,111.447377,111.447377,0.297927,0.766633,0.775421,0.831913,1.0,...,0.046814,0.080139,0.055176,0.114746,0.206421,0.147644,0.07843,0.078247,0.077576,0.114807
6,1106,RLC1_00_20260115062941_baia10_RLC1.jpg,45.11545,45.11545,45.11545,0.305655,0.754468,0.738675,0.796426,1.0,...,0.120178,0.101074,0.056335,0.062927,0.065979,0.0672,0.062988,0.102051,0.144592,0.216675
7,1106,RLC3_00_20260113214502_baia6_RLC3.jpg,58.320431,58.320431,58.320431,0.316836,0.791402,0.793313,0.84843,1.0,...,0.102478,0.096985,0.056763,0.07489,0.089417,0.080017,0.07019,0.09906,0.129028,0.201172
8,1106,20260101_064610_baia19_IPC2.jpg,68.567581,68.567581,68.567581,0.301132,0.753302,0.768439,0.820876,1.0,...,0.035522,0.069397,0.049438,0.120483,0.201294,0.154297,0.077332,0.07666,0.113953,0.101624
9,1106,RLC2_00_20260107064727_baia8_RLC2.jpg,66.638115,66.638196,66.63794,0.297073,0.770586,0.771374,0.824313,1.0,...,0.102539,0.095581,0.05957,0.070374,0.089661,0.081848,0.067139,0.096497,0.135742,0.20105
