In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, cohen_kappa_score
import os

import cv2
import matplotlib.pyplot as plt

from utils import plot_confusion_matrix


**Seteo parámetros, directorios y funciones**

In [2]:
# Input and working locations, all built from BASE_DIR
BASE_DIR = '../'
PATH_TO_TRAIN = os.path.join(BASE_DIR, "input/petfinder-adoption-prediction/train/train.csv")
PATH_TO_IMAGES_DIR = os.path.join(BASE_DIR, "input/petfinder-adoption-prediction/train_images")
PATH_TO_TEMP_FILES = os.path.join(BASE_DIR, "work/optuna_temp_artifacts")
PATH_TO_OPTUNA_ARTIFACTS = os.path.join(BASE_DIR, "work/optuna_artifacts")

# Run settings
SEED = 42
BATCH_SIZE = 50
TEST_SIZE = 0.2

# Directories that hold the train / validation images grouped by class
new_train_directory = os.path.join(BASE_DIR, 'work/train_images_classes')
new_val_directory = os.path.join(BASE_DIR, 'work/val_images_classes')
for split_directory in (new_train_directory, new_val_directory):
    # exist_ok=True leaves an already existing directory untouched
    os.makedirs(split_directory, exist_ok=True)

# Class labels in order, and their mapping to consecutive integer indices
class_names = ['0', '1', '2', '3', '4']
class_to_idx = dict(zip(class_names, range(len(class_names))))



In [3]:

def visualize_pet(pet_id):
    """Display the first image of a pet.

    Parameters
    ----------
    pet_id
        Identifier used to build the file name ``<pet_id>-1.jpg`` inside
        ``PATH_TO_IMAGES_DIR``.

    Raises
    ------
    FileNotFoundError
        If the image file is missing or cannot be decoded.
    """
    # Only the first image ("-1") of the pet is shown
    path_to_image = os.path.join(PATH_TO_IMAGES_DIR, f'{pet_id}-1.jpg')
    image_to_show = cv2.imread(path_to_image)
    # cv2.imread does not raise on a missing/unreadable file, it returns None;
    # fail here with the offending path instead of inside cvtColor.
    if image_to_show is None:
        raise FileNotFoundError(f"Could not read image: {path_to_image}")
    # OpenCV loads images as BGR; matplotlib expects RGB
    image_to_show = cv2.cvtColor(image_to_show, cv2.COLOR_BGR2RGB)
    plt.imshow(image_to_show)
    plt.axis('off')  # hide the axes
    plt.show()

def visualize_image(image):
    """Show an in-memory image with matplotlib, axes hidden.

    The input is first passed through ``cv2.convertScaleAbs`` (8-bit
    conversion) and then through a ``cv2.COLOR_BGR2RGB`` channel conversion
    before being plotted.
    """
    as_8bit = cv2.convertScaleAbs(image)
    as_rgb = cv2.cvtColor(as_8bit, cv2.COLOR_BGR2RGB)
    pixels = as_rgb.astype(np.uint8)
    plt.imshow(pixels)
    plt.axis('off')  # hide the axes
    plt.show()


**Cargo y Proceso Data**

Nota: PyTorch necesita que las imágenes estén organizadas en directorios separados según su clase y según pertenezcan al conjunto de entrenamiento o de validación.

In [4]:
# Read the training table
train_df = pd.read_csv(PATH_TO_TRAIN)

# Hold out a validation subset, stratified on the AdoptionSpeed column
train_data, val_data = train_test_split(
    train_df,
    test_size=TEST_SIZE,
    random_state=SEED,
    stratify=train_df["AdoptionSpeed"],
)




In [None]:
# Manual annotation of the validation set: show each pet's first image, ask
# for a guess of its AdoptionSpeed, then score the guesses against the labels.

# Add the prediction column; -1 marks "not annotated".
# assign() builds a new frame, so nothing is written into the object that
# train_test_split returned (avoids SettingWithCopyWarning) and re-running
# this cell always starts from a clean 'pred' column.
val_data = val_data.assign(pred=-1)

valid_speeds = range(5)  # accepted answers: 0-4
skipped_pets = []        # pets whose image could not be read

for idx, pet_id, adoption_speed in val_data[['PetID', 'AdoptionSpeed']].itertuples(name=None):
    # Same location as before, built from the configured validation directory
    img_path = os.path.join(new_val_directory, str(adoption_speed), f"{pet_id}-1.jpg")
    img = cv2.imread(img_path)
    # cv2.imread returns None (no exception) for a missing/unreadable file;
    # skip that pet instead of crashing inside cvtColor before scoring.
    if img is None:
        skipped_pets.append(pet_id)
        continue

    # Display image (OpenCV loads BGR, matplotlib expects RGB)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.imshow(img)
    ax.axis('off')
    plt.show()
    plt.close(fig)  # do not accumulate open figures across iterations

    # Get user input. Only the prompt/parse step is guarded, so unrelated
    # errors (e.g. in the DataFrame write below) are no longer swallowed.
    try:
        pred = int(input(f"Enter adoption speed prediction (0-4) for pet {pet_id}, or press Ctrl+C to stop: "))
    except (ValueError, EOFError, KeyboardInterrupt, NotImplementedError):
        # ValueError: non-integer answer; KeyboardInterrupt: Ctrl+C;
        # EOFError / NotImplementedError: presumably raised when no
        # interactive stdin is available — TODO confirm for the front-end used.
        print("\nStopping annotation...")
        break
    # An out-of-range answer also ends the session (unchanged behavior)
    if pred not in valid_speeds:
        print("\nStopping annotation...")
        break
    val_data.loc[idx, 'pred'] = pred

if skipped_pets:
    print(f"\nSkipped {len(skipped_pets)} pets without a readable image")

mask = val_data['pred'] != -1
if mask.any():
    # Calculate and display Cohen's Kappa.
    # labels is passed explicitly so the quadratic weights always span the
    # full 0-4 scale, even when only some classes were annotated so far.
    kappa = cohen_kappa_score(val_data.loc[mask, 'AdoptionSpeed'],
                              val_data.loc[mask, 'pred'],
                              labels=list(valid_speeds),
                              weights='quadratic')
    print(f"\nQuadratic Cohen's Kappa Score: {kappa:.3f}")

    # Plot confusion matrix
    display(plot_confusion_matrix(val_data.loc[mask, 'AdoptionSpeed'],
                                  val_data.loc[mask, 'pred']))
else:
    print("\nNo predictions were made")

**Entreno**