# Detect animals, persons or vehicles in Camera Trap images

Tools: Pytorch-Wildlife

## Installation instructions:
1. Create conda environment 
```
conda create -n pytorch_wildlife python=3.8 -y 
conda activate pytorch_wildlife

```
2. Install using `pip install PytorchWildlife`

## Detection model: MegaDetector V6

MegaDetector repository: https://github.com/agentmorris/MegaDetector/tree/main

MegaDetector package: https://pypi.org/project/megadetector/

In [None]:
# conda create -n pytorch_wildlife python=3.8 -y
# conda activate pytorch_wildlife
# pip install PytorchWildlife
# python3 -m ipykernel install --user --name pytorch_wildlife --display-name "Python pytorch_wildlife" # creates a Jupyter kernel from the environment
# Use the pytorch_wildlife environment kernel

In [None]:
# Image Detector PyTorchWildlife

# --- Import packages ---

import os
import numpy as np
import os
from PIL import Image
import torch
from torch.utils.data import DataLoader
from PytorchWildlife.models import detection as pw_detection
from PytorchWildlife.models import classification as pw_classification
from PytorchWildlife.data import transforms as pw_trans
from PytorchWildlife.data import datasets as pw_data 
from PytorchWildlife import utils as pw_utils
import wget
import pandas as pd
import supervision as sv
import time

Métricas de precisión del modelo entrenado:

In [11]:
# Load the training log written during fine-tuning and display only the
# rows whose epoch column equals 29.
METRICS_CSV = 'finetunning_files/PW_FT_classification/log/logs/Plain/Plain_Crop_res50_plain_101224_test/version_0/metrics.csv'
metrics = pd.read_csv(METRICS_CSV)
is_epoch_29 = metrics['Custom-classification-epoch'] == 29.0
metrics[is_epoch_29]

Unnamed: 0,Custom-classification-epoch,Custom-classification-lr-SGD/pg1,Custom-classification-lr-SGD/pg2,Custom-classification-train_loss,Custom-classification-valid_mac_acc,Custom-classification-valid_mic_acc,step
314,29.0,,,0.411263,,,1429
316,29.0,,,0.271831,,,1439
318,29.0,,,0.296611,,,1449
320,29.0,,,0.279177,,,1459
322,29.0,,,0.530753,,,1469
323,29.0,,,,,53.271027,1469


Cargar clases incluidas en el modelo

In [5]:
# Mapping from class index to label, passed to the classifier as `class_names`.
# The position of each label in the tuple is its class index (0..15).
# NOTE(review): several labels look misspelled relative to the scientific
# names (e.g. BASSSEISCUS, VIRGINIATA, LEOPARDU, MEPITIS, OCOLATUS,
# CINEREORGENTEUS). They are kept verbatim because they presumably mirror the
# labels used at training time -- confirm before renaming.
class_names = dict(enumerate((
    'AVES',
    'BASSSEISCUS_ASTUTUS',
    'CONEPATUS_LEUCONOTUS',
    'DIDELPHIS_VIRGINIATA',
    'GANADO',
    'LEOPARDU_wiedii',
    'LYNX_RUFUS',
    'MEPITIS_MACROURA',
    'NASUA_NARICA',
    'ODOCOILEUS_VIRGINIANUS',
    'PECARI_TAJACU',
    'PUMA_CONCOLOR',
    'SCIURUS_OCOLATUS',
    'SPILOGALE_GRACILIS',
    'SYLVILAGUS_SP',
    'UROCYON_CINEREORGENTEUS',
)))

Inicializar el modelo de detección (MegaDetectorV6) y el modelo de clasificación que entrenamos.

In [7]:
# --- Model initialization ---

# Select the device automatically: use the GPU when PyTorch can see one,
# otherwise fall back to the CPU. Set DEVICE = "cpu" by hand to force CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Detection model: MegaDetectorV6 with pretrained weights, "yolov9c" version.
detection_model = pw_detection.MegaDetectorV6(device=DEVICE, pretrained=True, version="yolov9c")

# Classification model: our fine-tuned checkpoint, labelled with `class_names`.
CLASSIFIER_WEIGHTS = 'finetunning_files/PW_FT_classification/weights/logs/Plain/Crop_res50_plain_101224_test-0-epoch=14-valid_mac_acc=52.63.ckpt'
classification_model = pw_classification.CustomWeights(weights=CLASSIFIER_WEIGHTS, class_names=class_names, device=DEVICE)

Ultralytics 8.3.48 🚀 Python-3.8.20 torch-2.4.1+cu121 CPU (Intel Xeon E5-2697 v4 2.30GHz)
YOLOv9c summary (fused): 384 layers, 25,321,561 parameters, 0 gradients, 102.3 GFLOPs


### Identificación de lotes de imágenes

In [None]:
%timeit
# --- Batch image detection ---

tgt_folder_path = os.path.join("/mnt", "STORAGE", "csar", "pipo_images", "PUMA_CONCOLOR_2022")
results = detection_model.batch_image_detection(tgt_folder_path, batch_size=16)


/mnt/STORAGE/csar/pipo_images/PUMA_CONCOLOR_2022


  0%|                                                                                                                                                        | 0/2 [00:00<?, ?it/s]


0: 640x640 1 animal, 426.7ms
1: 640x640 1 animal, 426.7ms
2: 640x640 1 animal, 426.7ms
3: 640x640 1 animal, 426.7ms
4: 640x640 1 animal, 426.7ms
5: 640x640 1 animal, 426.7ms
6: 640x640 1 animal, 426.7ms
7: 640x640 1 animal, 426.7ms
8: 640x640 1 animal, 426.7ms
9: 640x640 1 animal, 426.7ms
10: 640x640 1 animal, 426.7ms
11: 640x640 1 animal, 426.7ms
12: 640x640 1 animal, 426.7ms
13: 640x640 2 animals, 426.7ms
14: 640x640 2 animals, 426.7ms
15: 640x640 1 animal, 426.7ms
Speed: 3.4ms preprocess, 426.7ms inference, 0.5ms postprocess per image at shape (16, 3, 640, 640)


 50%|████████████████████████████████████████████████████████████████████████                                                                        | 1/2 [00:17<00:17, 17.33s/it]


0: 512x640 1 animal, 328.3ms
1: 512x640 2 animals, 328.3ms
2: 512x640 1 animal, 328.3ms
3: 512x640 1 animal, 328.3ms
4: 512x640 1 animal, 328.3ms
5: 512x640 1 animal, 328.3ms
6: 512x640 1 animal, 328.3ms
7: 512x640 1 animal, 328.3ms
Speed: 2.7ms preprocess, 328.3ms inference, 1.1ms postprocess per image at shape (8, 3, 512, 640)



100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:24<00:00, 12.44s/it]

In [None]:
# Write the annotated detection images for the batch results to the output folder.
detections_output_dir = os.path.join("..", "output", "PUMA_CONCOLOR_2022")
pw_utils.save_detection_images(results, detections_output_dir, overwrite=True)

### Detección y clasificación de videos

In [None]:
# Video detection: input/output locations and the annotators used on each frame.
VIDEO_FILE_NAME = "IMG_0004 (2).MP4"
SOURCE_VIDEO_PATH = os.path.join("/mnt", "STORAGE", "csar", "pipo_images", "PUMA_CONCOLOR_2022", VIDEO_FILE_NAME)
TARGET_VIDEO_PATH = os.path.join("..", "output", "videos", "PUMA_CONCOLOR_2022", VIDEO_FILE_NAME)

# Box outlines and text labels drawn by the frame callback.
box_annotator = sv.BoxAnnotator(thickness=4)
lab_annotator = sv.LabelAnnotator(
    text_scale=2,
    text_thickness=4,
    text_color=sv.Color.BLACK,
)

In [None]:
# Function to detect and classify the frames of a video.
def callback(frame: np.ndarray, index: int) -> np.ndarray:
    """
    Detect, classify and annotate a single video frame.

    Parameters:
    - frame (np.ndarray): Video frame as a numpy array.
    - index (int): Frame index; forwarded to the detector as ``img_path``.

    Returns:
    np.ndarray: The frame with boxes and labels drawn on it.

    Steps:
    1. Run the detection model on the frame.
    2. Crop every detected box out of the frame and run the classification
       model on the crop.
    3. Draw the boxes, then the "<prediction> <confidence>" labels.
    """
    detections = detection_model.single_image_detection(frame, img_path=index)["detections"]

    def describe(box) -> str:
        # Classify the region inside one bounding box and format its label.
        crop = sv.crop_image(image=frame, xyxy=box)
        outcome = classification_model.single_image_classification(crop)
        return "{} {:.2f}".format(outcome["prediction"], outcome["confidence"])

    # One label per detection, in the same order as the boxes.
    labels = [describe(box) for box in detections.xyxy]

    # Boxes are drawn first; the labels are then drawn on the boxed frame.
    boxed_frame = box_annotator.annotate(scene=frame, detections=detections)
    return lab_annotator.annotate(
        scene=boxed_frame,
        detections=detections,
        labels=labels,
    )

In [None]:
# Make sure the output folder exists before processing: the video writer is
# not expected to create missing directories, and a missing folder would only
# surface after the whole video has been processed.
target_dir = os.path.dirname(TARGET_VIDEO_PATH)
if target_dir:  # an empty dirname means "current directory"; nothing to create
    os.makedirs(target_dir, exist_ok=True)

# Run detection + classification on the video and report the elapsed wall time.
tiempo_inicial = time.time()
pw_utils.process_video(source_path=SOURCE_VIDEO_PATH, target_path=TARGET_VIDEO_PATH, callback=callback, target_fps=10)
tiempo_final = time.time()
tiempo_exec = tiempo_final - tiempo_inicial
print("Execution time:", tiempo_exec, "seconds")