In [None]:
import os
import yaml
import json
import torch
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Any, TypedDict

from os import PathLike
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split


In [24]:
def load_path_config(path_to_config):
    """Load a YAML file of named paths and return them as ``Path`` objects.

    Args:
        path_to_config: Location of a YAML file whose top level is a mapping
            from names to path strings.

    Returns:
        A dict with the same keys as the YAML mapping and every value wrapped
        in ``pathlib.Path``. An empty file yields an empty dict.

    Raises:
        ValueError: If the top level of the YAML document is not a mapping.
    """
    # Explicit encoding so parsing does not depend on the platform locale.
    with open(path_to_config, 'r', encoding='utf-8') as f:
        path_config = yaml.safe_load(f)

    # safe_load returns None for an empty document; treat that as "no paths"
    # instead of failing on ``None.items()``.
    if path_config is None:
        return {}
    if not isinstance(path_config, dict):
        raise ValueError(
            f"Expected a mapping of names to paths in {path_to_config}, "
            f"got {type(path_config).__name__}."
        )
    return {k: Path(v) for k, v in path_config.items()}


In [None]:
from megadetector.detection.run_detector import load_detector
from megadetector.detection.run_detector_batch import process_images, write_results_to_file

class MegaDetectorRunner:
    def __init__(
            self, 
            model_path: str | PathLike, 
            confidence: float = 0.25
            ):
        
        self.model = load_detector(str(model_path))
        self.confidence = confidence

    def run_on_images(
            self,
            images: list[PathLike],
            output_file_path: PathLike = None,
            ):

        results = process_images(
            im_files=images,
            detector=self.model,
            confidence_threshold=self.confidence,
            quiet=True
        )

        for r in results:
            r["file"] = r["file"].name
        
        if output_file_path is not None:
            with open(output_file_path, "w") as f:
                json.dump(results, f, indent=2)          


In [155]:
class MammaliaData(Dataset):
    def __init__(
            self,
            path_labelfiles: str | PathLike,
            path_to_dataset: str | PathLike,
            path_to_detector_output: str | PathLike,
            categories_to_drop: list[str] = None,
            apply_detector: str | bool = False,
            md_confidence: float = 0.25,
            
            is_pred: bool = False,
            ):
        super().__init__()

        path_labelfiles = Path(path_labelfiles)
        if not path_labelfiles.exists():
            raise ValueError("The path to the label files does not exist.")
        self.path_labelfiles = path_labelfiles
        
        path_to_dataset = Path(path_to_dataset)
        if not path_to_dataset.exists():
            raise ValueError("The path to the dataset does not exist.")
        self.path_to_dataset = path_to_dataset
        
        path_to_detector_output = Path(path_to_detector_output)
        if apply_detector is False:
            if not path_to_detector_output.exists():
                raise ValueError("The path to the detector output does not exist. Please fix or set apply_detector to True by selecting a model.")
        else:    
            if not path_to_detector_output.exists():
                os.makedirs(path_to_detector_output)
            elif any(path_to_detector_output.iterdir()):
                raise ValueError("The path to the detector output contains files. Please clear or choose a different path.")
        self.path_to_detector_output = path_to_detector_output
        
        self.labelfiles = self.getting_all_files_of_type(self.path_labelfiles, file_type='.csv')
        self.categories_to_drop = categories_to_drop if categories_to_drop is not None else []

        self.ds = self.reading_all_metadata(
            list_of_files=self.labelfiles, 
            categories_to_drop=self.categories_to_drop
            )

        self.ds_train, self.ds_test = train_test_split(
            self.ds,
            test_size=0.2,
            random_state=55,
            stratify=self.ds['label2'],
            )
        
        self.apply_detector = apply_detector
        self.md_confidence = md_confidence
        
        if self.apply_detector is not False:    
            self.run_detector()

    def getting_all_files_of_type(
            self, 
            path: str | PathLike, 
            file_type: str = None, 
            get_full_path: bool = True
            ) -> list[str]:
        
        path = Path(path)
        files = []
        for file in os.listdir(path):
            if file_type is None or file.endswith(file_type):
                if get_full_path:
                    files.append(path / file)
                else:
                    files.append(file)
        return files
    
    def reading_all_metadata(
            self,
            list_of_files: list[PathLike],
            categories_to_drop: list[str]
            ) -> pd.DataFrame:
        
        metadata = pd.DataFrame()
        for file in list_of_files:
            metadata = pd.concat([metadata, pd.read_csv(file)], ignore_index=True)
            metadata = metadata.dropna(subset=['label2'])
            metadata = metadata[~metadata['label2'].isin(categories_to_drop)]
        return metadata
    
    def get_all_images_of_sequence(
            self, 
            seq_id: int,
            full_path: bool = False
            )-> list[PathLike]:
        row = self.ds.loc[self.ds['seq_id'] == seq_id].squeeze()
        seq_path = Path(row['Directory'])
        all_files = row['all_files'].split(',')
        if full_path:
            return [self.path_to_dataset / seq_path / file for file in all_files]
        else:
            return all_files

    def run_detector(
            self,
            ) -> None:
        
        runner = MegaDetectorRunner(
            model_path=self.apply_detector,
            confidence=self.md_confidence
            )

        sequences = self.ds['seq_id'].unique().tolist()

        for seq_id in sequences:
            seq_images = self.get_all_images_of_sequence(seq_id, full_path=True)
            output_file_path = self.path_to_detector_output / f"{seq_id}.json"
            runner.run_on_images(
                images=seq_images,
                output_file_path=output_file_path
                )

    def __len__(self) -> int: # still to be implemented
        pass

    def __getitem__(self, index: int) -> Any: # still to be implemented
        pass


In [157]:
from PIL import Image
image = '/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_04/Testwoche1/KF07/IMG_0001.JPG'

# Image.open keeps the underlying file open; the context manager releases it
# as soon as the size has been read.
with Image.open(image) as img:
    width, height = img.size

print(f"Resolution: {width}x{height}")

Resolution: 2048x1536


In [146]:
# Load the MegaDetector model once; the cells below reuse this runner.
runner = MegaDetectorRunner(model_path="MDV5A")

Fusing layers... 
Fusing layers... 


Bypassing download of already-downloaded file md_v5a.0.0.pt
Model v5a.0.0 available at /tmp/megadetector_models/md_v5a.0.0.pt
Bypassing imports for model type yolov5
Loading PT detector with compatibility mode classic


Model summary: 733 layers, 140054656 parameters, 0 gradients, 208.8 GFLOPs
Model summary: 733 layers, 140054656 parameters, 0 gradients, 208.8 GFLOPs


In [147]:
# Full image paths of sequence 1002472.
# Relies on `dataset`, which is created in the MammaliaData(...) cell.
images = dataset.get_all_images_of_sequence(seq_id=1002472, full_path=True)


In [149]:
# Run the detector on the sequence's images and write the results as JSON.
# The trailing semicolon keeps the cell output quiet whatever the call returns.
runner.run_on_images(
    output_file_path=Path("/cfs/earth/scratch/kraftjul/BA/code/dev/1002472.json"),
    images=images,
);

Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0187.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0188.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0189.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0190.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0191.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0192.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0193.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0194.JPG
Processing image

In [25]:
# YAML file listing the named data locations used by this notebook.
config_path = '/cfs/earth/scratch/kraftjul/BA/code/path_config.yml'
paths = load_path_config(path_to_config=config_path)

In [22]:
# Display the loaded name -> Path mapping.
paths

{'dataset': PosixPath('/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset'),
 'labels': PosixPath('/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/info/labels'),
 'output': PosixPath('/cfs/earth/scratch/kraftjul/BA/output')}

In [156]:
# Build the dataset from the test label set; passing a model name makes the
# constructor run MegaDetector on every sequence and write one JSON per sequence.
dataset = MammaliaData(
    path_labelfiles='/cfs/earth/scratch/kraftjul/BA/output/test_set/',
    path_to_dataset=paths['dataset'],
    path_to_detector_output='/cfs/earth/scratch/kraftjul/BA/output/test_MD_out',
    apply_detector="MDV5A",
)

Fusing layers... 
Fusing layers... 


Bypassing download of already-downloaded file md_v5a.0.0.pt
Model v5a.0.0 available at /tmp/megadetector_models/md_v5a.0.0.pt
Bypassing imports for model type yolov5
Loading PT detector with compatibility mode classic


Model summary: 733 layers, 140054656 parameters, 0 gradients, 208.8 GFLOPs
Model summary: 733 layers, 140054656 parameters, 0 gradients, 208.8 GFLOPs


Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0187.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0188.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0189.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0190.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0191.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0192.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0193.JPG
Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H550HF08161327_1/IMG_0194.JPG
Processing image

In [110]:
# List the image files recorded for sequence 1002472 (default full_path=False).
# NOTE(review): the output recorded below shows Path objects, whereas the method
# as currently defined returns the raw `all_files` strings when full_path=False —
# the output looks stale; re-run to confirm.


dataset.get_all_images_of_sequence(1002472)



[PosixPath('sessions/session_01/H550HF08161327_1/IMG_0187.JPG'),
 PosixPath('sessions/session_01/H550HF08161327_1/IMG_0188.JPG'),
 PosixPath('sessions/session_01/H550HF08161327_1/IMG_0189.JPG'),
 PosixPath('sessions/session_01/H550HF08161327_1/IMG_0190.JPG'),
 PosixPath('sessions/session_01/H550HF08161327_1/IMG_0191.JPG'),
 PosixPath('sessions/session_01/H550HF08161327_1/IMG_0192.JPG'),
 PosixPath('sessions/session_01/H550HF08161327_1/IMG_0193.JPG'),
 PosixPath('sessions/session_01/H550HF08161327_1/IMG_0194.JPG'),
 PosixPath('sessions/session_01/H550HF08161327_1/IMG_0195.JPG')]

In [3]:
# Pick a folder to run MD on recursively, and an output file
image_folder = os.path.expanduser('/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H_1')
output_file = os.path.expanduser('/cfs/earth/scratch/kraftjul/BA/output/megadetector_output_tes2.json')

# Recursively find images
image_file_names = path_utils.find_images(image_folder,recursive=True)

# This will automatically download MDv5a; you can also specify a filename.
results = load_and_run_detector_batch('MDV5A', image_file_names)

# Write results to a format that Timelapse and other downstream tools like.
write_results_to_file(results,
                      output_file,
                      relative_path_base=image_folder)


Bypassing download of already-downloaded file md_v5a.0.0.pt
Model v5a.0.0 available at /tmp/megadetector_models/md_v5a.0.0.pt
PyTorch reports 0 available CUDA devices
GPU available: False
Loading PT detector with compatibility mode classic


Fusing layers... 
Fusing layers... 
[W331 10:32:31.782568958 NNPACK.cpp:62] Could not initialize NNPACK! Reason: Unsupported hardware.
Model summary: 733 layers, 140054656 parameters, 0 gradients, 208.8 GFLOPs
Model summary: 733 layers, 140054656 parameters, 0 gradients, 208.8 GFLOPs


Loaded model in 25.97 seconds


  0%|          | 0/3 [00:00<?, ?it/s]

Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H_1/RCNX0001.JPG


 33%|███▎      | 1/3 [00:01<00:02,  1.32s/it]

Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H_1/RCNX0002.JPG


 67%|██████▋   | 2/3 [00:02<00:01,  1.22s/it]

Processing image /cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H_1/RCNX0003.JPG


100%|██████████| 3/3 [00:03<00:00,  1.22s/it]

Output file saved at /cfs/earth/scratch/kraftjul/BA/output/megadetector_output_tes2.json





{'images': [{'file': 'RCNX0001.JPG',
   'detections': [{'category': '1',
     'conf': 0.771,
     'bbox': [0.2236, 0.5555, 0.1225, 0.1402]},
    {'category': '2', 'conf': 0.184, 'bbox': [0.7783, 0.0215, 0.2216, 0.227]},
    {'category': '2', 'conf': 0.055, 'bbox': [0.0004, 0.0201, 0.2758, 0.3895]},
    {'category': '2',
     'conf': 0.019,
     'bbox': [0.7812, 0.0215, 0.2187, 0.8006]}]},
  {'file': 'RCNX0002.JPG',
   'detections': [{'category': '1',
     'conf': 0.487,
     'bbox': [0.2197, 0.5701, 0.185, 0.1354]},
    {'category': '2', 'conf': 0.087, 'bbox': [0.7802, 0.0215, 0.2197, 0.2187]},
    {'category': '2', 'conf': 0.084, 'bbox': [0.0004, 0.0208, 0.27, 0.3923]},
    {'category': '2', 'conf': 0.031, 'bbox': [0.0004, 0.0222, 0.267, 0.9569]},
    {'category': '2', 'conf': 0.019, 'bbox': [0.7822, 0.0215, 0.2177, 0.7951]},
    {'category': '3', 'conf': 0.015, 'bbox': [0.0, 0.0256, 1.0, 0.9673]},
    {'category': '1', 'conf': 0.005, 'bbox': [0.0078, 0.0222, 0.9916, 0.3201]},
    {'c

In [4]:
# Show the image paths that were collected by find_images and passed to the detector.
image_file_names

['/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H_1/RCNX0001.JPG',
 '/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H_1/RCNX0002.JPG',
 '/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/sessions/session_01/H_1/RCNX0003.JPG']