In [2]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell executes, so edits to project code (e.g. src.dataloader) take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [10]:
import os
import shutil
import warnings
import csv
import yaml
import json
import torch
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Any, TypedDict

from megadetector.detection.run_detector import load_detector, model_string_to_model_version
from megadetector.detection.run_detector_batch import process_images, write_results_to_file

from os import PathLike
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split

from src.dataloader import MammaliaData


In [4]:
def load_path_config(path_to_config):
    """Load a YAML file of named paths and return them as ``Path`` objects.

    Parameters
    ----------
    path_to_config : str or os.PathLike
        Location of a YAML file whose top level is a mapping of
        ``name -> path string``.

    Returns
    -------
    dict
        The keys of the YAML mapping, each value wrapped in ``pathlib.Path``.
        An empty YAML document yields an empty dict.
    """
    # Explicit encoding keeps parsing independent of the machine's locale.
    with open(path_to_config, 'r', encoding='utf-8') as f:
        path_config = yaml.safe_load(f)
    # safe_load returns None for an empty document; treat that as "no entries"
    # instead of failing on None.items().
    if path_config is None:
        path_config = {}
    return {k: Path(v) for k, v in path_config.items()}

# Read the project's path configuration; later cells look up entries such as paths['dataset'].
# NOTE(review): absolute cluster path — the notebook only runs where this file exists.
paths = load_path_config('/cfs/earth/scratch/kraftjul/BA/code/path_config.yml')


### Running Tests

In [5]:
# Location of the small test set, the output directory, and the label
# categories intended to be dropped.
path_to_testset = Path("/cfs/earth/scratch/kraftjul/BA/data/test_set")
output_path = Path("/cfs/earth/scratch/kraftjul/BA/output")
categories_to_drop = ["other", "glis_glis"]

In [6]:
# Inputs for the dataset object used by the remaining cells: the image dataset
# root comes from the path config, the label files from the larger test-set
# directory, and the detector output from its 'MD_out' subdirectory.
path_to_dataset = paths['dataset']
path_labelfiles = Path('/cfs/earth/scratch/kraftjul/BA/data/test_set_large')
path_to_detector_output = path_labelfiles / 'MD_out'
detector_model = 'mdv5a'  # presumably the MegaDetector v5a model string — confirm against MammaliaData
mode = 'train'

# MammaliaData is already imported in the notebook's import cell, so it is not
# imported a second time here.
dataset = MammaliaData(
    path_to_dataset=path_to_dataset,
    path_labelfiles=path_labelfiles,
    path_to_detector_output=path_to_detector_output,
    detector_model=detector_model,
    mode=mode,
)

7 sequences of the train set had no detections and will be excluded.
Excluded sequences: [6000161, 6000163, 6000293, 6000530, 6000691, 6000372, 6000953]
1 sequences of the test set had no detections and will be excluded.
Excluded sequences: [6000186]


In [None]:
# Quick check: fetch the first item of the dataset via indexing
# (the structure of the returned item is defined by MammaliaData and not shown here).
dataset[0]



Method changed


### Sampling dataset

In [None]:
# Paths
path_to_labelfiles = Path("/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/info/labels")
dataset_root = Path("/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset")
target_dir = Path("/cfs/earth/scratch/kraftjul/BA/data/test_set_large")
output_metadata_csv = target_dir / "metadata_larger_sample_set.csv"

# Sampling parameters
MAX_FILES_PER_SEQUENCE = 60  # keep only rows whose n_files is below this
SAMPLES_PER_CLASS = 40       # rows drawn per distinct label2 value
SAMPLING_SEED = 42           # fixed seed so the draw is repeatable

# Load metadata
metadata = dataset.reading_all_metadata(
    list_of_files=dataset.getting_all_files_of_type(path_to_labelfiles, file_type='.csv'),
    categories_to_drop=['other', 'glis_glis']
)

metadata_filtered = metadata[metadata['n_files'] < MAX_FILES_PER_SEQUENCE]

# groupby(...).sample(n=...) raises ValueError if any group holds fewer than n
# rows. Draw the fixed-size sample only from classes that are large enough and
# keep every row of the smaller classes. When all classes are large enough the
# result is the same as sampling the whole filtered frame directly.
class_sizes = metadata_filtered["label2"].map(metadata_filtered["label2"].value_counts())
metadata_sampled = (
    metadata_filtered[class_sizes >= SAMPLES_PER_CLASS]
    .groupby("label2", group_keys=False)
    .sample(n=SAMPLES_PER_CLASS, random_state=SAMPLING_SEED)
)
undersized_classes = metadata_filtered[class_sizes < SAMPLES_PER_CLASS]
if not undersized_classes.empty:
    metadata_sampled = pd.concat([metadata_sampled, undersized_classes])

# Make sure the destination exists before writing the sampled metadata.
target_dir.mkdir(parents=True, exist_ok=True)
metadata_sampled.to_csv(output_metadata_csv, index=False)

In [7]:
# Dataset root, its label-file directory, and the directory/CSV of the larger sample set
dataset_root = Path("/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset")
path_to_labelfiles = Path("/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/info/labels")
target_dir = Path("/cfs/earth/scratch/kraftjul/BA/data/test_set_large")
output_metadata_csv = target_dir / "metadata_larger_sample_set.csv"


# Collect all label CSVs, then read them with an empty categories_to_drop list
label_csv_files = dataset.getting_all_files_of_type(path_to_labelfiles, file_type='.csv')
metadata = dataset.reading_all_metadata(
    list_of_files=label_csv_files,
    categories_to_drop=[],
)

In [9]:
# Display the metadata table loaded above (pandas truncates the view to the first and last rows).
metadata

Unnamed: 0,session,SerialNumber,seq_nr,seq_id,Directory,DateTime_start,DateTime_end,duration_seconds,first_file,last_file,n_files,all_files,label,duplicate_label,label2
0,3,H550HF07158839,1,3000000,sessions/session_03/28,2020-03-29T20:57:38Z,2020-03-29T20:57:59Z,21.0,IMG_0001.JPG,IMG_0024.JPG,24,"IMG_0001.JPG,IMG_0002.JPG,IMG_0003.JPG,IMG_000...",apodemus_sp,,apodemus_sp
1,3,H550HF07158878,1,3000003,sessions/session_03/42,2020-03-30T08:50:52Z,2020-03-30T08:50:54Z,2.0,IMG_0001.JPG,IMG_0003.JPG,3,"IMG_0001.JPG,IMG_0002.JPG,IMG_0003.JPG",mustela_erminea,,mustela_erminea
2,3,H550HF07158878,2,3000004,sessions/session_03/42,2020-04-16T01:20:36Z,2020-04-16T01:20:41Z,5.0,IMG_0004.JPG,IMG_0009.JPG,6,"IMG_0004.JPG,IMG_0005.JPG,IMG_0006.JPG,IMG_000...",apodemus_sp,,apodemus_sp
3,3,H550HF07158933,1,3000005,sessions/session_03/7,2020-04-11T14:05:39Z,2020-04-11T14:06:29Z,50.0,IMG_0001.JPG,IMG_0024.JPG,24,"IMG_0001.JPG,IMG_0002.JPG,IMG_0003.JPG,IMG_000...",apodemus_sp,,apodemus_sp
4,3,H550HF07158933,2,3000006,sessions/session_03/7,2020-04-11T14:24:35Z,2020-04-11T14:24:44Z,9.0,IMG_0025.JPG,IMG_0031.JPG,7,"IMG_0025.JPG,IMG_0026.JPG,IMG_0027.JPG,IMG_002...",apodemus_sp,,apodemus_sp
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23101,4,H550HF08161368,235,4018684,sessions/session_04/W7-WK06,2020-07-17T23:42:26Z,2020-07-17T23:42:36Z,10.0,IMG_4228.JPG,IMG_4236.JPG,9,"IMG_4228.JPG,IMG_4229.JPG,IMG_4230.JPG,IMG_423...",apodemus_sp,False,apodemus_sp
23102,4,H550HF08161368,236,4018685,sessions/session_04/W7-WK06,2020-07-17T23:45:10Z,2020-07-17T23:45:34Z,24.0,IMG_4237.JPG,IMG_4248.JPG,12,"IMG_4237.JPG,IMG_4238.JPG,IMG_4239.JPG,IMG_424...",apodemus_sp,False,apodemus_sp
23103,4,H550HF08161368,237,4018686,sessions/session_04/W7-WK06,2020-07-18T01:56:54Z,2020-07-18T01:57:18Z,24.0,IMG_4249.JPG,IMG_4263.JPG,15,"IMG_4249.JPG,IMG_4250.JPG,IMG_4251.JPG,IMG_425...",apodemus_sp,False,apodemus_sp
23104,4,H550HF08161368,238,4018687,sessions/session_04/W7-WK06,2020-07-18T02:06:30Z,2020-07-18T02:06:42Z,12.0,IMG_4264.JPG,IMG_4269.JPG,6,"IMG_4264.JPG,IMG_4265.JPG,IMG_4266.JPG,IMG_426...",myodes_glareolus,False,cricetidae
