In [1]:
# Enable IPython's autoreload extension; mode 2 lets edits to imported
# project modules be picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import shutil
import warnings
import csv
import yaml
import json
import torch
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Any, TypedDict

from megadetector.detection.run_detector import load_detector, model_string_to_model_version
from megadetector.detection.run_detector_batch import process_images, write_results_to_file

from os import PathLike
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split

# from src.dataloader import MammaliaData


In [3]:
def load_path_config(path_to_config):
    """Read a YAML file of named paths and return them as ``Path`` objects.

    Parameters
    ----------
    path_to_config : str or os.PathLike
        Location of a YAML file whose top level is a mapping from names to
        path strings.

    Returns
    -------
    dict
        One entry per top-level key of the YAML document, with every value
        wrapped in ``pathlib.Path``. An empty file yields an empty dict.

    Raises
    ------
    TypeError
        If the top level of the YAML document is not a mapping.
    """
    # Explicit encoding so the result does not depend on the machine's locale.
    with open(path_to_config, 'r', encoding='utf-8') as f:
        path_config = yaml.safe_load(f)

    # safe_load returns None for an empty document; treat that as "no paths"
    # instead of failing on `None.items()`.
    if path_config is None:
        path_config = {}
    if not isinstance(path_config, dict):
        raise TypeError(
            f"Expected a mapping at the top level of {path_to_config!s}, "
            f"got {type(path_config).__name__}"
        )

    return {k: Path(v) for k, v in path_config.items()}

# Resolve the project's named paths once; later cells look them up by key.
paths = load_path_config(
    path_to_config='/cfs/earth/scratch/kraftjul/BA/code/path_config.yml',
)


### Running Tests

In [4]:
# Label categories to leave out when metadata is read.
categories_to_drop = ['other', 'glis_glis']

# Where results are written and where the small test set lives.
output_path = Path('/cfs/earth/scratch/kraftjul/BA/output')
path_to_testset = Path('/cfs/earth/scratch/kraftjul/BA/data/test_set')

In [5]:
from src.dataloader import MammaliaData

# Loader settings.
# NOTE(review): 'mdv5a' presumably selects a MegaDetector v5a model — confirm
# against MammaliaData.
mode = 'train'
detector_model = 'mdv5a'

# Input locations: the image dataset comes from the path config, the label
# files from the larger test set; detector output sits in a sub-folder of it.
path_to_dataset = paths['dataset']
path_labelfiles = Path('/cfs/earth/scratch/kraftjul/BA/data/test_set_large')
path_to_detector_output = path_labelfiles.joinpath('MD_out')

dataset = MammaliaData(
    path_to_dataset=path_to_dataset,
    path_labelfiles=path_labelfiles,
    path_to_detector_output=path_to_detector_output,
    detector_model=detector_model,
    mode=mode,
)

7 sequences of the train set had no detections and will be excluded.
Excluded sequences: [6000161, 6000163, 6000293, 6000530, 6000691, 6000372, 6000953]
1 sequences of the test set had no detections and will be excluded.
Excluded sequences: [6000186]


In [None]:
# Index the dataset at position 0 and display whatever it returns
# (quick smoke test of item access).
dataset[0]



Method changed


### Sampling dataset

In [None]:
# Paths
path_to_labelfiles = Path("/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset/info/labels")
dataset_root = Path("/cfs/earth/scratch/iunr/shared/iunr-mammaliabox/dataset")
target_dir = Path("/cfs/earth/scratch/kraftjul/BA/data/test_set_large")
output_metadata_csv = target_dir / "metadata_larger_sample_set.csv"

# Sampling parameters (named so they can be tuned in one place)
MAX_FILES_PER_SEQUENCE = 60  # keep only sequences with fewer files than this
SAMPLES_PER_CLASS = 40       # rows drawn from every `label2` group
SAMPLING_SEED = 42           # fixed seed so the drawn sample is reproducible

# Load metadata
# NOTE(review): this reuses helper methods of the already-constructed `dataset`
# object, so the dataset cell must have been run first.
# The dropped categories match `categories_to_drop` from the test-setup cell.
metadata = dataset.reading_all_metadata(
    list_of_files=dataset.getting_all_files_of_type(path_to_labelfiles, file_type='.csv'),
    categories_to_drop=['other', 'glis_glis'],
)

metadata_filtered = metadata[metadata['n_files'] < MAX_FILES_PER_SEQUENCE]

# Fail early, and name the offending groups, when a class cannot supply the
# requested number of rows; pandas would otherwise raise a ValueError that
# does not say which group is too small.
group_sizes = metadata_filtered["label2"].value_counts()
undersized = group_sizes[group_sizes.between(1, SAMPLES_PER_CLASS - 1)]
if not undersized.empty:
    raise ValueError(
        f"Cannot draw {SAMPLES_PER_CLASS} rows per class; "
        f"these label2 groups are smaller: {undersized.to_dict()}"
    )

# Same groupby/sample call as before, so the seeded draw is unchanged.
metadata_sampled = metadata_filtered.groupby("label2", group_keys=False).sample(
    n=SAMPLES_PER_CLASS,
    random_state=SAMPLING_SEED,
)

# to_csv does not create missing directories, so make sure the target exists.
target_dir.mkdir(parents=True, exist_ok=True)
metadata_sampled.to_csv(output_metadata_csv, index=False)

In [17]:
# Show the sampled metadata as the cell's rich output.
metadata_sampled

Unnamed: 0,session,SerialNumber,seq_nr,seq_id,Directory,DateTime_start,DateTime_end,duration_seconds,first_file,last_file,n_files,all_files,label,duplicate_label,label2
11567,4,H550HG09194945,233,4007156,sessions/session_04/W2-WK02,2020-06-09T23:21:46Z,2020-06-09T23:22:18Z,32.0,IMG_6154.JPG,IMG_6180.JPG,27,"IMG_6154.JPG,IMG_6155.JPG,IMG_6156.JPG,IMG_615...",apodemus_sp,False,apodemus_sp
15877,4,H550HF07158832,180,4011466,sessions/session_04/W5-KH08,2020-06-28T22:25:32Z,2020-06-28T22:25:38Z,6.0,IMG_5446.JPG,IMG_5454.JPG,9,"IMG_5446.JPG,IMG_5447.JPG,IMG_5448.JPG,IMG_544...",apodemus_sp,False,apodemus_sp
1815,1,H550HF08161305,229,1001887,sessions/session_01/H550HF08161305_2,2019-09-10T02:06:30Z,2019-09-10T02:07:31Z,61.0,IMG_3034.JPG,IMG_3051.JPG,18,"IMG_3034.JPG,IMG_3035.JPG,IMG_3036.JPG,IMG_303...",apodemus_sp,0.0,apodemus_sp
15095,4,H550HF07158933,34,4010684,sessions/session_04/W4-WK02,2020-06-20T23:40:40Z,2020-06-20T23:42:00Z,80.0,IMG_0607.JPG,IMG_0654.JPG,48,"IMG_0607.JPG,IMG_0608.JPG,IMG_0609.JPG,IMG_061...",apodemus_sp,False,apodemus_sp
4586,4,H,77,4000175,sessions/session_04/Testwoche1/KH08,2020-05-11T21:16:28Z,2020-05-11T21:16:30Z,2.0,RCNX1125.JPG,RCNX1127.JPG,3,"RCNX1125.JPG,RCNX1126.JPG,RCNX1127.JPG",apodemus_sp,False,apodemus_sp
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21906,4,H550HF07158839,222,4017495,sessions/session_04/W7-R25,2020-07-16T22:03:52Z,2020-07-16T22:03:52Z,0.0,IMG_3736.JPG,IMG_3738.JPG,3,"IMG_3736.JPG,IMG_3737.JPG,IMG_3738.JPG",sorex_sp,False,soricidae
12723,4,H550HG09194886,174,4008312,sessions/session_04/W3-M7,2020-06-18T04:43:02Z,2020-06-18T04:43:02Z,0.0,IMG_3646.JPG,IMG_3648.JPG,3,"IMG_3646.JPG,IMG_3647.JPG,IMG_3648.JPG",crocidura_sp,False,soricidae
20378,4,H550HG09194894,161,4015967,sessions/session_04/W6-R26,2020-07-11T22:25:42Z,2020-07-11T22:25:44Z,2.0,IMG_2272.JPG,IMG_2274.JPG,3,"IMG_2272.JPG,IMG_2273.JPG,IMG_2274.JPG",sorex_sp,False,soricidae
18825,4,H550HF07158832,147,4014414,sessions/session_04/W6-M2,2020-07-05T05:16:24Z,2020-07-05T05:16:26Z,2.0,IMG_3880.JPG,IMG_3882.JPG,3,"IMG_3880.JPG,IMG_3881.JPG,IMG_3882.JPG",crocidura_sp,False,soricidae
