# Análise do dataset: imagens diurnas e noturnas

In [None]:
from pathlib import Path
# Resolve the project root: when the working directory is a folder named
# 'notebooks' (case-insensitive), use its parent; otherwise use it as-is.
CWD = Path.cwd()
if CWD.name.lower() == 'notebooks':
    PROJECT_ROOT = CWD.parent
else:
    PROJECT_ROOT = CWD

## Diurnas 


In [None]:
import re

# File suffixes (lower-case, with the leading dot) treated as image files.
IMAGE_EXTS = {'.jpg', '.jpeg', '.png'}
# File suffixes treated as label files.
LABEL_EXTS = {'.txt'}

# Matches a whole folder name of the form class_<digits>, class_BACKGROUND or background.
CLASS_FOLDER_RE = re.compile(r'^(class_\d+|class_BACKGROUND|background)$')

def list_files_recursive(root, exts):
    """Return every regular file under *root* whose suffix is in *exts*.

    The search is recursive, the suffix comparison is done on the lower-cased
    suffix, and the resulting list of ``Path`` objects is ordered by each
    path's POSIX-style string.
    """
    matches = []
    for candidate in Path(root).rglob('*'):
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() in exts:
            matches.append(candidate)
    matches.sort(key=Path.as_posix)
    return matches

def has_class_subfolders(root):
    """Tell whether *root* directly contains a class-named subfolder.

    Returns True when at least one immediate child of *root* is a directory
    whose name matches ``CLASS_FOLDER_RE``. Returns False when there is no such
    child, or when *root* is missing or is not a directory.
    """
    root_path = Path(root)
    # is_dir() is False for a missing path and for a regular file alike, so a
    # file passed as root no longer reaches iterdir() (NotADirectoryError).
    if not root_path.is_dir():
        return False
    return any(
        child.is_dir() and CLASS_FOLDER_RE.match(child.name)
        for child in root_path.iterdir()
    )

def file_key(path, root, strip_class_folder=False):
    """Build a comparison key for *path* relative to *root*.

    Path components named 'images' or 'labels' are dropped, the file suffix is
    removed from the last component, and the result is returned as a
    POSIX-style string. When *strip_class_folder* is true and more than one
    component remains, a leading component matching ``CLASS_FOLDER_RE`` is
    dropped as well.
    """
    relative = Path(path).relative_to(Path(root))
    components = [c for c in relative.parts if c != 'images' and c != 'labels']
    if strip_class_folder and len(components) > 1 and CLASS_FOLDER_RE.match(components[0]):
        del components[0]
    # Only the final component loses its suffix.
    components[-1] = Path(components[-1]).stem
    return Path(*components).as_posix()

# Locate the daytime dataset: use 'Diurnas_resplit_v2' when it exists,
# otherwise fall back to 'Diurnas'.
DAY_DATASET_ROOT = PROJECT_ROOT / 'dataset' / 'Diurnas_resplit_v2'
DAY_DATASET_ROOT = (
    DAY_DATASET_ROOT if DAY_DATASET_ROOT.exists() else PROJECT_ROOT / 'dataset' / 'Diurnas'
)

# Images and labels are both searched (recursively) under the same 'train' folder.
day_train_root = DAY_DATASET_ROOT / 'train'
day_image_root = day_label_root = day_train_root
day_img_files = list_files_recursive(day_image_root, IMAGE_EXTS)
day_img_labels = list_files_recursive(day_label_root, LABEL_EXTS)

print(
    f'Quantidade de imagens diurnas dentro de {day_image_root}: {len(day_img_files)}',
    f'Quantidade de labels diurnas dentro de {day_label_root}: {len(day_img_labels)}',
    sep='\n',
)


In [None]:
# Daytime training images that have a matching label file.
# A leading class folder is stripped from the keys only when the label root
# has no class-named subfolder directly inside it.
strip_day_class_folder = not has_class_subfolders(day_label_root)
day_img_keys = set(
    file_key(p, day_image_root, strip_class_folder=strip_day_class_folder)
    for p in day_img_files
)
day_label_keys = set(
    file_key(p, day_label_root, strip_class_folder=strip_day_class_folder)
    for p in day_img_labels
)
day_imgs_with_labels = day_img_keys.intersection(day_label_keys)
print(f'Quantidade de imagens diurnas de treino com labels correspondentes: {len(day_imgs_with_labels)}')

# Names and locations of the images that have no label.
day_imgs_without_labels = list(filter(
    lambda img: file_key(img, day_image_root, strip_class_folder=strip_day_class_folder) not in day_label_keys,
    day_img_files,
))
print(f'Quantidade de imagens diurnas de treino sem labels correspondentes: {len(day_imgs_without_labels)}')
for img in day_imgs_without_labels:
    print(f'- {img.relative_to(day_image_root)}')


In [None]:
# Per-class counts for the daytime training labels.
# Every non-blank line of a label file adds one count to the class id found in
# its first whitespace-separated field; a label file with no annotation lines
# is counted once as 'BACKGROUND'.
# NOTE(review): the printed text says "imagens", but a file with several lines
# adds several counts, so the percentages (taken over the number of label
# files) can exceed 100% — confirm which quantity is intended.
day_class_counts = {}
for label_path in day_img_labels:
    with open(label_path, 'r') as f:
        # Keep only lines with content: a blank or whitespace-only line has no
        # class id, and `line.split()[0]` on it would raise IndexError.
        lines = [line for line in f if line.strip()]
    if not lines:
        # No annotations in this file: count it once as background.
        class_id = 'BACKGROUND'
        day_class_counts[class_id] = day_class_counts.get(class_id, 0) + 1
        continue
    for line in lines:
        class_id = line.split()[0]
        day_class_counts[class_id] = day_class_counts.get(class_id, 0) + 1
print('Quantidade de imagens por classe nas labels diurnas:')
for class_id, count in day_class_counts.items():
    print(f'Classe {class_id}: {count} imagens')

# As a percentage of the number of label files.
total_day_images = len(day_img_labels)
print('\nPorcentagem de imagens por classe nas labels diurnas:')
for class_id, count in day_class_counts.items():
    print(f'Classe {class_id}: {count / total_day_images * 100:.2f}%')

# Bar chart of the per-class counts.
import matplotlib.pyplot as plt
day_classes = list(day_class_counts.keys())
day_counts = list(day_class_counts.values())
plt.bar(day_classes, day_counts)
plt.xlabel('Classes')
plt.ylabel('Quantidade de Imagens')
plt.title('Quantidade de Imagens por Classe nas Labels Diurnas')
plt.show()


### Validação Diurnas


In [None]:
# Daytime images and labels found (recursively) under the validation split.
day_val_root = DAY_DATASET_ROOT / 'valid'
day_val_image_root = day_val_label_root = day_val_root
day_val_img_files = list_files_recursive(day_val_image_root, IMAGE_EXTS)
day_val_img_labels = list_files_recursive(day_val_label_root, LABEL_EXTS)

print(
    f'Quantidade de imagens diurnas dentro de {day_val_image_root}: {len(day_val_img_files)}',
    f'Quantidade de labels diurnas dentro de {day_val_label_root}: {len(day_val_img_labels)}',
    sep='\n',
)


In [None]:
# Daytime validation images that have a matching label file.
# Keys of every label file in the validation split.
day_val_labeled_images = {
    file_key(label, day_val_label_root, strip_class_folder=strip_day_class_folder)
    for label in day_val_img_labels
}
# Keys of every image file in the validation split.
day_val_img_keys = {
    file_key(img, day_val_image_root, strip_class_folder=strip_day_class_folder)
    for img in day_val_img_files
}
# Count only keys present on both sides, as the daytime training cell does;
# the label key set alone would also count labels whose image is missing.
day_val_imgs_with_labels = day_val_img_keys & day_val_labeled_images
print(f'Quantidade de imagens diurnas de validacao com labels correspondentes: {len(day_val_imgs_with_labels)}')

# Names of the images that have no label.
day_val_unlabeled_images = [
    img for img in day_val_img_files
    if file_key(img, day_val_image_root, strip_class_folder=strip_day_class_folder) not in day_val_labeled_images
]
print('Nomes das imagens diurnas de validacao sem labels correspondentes:')
for img in day_val_unlabeled_images:
    print(img.relative_to(day_val_image_root))


In [None]:
# Per-class counts for the daytime validation labels.
# Every non-blank line of a label file adds one count to the class id found in
# its first whitespace-separated field; a label file with no annotation lines
# is counted once as 'BACKGROUND'.
# NOTE(review): the printed text says "imagens", but a file with several lines
# adds several counts, so the percentages (taken over the number of label
# files) can exceed 100% — confirm which quantity is intended.
day_val_class_counts = {}
for label_path in day_val_img_labels:
    with open(label_path, 'r') as f:
        # Keep only lines with content: a blank or whitespace-only line has no
        # class id, and `line.split()[0]` on it would raise IndexError.
        lines = [line for line in f if line.strip()]
    if not lines:
        # No annotations in this file: count it once as background.
        class_id = 'BACKGROUND'
        day_val_class_counts[class_id] = day_val_class_counts.get(class_id, 0) + 1
        continue
    for line in lines:
        class_id = line.split()[0]
        day_val_class_counts[class_id] = day_val_class_counts.get(class_id, 0) + 1
print('Quantidade de imagens por classe nas labels diurnas de Validacao:')
for class_id, count in day_val_class_counts.items():
    print(f'Classe {class_id}: {count} imagens')

# As a percentage of the number of label files.
total_day_val_images = len(day_val_img_labels)
print('\nPorcentagem de imagens por classe nas labels diurnas de Validacao:')
for class_id, count in day_val_class_counts.items():
    print(f'Classe {class_id}: {count / total_day_val_images * 100:.2f}%')

# Bar chart of the per-class counts.
import matplotlib.pyplot as plt
day_val_classes = list(day_val_class_counts.keys())
day_val_counts = list(day_val_class_counts.values())
plt.bar(day_val_classes, day_val_counts)
plt.xlabel('Classes')
plt.ylabel('Quantidade de Imagens')
plt.title('Quantidade de Imagens por Classe nas Labels Diurnas (Validacao)')
plt.show()


In [None]:
# Daytime images and labels found (recursively) under the test split.
day_test_root = DAY_DATASET_ROOT / 'test'
day_test_image_root = day_test_label_root = day_test_root
day_test_img_files = list_files_recursive(day_test_image_root, IMAGE_EXTS)
day_test_img_labels = list_files_recursive(day_test_label_root, LABEL_EXTS)

print(
    f'Quantidade de imagens diurnas dentro de {day_test_image_root}: {len(day_test_img_files)}',
    f'Quantidade de labels diurnas dentro de {day_test_label_root}: {len(day_test_img_labels)}',
    sep='\n',
)


In [None]:
# Daytime test images that have a matching label file.
# Keys of every label file in the test split.
day_test_labeled_images = {
    file_key(label, day_test_label_root, strip_class_folder=strip_day_class_folder)
    for label in day_test_img_labels
}
# Keys of every image file in the test split.
day_test_img_keys = {
    file_key(img, day_test_image_root, strip_class_folder=strip_day_class_folder)
    for img in day_test_img_files
}
# Count only keys present on both sides, as the daytime training cell does;
# the label key set alone would also count labels whose image is missing.
day_test_imgs_with_labels = day_test_img_keys & day_test_labeled_images
print(f'Quantidade de imagens diurnas de teste com labels correspondentes: {len(day_test_imgs_with_labels)}')

# Names of the images that have no label.
day_test_unlabeled_images = [
    img for img in day_test_img_files
    if file_key(img, day_test_image_root, strip_class_folder=strip_day_class_folder) not in day_test_labeled_images
]
print('Nomes das imagens diurnas de teste sem labels correspondentes:')
for img in day_test_unlabeled_images:
    print(img.relative_to(day_test_image_root))


In [None]:
# Per-class counts for the daytime test labels.
# Every non-blank line of a label file adds one count to the class id found in
# its first whitespace-separated field; a label file with no annotation lines
# is counted once as 'BACKGROUND'.
# NOTE(review): the printed text says "imagens", but a file with several lines
# adds several counts, so the percentages (taken over the number of label
# files) can exceed 100% — confirm which quantity is intended.
day_test_class_counts = {}
for label_path in day_test_img_labels:
    with open(label_path, 'r') as f:
        # Keep only lines with content: a blank or whitespace-only line has no
        # class id, and `line.split()[0]` on it would raise IndexError.
        lines = [line for line in f if line.strip()]
    if not lines:
        # No annotations in this file: count it once as background.
        class_id = 'BACKGROUND'
        day_test_class_counts[class_id] = day_test_class_counts.get(class_id, 0) + 1
        continue
    for line in lines:
        class_id = line.split()[0]
        day_test_class_counts[class_id] = day_test_class_counts.get(class_id, 0) + 1
print('Quantidade de imagens por classe nas labels diurnas de Teste:')
for class_id, count in day_test_class_counts.items():
    print(f'Classe {class_id}: {count} imagens')

# As a percentage of the number of label files (skipped when the split has none).
total_day_test_images = len(day_test_img_labels)
print(total_day_test_images)
print('Porcentagem de imagens por classe nas labels diurnas de Teste:')
if total_day_test_images:
    for class_id, count in day_test_class_counts.items():
        print(f'Classe {class_id}: {count / total_day_test_images * 100:.2f}%')
else:
    print('Sem labels no split de teste.')

# Bar chart of the per-class counts, drawn only when there is something to plot.
if day_test_class_counts:
    import matplotlib.pyplot as plt
    day_test_classes = list(day_test_class_counts.keys())
    day_test_counts = list(day_test_class_counts.values())
    plt.bar(day_test_classes, day_test_counts)
    plt.xlabel('Classes')
    plt.ylabel('Quantidade de Imagens')
    plt.title('Quantidade de Imagens por Classe nas Labels Diurnas (Teste)')
    plt.show()


In [None]:
# Combined daytime summary: train + validation (+ test when it has labels).
if total_day_test_images:
    day_split_label = 'treino + validacao + teste'
else:
    day_split_label = 'treino + validacao'
total_day_images_combined = sum((total_day_images, total_day_val_images, total_day_test_images))
print(f'Total de imagens diurnas ({day_split_label}): {total_day_images_combined}')

# Merge the per-class counts of the three splits, starting from a copy of the
# training counts so the per-split dictionaries stay untouched.
combined_day_class_counts = dict(day_class_counts)
for class_id, count in [*day_val_class_counts.items(), *day_test_class_counts.items()]:
    combined_day_class_counts[class_id] = combined_day_class_counts.get(class_id, 0) + count
print(f'Quantidade conjunta de imagens por classe nas labels diurnas ({day_split_label}):')
for class_id, count in combined_day_class_counts.items():
    print(f'Classe {class_id}: {count} imagens')

# Combined percentages, relative to the combined number of label files.
print(f'Porcentagem conjunta de imagens por classe nas labels diurnas ({day_split_label}):')
for class_id, count in combined_day_class_counts.items():
    print(f'Classe {class_id}: {count / total_day_images_combined * 100:.2f}%')

# Bar chart of the combined per-class counts.
import matplotlib.pyplot as plt

combined_day_classes = [*combined_day_class_counts]
combined_day_counts = [*combined_day_class_counts.values()]
plt.bar(combined_day_classes, combined_day_counts)
plt.xlabel('Classes')
plt.ylabel('Quantidade de Imagens')
plt.title(f'Quantidade Conjunta de Imagens por Classe nas Labels Diurnas ({day_split_label})')
plt.show()


## Noturnas


In [None]:
# Night images and labels found (recursively) under dataset/Noturnas/train.
noct_image_root = noct_label_root = PROJECT_ROOT / 'dataset/Noturnas/train'
noct_img_files = list_files_recursive(noct_image_root, IMAGE_EXTS)
noct_img_labels = list_files_recursive(noct_label_root, LABEL_EXTS)

print(
    f'Quantidade de imagens noturnas dentro de {noct_image_root}: {len(noct_img_files)}',
    f'Quantidade de labels noturnas dentro de {noct_label_root}: {len(noct_img_labels)}',
    sep='\n',
)


In [None]:
# Night training images that have a matching label file.
# Keys of every label file in the training split.
noct_labeled_images = {file_key(label, noct_label_root) for label in noct_img_labels}
# Keys of every image file in the training split.
noct_img_keys = {file_key(img, noct_image_root) for img in noct_img_files}
# Count only keys present on both sides, as the daytime training cell does;
# the label key set alone would also count labels whose image is missing.
noct_imgs_with_labels = noct_img_keys & noct_labeled_images
print(f'Quantidade de imagens noturnas com labels correspondentes: {len(noct_imgs_with_labels)}')

# Names of the images that have no label.
noct_unlabeled_images = [
    img for img in noct_img_files
    if file_key(img, noct_image_root) not in noct_labeled_images
]
print('Nomes das imagens noturnas sem labels correspondentes:')
for img in noct_unlabeled_images:
    print(img.relative_to(noct_image_root))


In [None]:
# Per-class counts for the night training labels.
# Every non-blank line of a label file adds one count to the class id found in
# its first whitespace-separated field; a label file with no annotation lines
# is counted once as 'BACKGROUND'.
# NOTE(review): the printed text says "imagens", but a file with several lines
# adds several counts, so the percentages (taken over the number of label
# files) can exceed 100% — confirm which quantity is intended.
noct_class_counts = {}
for label_path in noct_img_labels:
    with open(label_path, 'r') as f:
        # Keep only lines with content: a blank or whitespace-only line has no
        # class id, and `line.split()[0]` on it would raise IndexError.
        lines = [line for line in f if line.strip()]
    if not lines:
        # No annotations in this file: count it once as background.
        class_id = 'BACKGROUND'
        noct_class_counts[class_id] = noct_class_counts.get(class_id, 0) + 1
        continue
    for line in lines:
        class_id = line.split()[0]
        noct_class_counts[class_id] = noct_class_counts.get(class_id, 0) + 1
print('Quantidade de imagens por classe nas labels noturnas:')
for class_id, count in noct_class_counts.items():
    print(f'Classe {class_id}: {count} imagens')

# As a percentage of the number of label files.
total_noct_images = len(noct_img_labels)
print('\nPorcentagem de imagens por classe nas labels noturnas:')
for class_id, count in noct_class_counts.items():
    print(f'Classe {class_id}: {count / total_noct_images * 100:.2f}%')

# Bar chart of the per-class counts.
import matplotlib.pyplot as plt
noct_classes = list(noct_class_counts.keys())
noct_counts = list(noct_class_counts.values())
plt.bar(noct_classes, noct_counts)
plt.xlabel('Classes')
plt.ylabel('Quantidade de Imagens')
plt.title('Quantidade de Imagens por Classe nas Labels Noturnas')
plt.show()


### Validação Noturnas


In [None]:
# Night images and labels found (recursively) under dataset/Noturnas/valid.
noct_val_image_root = noct_val_label_root = PROJECT_ROOT / 'dataset/Noturnas/valid'
noct_val_img_files = list_files_recursive(noct_val_image_root, IMAGE_EXTS)
noct_val_img_labels = list_files_recursive(noct_val_label_root, LABEL_EXTS)

print(
    f'Quantidade de imagens noturnas dentro de {noct_val_image_root}: {len(noct_val_img_files)}',
    f'Quantidade de labels noturnas dentro de {noct_val_label_root}: {len(noct_val_img_labels)}',
    sep='\n',
)


In [None]:
# Night validation images that have a matching label file.
# Keys of every label file in the validation split.
noct_val_labeled_images = {file_key(label, noct_val_label_root) for label in noct_val_img_labels}
# Keys of every image file in the validation split.
noct_val_img_keys = {file_key(img, noct_val_image_root) for img in noct_val_img_files}
# Count only keys present on both sides, as the daytime training cell does;
# the label key set alone would also count labels whose image is missing.
noct_val_imgs_with_labels = noct_val_img_keys & noct_val_labeled_images
print(f'Quantidade de imagens noturnas de validacao com labels correspondentes: {len(noct_val_imgs_with_labels)}')

# Names of the images that have no label.
noct_val_unlabeled_images = [
    img for img in noct_val_img_files
    if file_key(img, noct_val_image_root) not in noct_val_labeled_images
]
print('Nomes das imagens noturnas de validacao sem labels correspondentes:')
for img in noct_val_unlabeled_images:
    print(img.relative_to(noct_val_image_root))


In [None]:
# Per-class counts for the night validation labels.
# Every non-blank line of a label file adds one count to the class id found in
# its first whitespace-separated field; a label file with no annotation lines
# is counted once as 'BACKGROUND'.
# NOTE(review): the printed text says "imagens", but a file with several lines
# adds several counts, so the percentages (taken over the number of label
# files) can exceed 100% — confirm which quantity is intended.
noct_val_class_counts = {}
for label_path in noct_val_img_labels:
    with open(label_path, 'r') as f:
        # Keep only lines with content: a blank or whitespace-only line has no
        # class id, and `line.split()[0]` on it would raise IndexError.
        lines = [line for line in f if line.strip()]
    if not lines:
        # No annotations in this file: count it once as background.
        class_id = 'BACKGROUND'
        noct_val_class_counts[class_id] = noct_val_class_counts.get(class_id, 0) + 1
        continue
    for line in lines:
        class_id = line.split()[0]
        noct_val_class_counts[class_id] = noct_val_class_counts.get(class_id, 0) + 1
print('Quantidade de imagens por classe nas labels noturnas de Validacao:')
for class_id, count in noct_val_class_counts.items():
    print(f'Classe {class_id}: {count} imagens')

# As a percentage of the number of label files.
total_noct_val_images = len(noct_val_img_labels)
print('\nPorcentagem de imagens por classe nas labels noturnas de Validacao:')
for class_id, count in noct_val_class_counts.items():
    print(f'Classe {class_id}: {count / total_noct_val_images * 100:.2f}%')

# Bar chart of the per-class counts.
import matplotlib.pyplot as plt
noct_val_classes = list(noct_val_class_counts.keys())
noct_val_counts = list(noct_val_class_counts.values())
plt.bar(noct_val_classes, noct_val_counts)
plt.xlabel('Classes')
plt.ylabel('Quantidade de Imagens')
plt.title('Quantidade de Imagens por Classe nas Labels Noturnas (Validacao)')
plt.show()


In [None]:
# Night images and labels found (recursively) under dataset/Noturnas/test.
noct_test_root = PROJECT_ROOT / 'dataset/Noturnas/test'
noct_test_image_root = noct_test_label_root = noct_test_root
noct_test_img_files = list_files_recursive(noct_test_image_root, IMAGE_EXTS)
noct_test_img_labels = list_files_recursive(noct_test_label_root, LABEL_EXTS)

print(
    f'Quantidade de imagens noturnas dentro de {noct_test_image_root}: {len(noct_test_img_files)}',
    f'Quantidade de labels noturnas dentro de {noct_test_label_root}: {len(noct_test_img_labels)}',
    sep='\n',
)


In [None]:
# Night test images that have a matching label file.
# Keys of every label file in the test split.
noct_test_labeled_images = {file_key(label, noct_test_label_root) for label in noct_test_img_labels}
# Keys of every image file in the test split.
noct_test_img_keys = {file_key(img, noct_test_image_root) for img in noct_test_img_files}
# Count only keys present on both sides, as the daytime training cell does;
# the label key set alone would also count labels whose image is missing.
noct_test_imgs_with_labels = noct_test_img_keys & noct_test_labeled_images
print(f'Quantidade de imagens noturnas de teste com labels correspondentes: {len(noct_test_imgs_with_labels)}')

# Names of the images that have no label.
noct_test_unlabeled_images = [
    img for img in noct_test_img_files
    if file_key(img, noct_test_image_root) not in noct_test_labeled_images
]
print('Nomes das imagens noturnas de teste sem labels correspondentes:')
for img in noct_test_unlabeled_images:
    print(img.relative_to(noct_test_image_root))


In [None]:
# Per-class counts for the night test labels.
# Every non-blank line of a label file adds one count to the class id found in
# its first whitespace-separated field; a label file with no annotation lines
# is counted once as 'BACKGROUND'.
# NOTE(review): the printed text says "imagens", but a file with several lines
# adds several counts, so the percentages (taken over the number of label
# files) can exceed 100% — confirm which quantity is intended.
noct_test_class_counts = {}
for label_path in noct_test_img_labels:
    with open(label_path, 'r') as f:
        # Keep only lines with content: a blank or whitespace-only line has no
        # class id, and `line.split()[0]` on it would raise IndexError.
        lines = [line for line in f if line.strip()]
    if not lines:
        # No annotations in this file: count it once as background.
        class_id = 'BACKGROUND'
        noct_test_class_counts[class_id] = noct_test_class_counts.get(class_id, 0) + 1
        continue
    for line in lines:
        class_id = line.split()[0]
        noct_test_class_counts[class_id] = noct_test_class_counts.get(class_id, 0) + 1
print('Quantidade de imagens por classe nas labels noturnas de Teste:')
for class_id, count in noct_test_class_counts.items():
    print(f'Classe {class_id}: {count} imagens')

# As a percentage of the number of label files (skipped when the split has none).
total_noct_test_images = len(noct_test_img_labels)
# The leading line break is written as the escape '\n'; a physical newline
# inside a single-quoted literal is a SyntaxError.
print('\nPorcentagem de imagens por classe nas labels noturnas de Teste:')
if total_noct_test_images:
    for class_id, count in noct_test_class_counts.items():
        print(f'Classe {class_id}: {count / total_noct_test_images * 100:.2f}%')
else:
    print('Sem labels no split de teste.')

# Bar chart of the per-class counts, drawn only when there is something to plot.
if noct_test_class_counts:
    import matplotlib.pyplot as plt
    noct_test_classes = list(noct_test_class_counts.keys())
    noct_test_counts = list(noct_test_class_counts.values())
    plt.bar(noct_test_classes, noct_test_counts)
    plt.xlabel('Classes')
    plt.ylabel('Quantidade de Imagens')
    plt.title('Quantidade de Imagens por Classe nas Labels Noturnas (Teste)')
    plt.show()


In [None]:
# Combined night summary: train + validation (+ test when it has labels).
noct_split_label = 'treino + validacao + teste' if total_noct_test_images else 'treino + validacao'
total_noct_images_combined = total_noct_images + total_noct_val_images + total_noct_test_images
print(f'Total de imagens noturnas ({noct_split_label}): {total_noct_images_combined}')

# Merge the per-class counts of the three splits, starting from a copy of the
# training counts so the per-split dictionaries stay untouched.
combined_noct_class_counts = noct_class_counts.copy()
for class_id, count in noct_val_class_counts.items():
    combined_noct_class_counts[class_id] = combined_noct_class_counts.get(class_id, 0) + count
for class_id, count in noct_test_class_counts.items():
    combined_noct_class_counts[class_id] = combined_noct_class_counts.get(class_id, 0) + count
# The leading line breaks below are written as the escape '\n'; a physical
# newline inside a single-quoted literal is a SyntaxError.
print(f'\nQuantidade conjunta de imagens por classe nas labels noturnas ({noct_split_label}):')
for class_id, count in combined_noct_class_counts.items():
    print(f'Classe {class_id}: {count} imagens')

# Combined percentages, relative to the combined number of label files.
print(f'\nPorcentagem conjunta de imagens por classe nas labels noturnas ({noct_split_label}):')
for class_id, count in combined_noct_class_counts.items():
    print(f'Classe {class_id}: {count / total_noct_images_combined * 100:.2f}%')

# Bar chart of the combined per-class counts.
import matplotlib.pyplot as plt
combined_noct_classes = list(combined_noct_class_counts.keys())
combined_noct_counts = list(combined_noct_class_counts.values())
plt.bar(combined_noct_classes, combined_noct_counts)
plt.xlabel('Classes')
plt.ylabel('Quantidade de Imagens')
plt.title(f'Quantidade Conjunta de Imagens por Classe nas Labels Noturnas ({noct_split_label})')
plt.show()
