In [None]:
import os
import pandas as pd
import yaml
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Dataset root directory and the YOLO configuration file that describes it.
dataset_base_path = 'placas-transito-10'
data_yaml_path = os.path.join(dataset_base_path, 'data.yaml')

# Decode explicitly as UTF-8: without `encoding=` the platform default is used
# (e.g. cp1252 on Windows), which garbles accented characters in class names.
with open(data_yaml_path, 'r', encoding='utf-8') as f:
    data_config = yaml.safe_load(f)

class_names = data_config['names']
# Ultralytics-style data.yaml files may store `names` as an {id: name} mapping
# instead of a list. Normalise to a list ordered by id so that both
# `class_names[i]` and `enumerate(class_names)` yield (id, name) pairs.
# NOTE(review): assumes the mapping's ids are contiguous and start at 0.
if isinstance(class_names, dict):
    class_names = [class_names[key] for key in sorted(class_names)]

print(f"Classes do dataset: {class_names}")
print(f"Número de classes: {len(class_names)}")

def load_labels(labels_dir, split_name):
    """Load YOLO annotations from a directory into a list of rows.

    Every file in ``labels_dir`` whose name ends with ``.txt`` is read, and
    each non-blank line is parsed as
    ``class_id x_center y_center width height``. Only the first five
    whitespace-separated fields of a line are used; extra fields are ignored.

    Args:
        labels_dir: Directory containing the ``.txt`` label files.
        split_name: Name of the split (e.g. ``'train'``). It is stored in
            every returned row and shown in the progress-bar description.

    Returns:
        A list of ``[file_name, split_name, class_id, x_center, y_center,
        width, height]`` rows, ordered by file name and then by line order
        within each file. ``class_id`` is an ``int``; the four box values
        are ``float``.

    Raises:
        IndexError: If a non-blank line has fewer than five fields.
        ValueError: If a field cannot be converted to ``int`` / ``float``.
    """
    data = []
    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order reproducible across runs and machines.
    label_files = sorted(f for f in os.listdir(labels_dir) if f.endswith('.txt'))

    for file_name in tqdm(label_files, desc=f'Carregando {split_name}'):
        file_path = os.path.join(labels_dir, file_name)
        with open(file_path, 'r') as f:
            # Iterate the file lazily instead of materialising it with readlines().
            for line in f:
                parts = line.split()
                if not parts:
                    # Blank line: nothing to parse. Indexing parts[0] here
                    # would raise IndexError and abort the whole load.
                    continue
                class_id = int(parts[0])
                x_center = float(parts[1])
                y_center = float(parts[2])
                width = float(parts[3])
                height = float(parts[4])
                data.append([file_name, split_name, class_id, x_center, y_center, width, height])
    return data

# Label directory of each dataset split.
paths = {
    'train': os.path.join(dataset_base_path, 'train/labels'),
    'valid': os.path.join(dataset_base_path, 'valid/labels'),
    'test': os.path.join(dataset_base_path, 'test/labels'),
}

# Collect the annotation rows of every split whose label directory exists;
# splits without a directory are skipped without any message.
all_data = []
for split, path in paths.items():
    if not os.path.exists(path):
        continue
    all_data += load_labels(path, split)

# One row per annotation, plus the human-readable class name looked up
# from `class_names` by class id.
columns = ['filename', 'split', 'class_id', 'x_center', 'y_center', 'width', 'height']
df = pd.DataFrame(all_data, columns=columns).assign(
    class_name=lambda frame: frame['class_id'].apply(lambda class_id: class_names[class_id])
)

print("\nInformações do DataFrame:")
df.info()

print("\n5 primeiras linhas do DataFrame:")
df.head()

In [None]:
# Notebook-wide plot appearance.
sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = (12, 8)

# NOTE(review): recent seaborn releases (0.13+) emit a FutureWarning when
# `palette` is passed without `hue`; the suggested replacement is to pass the
# categorical variable as `hue` with `legend=False`. Left unchanged here
# because that form is not accepted by older seaborn versions - confirm the
# installed version before switching.

# Number of annotations in each split.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='split', palette='viridis', ax=ax)
ax.set_title('Número de Anotações por Conjunto (Treino/Validação/Teste)')
ax.set_xlabel('Conjunto')
ax.set_ylabel('Contagem de Anotações')
plt.show()

# Number of annotations per class, bars ordered by the index of value_counts().
class_order = df['class_name'].value_counts().index
fig, ax = plt.subplots(figsize=(15, 8))
sns.countplot(data=df, y='class_name', order=class_order, palette='viridis', ax=ax)
ax.set_title('Distribuição de Classes no Dataset Completo')
ax.set_xlabel('Contagem')
ax.set_ylabel('Classe')
plt.show()

In [None]:
# Display one randomly chosen annotated image for every class.
for class_id, class_name in enumerate(class_names):
    class_rows = df[df['class_id'] == class_id]
    if class_rows.empty:
        # A class listed in data.yaml may have no annotation in any split;
        # DataFrame.sample(1) raises ValueError on an empty frame, which
        # would abort the loop for all remaining classes.
        print(f"Nenhuma anotação encontrada para a classe: {class_name} (ID: {class_id})")
        continue

    sample = class_rows.sample(1)
    file_name = sample['filename'].iloc[0]
    split = sample['split'].iloc[0]
    # Swap only the trailing extension: str.replace('.txt', '.jpg') would
    # also rewrite a '.txt' that appears earlier in the file name.
    # NOTE(review): assumes every image is stored as '.jpg' - confirm.
    image_name = os.path.splitext(file_name)[0] + '.jpg'
    image_path = os.path.join(dataset_base_path, split, 'images', image_name)

    if os.path.exists(image_path):
        img = plt.imread(image_path)
        plt.figure(figsize=(6, 6))
        plt.imshow(img)
        plt.title(f'Exemplo da Classe: {class_name} (ID: {class_id})')
        plt.axis('off')
        plt.show()
    else:
        print(f"Imagem não encontrada: {image_path}")