In [1]:
import os
import glob
from collections import Counter
import pandas as pd

In [2]:
# Root of the dataset in YOLO layout. The cells below look for train/, valid/
# and test/ sub-folders here, each containing a labels/ directory of .txt files.
# Set the AFFECTNET_YOLO_ROOT environment variable to point at a different
# location; when it is unset or empty, the original local path is used.
dataset_root = os.environ.get("AFFECTNET_YOLO_ROOT") or (
    r"c:\Users\Pojesh\Documents\OfficialWorks\MV_Project\Dataset\affectnet\YOLO_format"
)

In [3]:
# Human-readable class names; a label file's integer class ID is used as the
# index into this list, so the order here matters.
# NOTE(review): confirm this order matches the dataset's own class mapping.
class_names = [
    "Anger", "Contempt", "Disgust", "Fear",
    "Happy", "Neutral", "Sad", "Surprise",
]

In [4]:
def count_classes_in_folder(folder_path):
    """Count label files per class ID in ``<folder_path>/labels``.

    Each ``*.txt`` file in the ``labels`` sub-folder contributes at most one
    count: the class ID is the first whitespace-separated token of the file's
    first non-blank line. Any further lines in the file are ignored.

    Args:
        folder_path: Directory that contains a ``labels`` sub-folder.

    Returns:
        collections.Counter mapping integer class ID -> number of label files
        whose first annotation has that class. Empty if the ``labels`` folder
        is missing or holds no ``.txt`` files.

    Files that are empty (or whitespace only) are skipped silently. Files that
    cannot be read, or whose first token is not an integer, are reported on
    stdout and skipped; they never abort the scan.
    """
    class_counts = Counter()

    # Get all label files
    label_files = glob.glob(os.path.join(folder_path, "labels", "*.txt"))

    for label_file in label_files:
        try:
            with open(label_file, 'r') as f:
                # Take the first line that has content, so a stray leading
                # blank line does not cause the whole file to be dropped.
                first_line = next(
                    (stripped for stripped in (line.strip() for line in f) if stripped),
                    "",
                )
            if first_line:
                # The class ID is the first number in the line
                class_id = int(first_line.split()[0])
                class_counts[class_id] += 1
        except (OSError, ValueError) as e:
            # OSError: file could not be opened/read.
            # ValueError: non-integer class ID (also covers decode errors).
            print(f"Error reading {label_file}: {e}")

    return class_counts

In [5]:
# Per-split class tallies, keyed by split folder name.
results = dict()
# Dataset split folders to scan, in the order they are reported.
folders = [
    "train",
    "valid",
    "test",
]


In [6]:
# Tally classes for every split folder that exists and print a per-class report.
for folder in folders:
    folder_path = os.path.join(dataset_root, folder)

    # Missing splits are reported and left out of `results`.
    if not os.path.exists(folder_path):
        print(f"\nFolder {folder_path} does not exist")
        continue

    counts = count_classes_in_folder(folder_path)
    results[folder] = counts

    print(f"\nCounts for {folder} folder:")
    for class_id, count in sorted(counts.items()):
        # IDs outside the known range are still listed (and still totalled),
        # just under a generic label.
        if 0 <= class_id < len(class_names):
            label = class_names[class_id]
        else:
            label = f"Unknown class {class_id}"
        print(f"  {label}: {count} images")

    # Every class, known or unknown, contributes to the total.
    total = sum(counts.values())
    print(f"  Total: {total} images")


Counts for train folder:
  Anger: 2339 images
  Contempt: 1996 images
  Disgust: 2242 images
  Fear: 2021 images
  Happy: 2154 images
  Neutral: 1616 images
  Sad: 1914 images
  Surprise: 2819 images
  Total: 17101 images

Counts for valid folder:
  Anger: 712 images
  Contempt: 618 images
  Disgust: 672 images
  Fear: 622 images
  Happy: 791 images
  Neutral: 514 images
  Sad: 603 images
  Surprise: 874 images
  Total: 5406 images

Counts for test folder:
  Anger: 383 images
  Contempt: 332 images
  Disgust: 327 images
  Fear: 318 images
  Happy: 399 images
  Neutral: 250 images
  Sad: 278 images
  Surprise: 468 images
  Total: 2755 images


In [7]:
# One row per scanned split: the split name, a column per known class
# (0 when the class never appeared), and the split's overall total.
df_data = [
    {
        "Folder": folder,
        **{
            name: results[folder].get(class_id, 0)
            for class_id, name in enumerate(class_names)
        },
        # The total is taken over all tallied IDs, including any that have
        # no entry in class_names.
        "Total": sum(results[folder].values()),
    }
    for folder in folders
    if folder in results
]

# Only build and print the table when at least one split was found.
if df_data:
    df = pd.DataFrame(df_data)
    print("\nSummary Table:")
    print(df.to_string(index=False))


Summary Table:
Folder  Anger  Contempt  Disgust  Fear  Happy  Neutral  Sad  Surprise  Total
 train   2339      1996     2242  2021   2154     1616 1914      2819  17101
 valid    712       618      672   622    791      514  603       874   5406
  test    383       332      327   318    399      250  278       468   2755


In [None]:
'''Dataset Class Distribution (After Augmentation):
Summary Table:
         Folder  Anger  Contempt  Disgust  Fear  Happy  Neutral  Sad  Surprise  Total
train_augmented   2819      2819     2819  2819   2819     2819 2819      2819  22552
          valid    712       618      672   622    791      514  603       874   5406
           test    383       332      327   318    399      250  278       468   2755'''