In [10]:
import os
import shutil
import random
from pathlib import Path
import numpy as np
from tqdm import tqdm

# Single source of truth for the seed, so every generator in the notebook
# can be re-seeded from one easily tunable value.
SEED = 42

# Seed both the stdlib and the NumPy global generators for reproducibility.
random.seed(SEED)
np.random.seed(SEED)

In [11]:
# Define paths (relative to the notebook's working directory)
source_dir = Path("archive/animals/animals")
output_dir = Path("animal_dataset")

# Output sub-directories, one per split
train_dir = output_dir / "train"
valid_dir = output_dir / "valid"

print(f"Source directory: {source_dir}")
print(f"Output directory: {output_dir}")
print(f"Train directory: {train_dir}")
print(f"Validation directory: {valid_dir}")

# Fail fast with a clear message: without the source directory nothing below
# can work, and continuing would only crash later inside iterdir().
if not source_dir.exists():
    raise FileNotFoundError(f"Error: Source directory {source_dir} does not exist!")
print("Source directory found!")

# Create the output directories only once the input is known to be present,
# so a mistyped source path does not leave an empty dataset skeleton behind.
train_dir.mkdir(parents=True, exist_ok=True)
valid_dir.mkdir(parents=True, exist_ok=True)

# Get list of animal classes (one sub-directory per class).
# iterdir() yields entries in arbitrary, filesystem-dependent order; sorting
# keeps the class order -- and any seeded shuffling driven by it -- stable
# across machines and runs.
animal_classes = sorted(d.name for d in source_dir.iterdir() if d.is_dir())

# Show at most the first ten class names to keep the output short.
if len(animal_classes) > 10:
    class_preview = f"{animal_classes[:10]}..."
else:
    class_preview = f"{animal_classes}"
print(f"Found {len(animal_classes)} animal classes: {class_preview}")

Source directory: archive\animals\animals
Output directory: animal_dataset
Train directory: animal_dataset\train
Validation directory: animal_dataset\valid
Source directory found!
Found 90 animal classes: ['antelope', 'badger', 'bat', 'bear', 'bee', 'beetle', 'bison', 'boar', 'butterfly', 'cat']...


In [12]:
# Display the full list of class names collected from source_dir's sub-directories.
animal_classes

['antelope',
 'badger',
 'bat',
 'bear',
 'bee',
 'beetle',
 'bison',
 'boar',
 'butterfly',
 'cat',
 'caterpillar',
 'chimpanzee',
 'cockroach',
 'cow',
 'coyote',
 'crab',
 'crow',
 'deer',
 'dog',
 'dolphin',
 'donkey',
 'dragonfly',
 'duck',
 'eagle',
 'elephant',
 'flamingo',
 'fly',
 'fox',
 'goat',
 'goldfish',
 'goose',
 'gorilla',
 'grasshopper',
 'hamster',
 'hare',
 'hedgehog',
 'hippopotamus',
 'hornbill',
 'horse',
 'hummingbird',
 'hyena',
 'jellyfish',
 'kangaroo',
 'koala',
 'ladybugs',
 'leopard',
 'lion',
 'lizard',
 'lobster',
 'mosquito',
 'moth',
 'mouse',
 'octopus',
 'okapi',
 'orangutan',
 'otter',
 'owl',
 'ox',
 'oyster',
 'panda',
 'parrot',
 'pelecaniformes',
 'penguin',
 'pig',
 'pigeon',
 'porcupine',
 'possum',
 'raccoon',
 'rat',
 'reindeer',
 'rhinoceros',
 'sandpiper',
 'seahorse',
 'seal',
 'shark',
 'sheep',
 'snake',
 'sparrow',
 'squid',
 'squirrel',
 'starfish',
 'swan',
 'tiger',
 'turkey',
 'turtle',
 'whale',
 'wolf',
 'wombat',
 'woodpecker',


In [9]:
# Simple data split: 70% train, 30% validation
#
# For every class directory under source_dir, the "*.jpg" files are shuffled
# and copied into train_dir/<class> and valid_dir/<class>.
# The cell is written to be safe to re-run: the split is identical on every
# execution, and an image can never end up in both splits.
TRAIN_FRACTION = 0.7  # share of each class assigned to the training split
SPLIT_SEED = 42       # fixed seed for the split

# A dedicated generator makes the split independent of how much global random
# state earlier cells (or a previous run of this cell) have already consumed.
split_rng = random.Random(SPLIT_SEED)

# Sorted so the order in which classes draw from the generator is stable.
for class_name in sorted(animal_classes):
    # Source and destination paths
    class_source = source_dir / class_name
    train_class = train_dir / class_name
    valid_class = valid_dir / class_name

    # Create class directories
    train_class.mkdir(parents=True, exist_ok=True)
    valid_class.mkdir(parents=True, exist_ok=True)

    # Get all image files. Directory listings come back in arbitrary order,
    # so sort before shuffling; otherwise the seed alone does not pin the split.
    images = sorted(class_source.glob("*.jpg"))
    split_rng.shuffle(images)

    # Split 70/30
    split_idx = int(len(images) * TRAIN_FRACTION)
    train_images = images[:split_idx]
    valid_images = images[split_idx:]

    # Copy files. Before each copy, remove a same-named file from the
    # opposite split: a copy left there by an earlier run with a different
    # split would otherwise leak the image into both train and valid.
    for split_images, keep_dir, other_dir in (
        (train_images, train_class, valid_class),
        (valid_images, valid_class, train_class),
    ):
        for img in split_images:
            stale_copy = other_dir / img.name
            if stale_copy.exists():
                stale_copy.unlink()
            shutil.copy2(img, keep_dir / img.name)

    print(f"{class_name}: {len(train_images)} train, {len(valid_images)} valid")

print("Done! Data split completed.")

antelope: 42 train, 18 valid
badger: 42 train, 18 valid
bat: 42 train, 18 valid
bear: 42 train, 18 valid
bat: 42 train, 18 valid
bear: 42 train, 18 valid
bee: 42 train, 18 valid
beetle: 42 train, 18 valid
bee: 42 train, 18 valid
beetle: 42 train, 18 valid
bison: 42 train, 18 valid
boar: 42 train, 18 valid
bison: 42 train, 18 valid
boar: 42 train, 18 valid
butterfly: 42 train, 18 valid
cat: 42 train, 18 valid
butterfly: 42 train, 18 valid
cat: 42 train, 18 valid
caterpillar: 42 train, 18 valid
chimpanzee: 42 train, 18 valid
caterpillar: 42 train, 18 valid
chimpanzee: 42 train, 18 valid
cockroach: 42 train, 18 valid
cow: 42 train, 18 valid
cockroach: 42 train, 18 valid
cow: 42 train, 18 valid
coyote: 42 train, 18 valid
crab: 42 train, 18 valid
coyote: 42 train, 18 valid
crab: 42 train, 18 valid
crow: 42 train, 18 valid
deer: 42 train, 18 valid
crow: 42 train, 18 valid
deer: 42 train, 18 valid
dog: 42 train, 18 valid
dolphin: 42 train, 18 valid
dog: 42 train, 18 valid
dolphin: 42 train, 1

<br><br>