In [2]:
import os
import pandas as pd
import numpy as np
from PIL import Image

# Export a small sample of the digits dataset as PNG files, laid out as
# <base_dir>/<label>/digit_<row>.png with at most `images_per_class` images
# per digit class.

# Load the dataset: each row is one image flattened to 64 pixel values
# (reshaped to 8x8 below) plus a "target" column holding the digit label.
df = pd.read_csv("DIGITS.csv")

# Split into features and target
X = df.drop("target", axis=1)
y = df["target"]

# Create base dataset folder
base_dir = "digits_dataset_10"
os.makedirs(base_dir, exist_ok=True)

images_per_class = 10
# Assumes source intensities span 0–16 (as in the scikit-learn digits data) —
# confirm against DIGITS.csv if it comes from elsewhere.
max_pixel_value = 16

# Keep track of how many images were saved per class (keys are "0".."9")
saved_counts = {str(i): 0 for i in range(10)}

for idx, raw_label in enumerate(y):
    # Normalise through int so a float-typed target column (3.0) still maps
    # to the "3" key instead of raising KeyError on "3.0".
    label = str(int(raw_label))

    # Skip rows whose class already has enough samples
    if saved_counts[label] >= images_per_class:
        continue

    img_array = X.iloc[idx].to_numpy(dtype=float).reshape(8, 8)

    # Scale 0–16 → 0–255. Multiplying by 16 would turn the maximum value 16
    # into 256, which wraps around to 0 in uint8 and renders the brightest
    # pixels black; scale by 255/16 and clip to stay inside the uint8 range.
    scaled = np.rint(img_array * (255.0 / max_pixel_value))
    img = Image.fromarray(np.clip(scaled, 0, 255).astype(np.uint8))

    # Create label folder
    label_folder = os.path.join(base_dir, label)
    os.makedirs(label_folder, exist_ok=True)

    # Save image; the row position keeps file names unique within a class
    img.save(os.path.join(label_folder, f"digit_{idx}.png"))

    # Update count
    saved_counts[label] += 1

    # Counts only change after a save, so this is the only place the
    # "every class is full" condition can become true.
    if all(count >= images_per_class for count in saved_counts.values()):
        break

# Report honestly: the dataset may not contain enough samples of every digit.
incomplete = sorted(
    digit for digit, count in saved_counts.items() if count < images_per_class
)
if incomplete:
    print(
        f"⚠️ Fewer than {images_per_class} images saved for digit(s): "
        f"{', '.join(incomplete)}"
    )
else:
    print("✅ 10 images per digit (0–9) saved into 'digits_dataset_10'")


✅ 10 images per digit (0–9) saved into 'digits_dataset_10'
