In [None]:
import os, struct, gzip, numpy as np

def _smart_open(path):
    """Open *path* for binary reading, decompressing gzip files transparently.

    Args:
        path: File location as a ``str`` or any ``os.PathLike`` object
            (for example ``pathlib.Path``).

    Returns:
        A binary file object opened in ``'rb'`` mode: a ``gzip`` reader when
        the file name ends in ``.gz``, a plain file object otherwise.
    """
    # Normalise first so path-like objects work: pathlib.Path has no
    # .endswith(), which made the previous version fail on them.
    path = os.fspath(path)
    if path.endswith('.gz'):
        return gzip.open(path, 'rb')
    return open(path, 'rb')

def find_file(base_path, fname):
    """Locate *fname* under *base_path*, allowing for wrapped/compressed layouts.

    Candidates are tried in this order and the first existing regular file
    wins:

    1. ``base_path/fname``
    2. ``base_path/fname.gz``
    3. ``base_path/fname/fname``  (file nested in a folder of the same name)
    4. ``base_path/fname/fname.gz``

    Raises:
        FileNotFoundError: If none of the candidates is an existing file.
    """
    plain = os.path.join(base_path, fname)
    nested = os.path.join(base_path, fname, fname)
    candidates = (plain, plain + '.gz', nested, nested + '.gz')

    # next() with a default stops at the first hit without an explicit loop.
    match = next((c for c in candidates if os.path.isfile(c)), None)
    if match is None:
        raise FileNotFoundError(f'{fname} not found under {base_path}')
    return match

def load_images(path):
    """Read an IDX3 unsigned-byte image file into a 2-D ``uint8`` array.

    The header consists of four big-endian unsigned 32-bit integers (magic
    number, image count, rows, columns), followed by one byte per pixel.

    Args:
        path: Location of the (optionally gzip-compressed) IDX3 file.

    Returns:
        A ``numpy.uint8`` array of shape ``(count, rows * cols)``: one
        flattened image per row.

    Raises:
        struct.error: If the file is too short to contain the 16-byte header.
        ValueError: If the magic number is not that of an IDX3 ubyte file, or
            the pixel data is shorter than the header declares.
    """
    idx3_ubyte_magic = 2051  # 0x00000803: unsigned byte data, 3 dimensions

    with _smart_open(path) as f:
        magic, size, rows, cols = struct.unpack(">IIII", f.read(16))
        # Reject e.g. a labels file early instead of decoding garbage.
        if magic != idx3_ubyte_magic:
            raise ValueError(
                f'{path}: bad magic number {magic}, '
                f'expected {idx3_ubyte_magic} for an IDX3 image file'
            )
        expected = rows * cols * size
        payload = f.read(expected)

    # A truncated download would otherwise surface as an opaque reshape error.
    if len(payload) != expected:
        raise ValueError(
            f'{path}: expected {expected} bytes of pixel data, '
            f'got {len(payload)}'
        )
    data = np.frombuffer(payload, dtype=np.uint8)
    return data.reshape(size, rows * cols)

def load_labels(path):
    """Read an IDX1 unsigned-byte label file into a 1-D ``uint8`` array.

    The header consists of two big-endian unsigned 32-bit integers (magic
    number, label count), followed by one byte per label.

    Args:
        path: Location of the (optionally gzip-compressed) IDX1 file.

    Returns:
        A ``numpy.uint8`` array of shape ``(count,)``.

    Raises:
        struct.error: If the file is too short to contain the 8-byte header.
        ValueError: If the magic number is not that of an IDX1 ubyte file, or
            fewer labels are present than the header declares.
    """
    idx1_ubyte_magic = 2049  # 0x00000801: unsigned byte data, 1 dimension

    with _smart_open(path) as f:
        magic, size = struct.unpack(">II", f.read(8))
        # Reject e.g. an image file early instead of returning pixel bytes.
        if magic != idx1_ubyte_magic:
            raise ValueError(
                f'{path}: bad magic number {magic}, '
                f'expected {idx1_ubyte_magic} for an IDX1 label file'
            )
        payload = f.read(size)

    # A short read used to pass silently and yield fewer labels than images.
    if len(payload) != size:
        raise ValueError(
            f'{path}: expected {size} labels, got {len(payload)}'
        )
    return np.frombuffer(payload, dtype=np.uint8)

def load_mnist_data(base_path):
    """Load the MNIST train and test splits stored under *base_path*.

    Each of the four IDX files is located with ``find_file`` and decoded with
    ``load_images`` or ``load_labels``.

    Returns:
        The tuple ``(X_train, y_train, X_test, y_test)``.
    """
    def _resolve(name):
        # All four files live under the same base directory.
        return find_file(base_path, name)

    train_images = load_images(_resolve('train-images-idx3-ubyte'))
    train_labels = load_labels(_resolve('train-labels-idx1-ubyte'))
    test_images = load_images(_resolve('t10k-images-idx3-ubyte'))
    test_labels = load_labels(_resolve('t10k-labels-idx1-ubyte'))
    return train_images, train_labels, test_images, test_labels

# Load the four MNIST arrays into module-level names.
# NOTE(review): mnist_dataset_path is not defined in the visible part of this
# file; presumably it is set in an earlier notebook cell. Confirm.
X_train, y_train, X_test, y_test = load_mnist_data(mnist_dataset_path)
# Sanity check: print the shapes of the training images and labels
# (the trailing comment shows the output expected for the full training set).
print(X_train.shape, y_train.shape)   # (60000, 784) (60000,)

import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import img_to_array, load_img

def load_images_from_folder(folder_path, img_size=(128, 128)):
    """Load a one-sub-folder-per-class image dataset into arrays.

    Every sub-directory of *folder_path* is treated as a class. Its entries
    are loaded with ``load_img`` (resized to *img_size*) and converted with
    ``img_to_array``. Entries that fail to load are reported on stdout and
    skipped.

    Args:
        folder_path: Root directory containing one sub-directory per class.
        img_size: ``target_size`` passed to ``load_img`` for every image.

    Returns:
        A tuple ``(images, labels, class_names)``:

        * ``images``: ``float32`` array of the loaded images, divided by 255
          (maps 8-bit pixel values into [0, 1]).
        * ``labels``: integer array; ``labels[i]`` is the index into
          ``class_names`` of the class of ``images[i]``.
        * ``class_names``: sorted list of the class sub-directory names.
    """
    # Only directories are classes. Filtering BEFORE enumerate keeps the
    # labels contiguous (0 .. n_classes - 1) and keeps stray files such as a
    # README or .DS_Store out of class_names.
    class_names = sorted(
        entry
        for entry in os.listdir(folder_path)
        if os.path.isdir(os.path.join(folder_path, entry))
    )

    images = []
    labels = []
    for label_idx, class_name in enumerate(class_names):
        class_folder = os.path.join(folder_path, class_name)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and any seeded split of it) reproducible.
        for filename in sorted(os.listdir(class_folder)):
            img_path = os.path.join(class_folder, filename)
            try:
                img_array = img_to_array(load_img(img_path, target_size=img_size))
            except Exception as e:
                # Deliberately best-effort: report the bad entry and move on.
                print(f"Error loading {img_path}: {e}")
                continue
            images.append(img_array)
            labels.append(label_idx)

    images = np.array(images, dtype="float32") / 255.0  # Normalize to [0,1]
    labels = np.array(labels)
    return images, labels, class_names

def split_data(images, labels, test_size=0.2, random_state=42):
    """Split images and labels into stratified train and test subsets.

    Args:
        images: Samples to split.
        labels: Labels aligned with *images*; also used as the
            stratification target.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    split_options = {
        "test_size": test_size,
        "random_state": random_state,
        "stratify": labels,
    }
    train_x, test_x, train_y, test_y = train_test_split(
        images, labels, **split_options
    )
    return train_x, test_x, train_y, test_y

# Example usage: load a class-per-folder image dataset and split it.
# NOTE: this rebinds X_train, y_train, X_test and y_test, replacing the MNIST
# arrays assigned earlier in this file.
folder_path = 'your/folder/path'  # Placeholder; replace with the dataset root, e.g. './dataset/'
img_size = (128, 128)  # Images are resized to 128x128 on load

images, labels, class_names = load_images_from_folder(folder_path, img_size)
# split_data returns (X_train, X_test, y_train, y_test), which is a different
# order from load_mnist_data's (X_train, y_train, X_test, y_test).
X_train, X_test, y_train, y_test = split_data(images, labels)

# Report the split sizes and the classes that were found.
print(f"Train set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")
print(f"Number of classes: {len(class_names)}")
print(f"Class names: {class_names}")
