In [None]:
%pip install -q numpy
%pip install -q pandas
%pip install -q Pillow
%pip install -q tensorflow
%pip install -q keras
%pip install -q keras-tuner
%pip install -q keras.utils
%pip install -q ipywidgets

In [14]:
import os
import numpy as np
from PIL import Image
import concurrent.futures
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.utils import to_categorical
from tensorflow.keras import layers, models
from keras_tuner import HyperModel
from keras_tuner.tuners import RandomSearch

In [None]:
def process_image(img_path):
    try:
        img = Image.open(img_path).convert('L')  # Convert to grayscale
        img = img.resize((28, 28))  # Ensure the image is 28x28 pixels
        img_array = np.array(img) / 255.0  # Normalize to range [0, 1]
        label = os.path.basename(os.path.dirname(img_path))  # Get the folder name as label
        return img_array, label
    except Exception as e:
        print(f"Error processing image {img_path}: {e}")
        return None, None

In [None]:
def load_images_from_folder(folder):
    images = []
    labels = []
    img_paths = []
    
    # Collect all image paths
    for label in os.listdir(folder):
        label_path = os.path.join(folder, label)
        if os.path.isdir(label_path):
            for filename in os.listdir(label_path):
                img_path = os.path.join(label_path, filename)
                if os.path.isfile(img_path) and img_path.endswith(('.png', '.jpg', '.jpeg')):
                    img_paths.append((img_path, label))
    
    # Use concurrent processing to load images
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = executor.map(lambda p: process_image(p[0]), img_paths)
    
    for (img_array, _), (_, label) in zip(results, img_paths):
        if img_array is not None:
            images.append(img_array)
            labels.append(label)
    
    return np.array(images), np.array(labels)

In [None]:
# Define the paths to the folders
train_folder = 'dataset2/train'
test_folder = 'dataset2/test'
val_folder = 'dataset2/val'

In [None]:
# Load the images and labels
x_train, y_train = load_images_from_folder(train_folder)
x_test, y_test = load_images_from_folder(test_folder)
x_val, y_val = load_images_from_folder(val_folder)

In [None]:
# Combine all labels to find all unique labels
all_labels = np.concatenate([y_train, y_test, y_val])
unique_labels = np.unique(all_labels)
label_to_index = {label: index for index, label in enumerate(unique_labels)}
print(f"Label to index mapping: {label_to_index}")

In [None]:
# Convert all labels to integers using the combined unique labels
y_train = np.array([label_to_index[label] for label in y_train])
y_test = np.array([label_to_index[label] for label in y_test])
y_val = np.array([label_to_index[label] for label in y_val])

In [None]:
# Ensure no out-of-bound indices
print(f"Unique train labels (converted): {np.unique(y_train)}")
print(f"Unique test labels (converted): {np.unique(y_test)}")
print(f"Unique validation labels (converted): {np.unique(y_val)}")

In [None]:
# Reshape the data to include channel dimension (required by Conv2D)
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)
x_val = x_val.reshape(-1, 28, 28, 1)

In [None]:
# Convert labels to categorical (one-hot encoding)
num_classes = len(unique_labels)
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)
y_val = to_categorical(y_val, num_classes)

In [None]:
# Verify one-hot encoding
print(f"Shape of y_train after one-hot encoding: {y_train.shape}")
print(f"Shape of y_test after one-hot encoding: {y_test.shape}")
print(f"Shape of y_val after one-hot encoding: {y_val.shape}")

In [11]:
# Model building function
def build_model(hp):
    model = models.Sequential()
    model.add(layers.Conv2D(
        filters=hp.Int('filters', min_value=32, max_value=128, step=32),
        kernel_size=hp.Choice('kernel_size', values=[3, 5]),
        activation='relu',
        input_shape=(28, 28, 3)
    ))
    model.add(layers.MaxPooling2D(pool_size=2))
    model.add(layers.Flatten())
    model.add(layers.Dense(
        units=hp.Int('units', min_value=32, max_value=128, step=32),
        activation='relu'
    ))
    model.add(layers.Dense(10, activation='softmax'))

    model.compile(
        optimizer=hp.Choice('optimizer', values=['adam', 'rmsprop']),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

In [15]:
# HyperModel class
class MyHyperModel(HyperModel):
    def build(self, hp):
        return build_model(hp)

In [16]:
# Tuning
tuner = RandomSearch(
    MyHyperModel(),
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=2,
    directory='my_dir',
    project_name='my_project'
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [17]:
tuner.search(
    train_dataset,
    validation_data=val_dataset,
    epochs=10
)

NameError: name 'train_dataset' is not defined