In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv2D, BatchNormalization, MaxPool2D, SpatialDropout2D,
    Flatten, Dense, Dropout
)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import numpy as np
import h5py

# Load combined datasets
def load_data(filepath='dataset.h5', dataset_name='mnist', reshape_to_cnn=True):
    """Read the images and labels of one dataset group from an HDF5 file.

    Args:
        filepath: Path of the HDF5 file, opened read-only.
        dataset_name: Name of the group that holds the ``images`` and
            ``labels`` datasets.
        reshape_to_cnn: When true, the images are reshaped to
            ``(-1, 28, 28, 1)`` before being returned.

    Returns:
        A tuple ``(images, labels)``.
    """
    with h5py.File(filepath, 'r') as h5_file:
        dataset_group = h5_file[dataset_name]
        # Slice with [:] while the file is still open so the data is read
        # before the handle is closed.
        pixels = dataset_group['images'][:]
        targets = dataset_group['labels'][:]

    if not reshape_to_cnn:
        return pixels, targets

    # Add the trailing channel axis; assumes every image holds 28*28 values
    # -- TODO confirm for the non-MNIST groups.
    return pixels.reshape(-1, 28, 28, 1), targets

def load_combined_datasets(dataset_names=('mnist', 'emnist', 'usps', 'sklearn_digits')):
    """Load several dataset groups and concatenate them into one array pair.

    Args:
        dataset_names: Iterable of group names passed to ``load_data``.
            A name that appears more than once is loaded and included only
            once, at the position of its first occurrence.

    Returns:
        A tuple ``(x_combined, y_combined)``: the images and the labels of
        all requested datasets concatenated along the first axis, in the
        order the names were given.

    Raises:
        ValueError: If ``dataset_names`` is empty.
    """
    # dict.fromkeys drops repeated names while keeping first-seen order, so
    # each group is read from disk only once.
    unique_names = list(dict.fromkeys(dataset_names))
    if not unique_names:
        raise ValueError('dataset_names must contain at least one dataset name')

    loaded = [
        load_data(dataset_name=name, reshape_to_cnn=True)
        for name in unique_names
    ]
    x_combined = np.concatenate([images for images, _ in loaded])
    y_combined = np.concatenate([labels for _, labels in loaded])
    return x_combined, y_combined

# Load every dataset, then scale the pixels and one-hot encode the labels.
raw_images, raw_labels = load_combined_datasets()
# Assumes stored pixel values lie in 0-255 -- TODO confirm for every group.
x_combined = raw_images.astype('float32') / 255.0
y_combined = tf.keras.utils.to_categorical(raw_labels, 10)
del raw_images, raw_labels  # only the preprocessed arrays are needed below

# Hold out 10% of the samples for validation; the fixed seed keeps the
# split reproducible between runs.
x_train, x_val, y_train, y_val = train_test_split(
    x_combined,
    y_combined,
    test_size=0.1,
    random_state=42,
)

# Data augmentation: small random rotations, shifts and zooms.
# No datagen.fit() call is needed: fit() only computes statistics for
# featurewise_center, featurewise_std_normalization and zca_whitening,
# none of which are enabled here.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1
)

# Model architecture: two convolutional stages (32, then 64 filters), each
# followed by pooling and spatial dropout, then a regularized dense head.
model = Sequential([
    # Declare the input with an explicit Input object; passing input_shape
    # to the first layer of a Sequential model triggers a Keras warning.
    tf.keras.Input(shape=(28, 28, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPool2D((2, 2)),
    SpatialDropout2D(0.2),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPool2D((2, 2)),
    SpatialDropout2D(0.2),

    Flatten(),
    Dense(256, activation='relu', kernel_regularizer='l2'),
    BatchNormalization(),
    Dropout(0.5),
    # One softmax output per class, matching the 10-way one-hot labels.
    Dense(10, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Training callbacks, in the order they are handed to model.fit().
callbacks = [
    # Stop after 5 epochs without a val_loss improvement and restore the
    # best weights.
    EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
    ),
    # Save the model to disk only when val_accuracy improves.
    ModelCheckpoint(
        'best_model_combined.h5',
        monitor='val_accuracy',
        save_best_only=True,
    ),
    # Multiply the learning rate by 0.2 after 3 stalled epochs, never going
    # below 1e-6.
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=3,
        min_lr=1e-6,
    ),
]

# Train on augmented batches of 256 samples; the validation data is passed
# as plain arrays, not through the augmenting generator.
train_flow = datagen.flow(x_train, y_train, batch_size=256)
history = model.fit(
    train_flow,
    epochs=20,
    validation_data=(x_val, y_val),
    callbacks=callbacks,
)

# Save the model as it stands once fit() has returned.
model.save('improved_mnist_cnn_with_all_datasets.h5')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-02-10 16:22:34.452315: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
  self._warn_if_super_not_called()


Epoch 1/20
[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382ms/step - accuracy: 0.8778 - loss: 1.6475



[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m508s[0m 395ms/step - accuracy: 0.8779 - loss: 1.6467 - val_accuracy: 0.9843 - val_loss: 0.1805 - learning_rate: 0.0010
Epoch 2/20
[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 513ms/step - accuracy: 0.9737 - loss: 0.2180



[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m667s[0m 525ms/step - accuracy: 0.9737 - loss: 0.2180 - val_accuracy: 0.9898 - val_loss: 0.1551 - learning_rate: 0.0010
Epoch 3/20
[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 513ms/step - accuracy: 0.9790 - loss: 0.1920



[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m667s[0m 525ms/step - accuracy: 0.9790 - loss: 0.1920 - val_accuracy: 0.9914 - val_loss: 0.1441 - learning_rate: 0.0010
Epoch 4/20
[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m670s[0m 528ms/step - accuracy: 0.9810 - loss: 0.1826 - val_accuracy: 0.9899 - val_loss: 0.1458 - learning_rate: 0.0010
Epoch 5/20
[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 487ms/step - accuracy: 0.9810 - loss: 0.1800



[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m632s[0m 497ms/step - accuracy: 0.9810 - loss: 0.1800 - val_accuracy: 0.9929 - val_loss: 0.1269 - learning_rate: 0.0010
Epoch 6/20
[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1095s[0m 862ms/step - accuracy: 0.9824 - loss: 0.1717 - val_accuracy: 0.9888 - val_loss: 0.1441 - learning_rate: 0.0010
Epoch 7/20
[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m609s[0m 479ms/step - accuracy: 0.9840 - loss: 0.1629 - val_accuracy: 0.7601 - val_loss: 0.9363 - learning_rate: 0.0010
Epoch 8/20
[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m612s[0m 482ms/step - accuracy: 0.9843 - loss: 0.1622 - val_accuracy: 0.9905 - val_loss: 0.1394 - learning_rate: 0.0010
Epoch 9/20
[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 469ms/step - accuracy: 0.9893 - loss: 0.0990



[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m611s[0m 481ms/step - accuracy: 0.9893 - loss: 0.0990 - val_accuracy: 0.9956 - val_loss: 0.0504 - learning_rate: 2.0000e-04
Epoch 10/20
[1m1270/1270[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m610s[0m 480ms/step - accuracy: 0.9902 - loss: 0.0663 - val_accuracy: 0.9954 - val_loss: 0.0489 - learning_rate: 2.0000e-04
Epoch 11/20
[1m 327/1270[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m7:22[0m 470ms/step - accuracy: 0.9912 - loss: 0.0618

KeyboardInterrupt: 