#  Could not pickle the task to send it to the workers 

GridSearchCV fails KerasClassifier for multiple causes. Multithreading with ```GridSearchCV(n_jobs=-1)``` causes the error if an adapted Normalization layer is used.

* [GridSearchCV with KerasClassifier causes Could not pickle the task to send it to the workers with an adapted Keras layer](https://stackoverflow.com/q/75482370/4281353)



In [1]:
import numpy as np
import tensorflow as tf
from keras.models import (
    Model,
    Sequential
)
from keras.layers import (
    Layer,
    Dense, 
    Dropout, 
    Flatten, 
    Normalization,
    BatchNormalization,
    Activation,
    Conv2D, 
    MaxPooling2D,
)

import sklearn
from sklearn.model_selection import (
    GridSearchCV
)
from scikeras.wrappers import (
    KerasClassifier, 
)

In [2]:
print('The scikit-learn version is {}.'.format(sklearn.__version__))

The scikit-learn version is 1.1.3.


# CIFAR-10

In [3]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
input_shape = x_train[0].shape
number_of_classes = 10

# Grid Search Parameter

In [4]:
batch_size = [32]
epochs = [2, 3]
param_grid = dict(batch_size=batch_size, epochs=epochs)

---
# Error Case - Adapted Normalization Layer

In [5]:
normalization: Layer = None

normalization = Normalization(
    name="norm",
    input_shape=input_shape,  # (32, 32, 3)
    axis=-1                   # Regard each pixel as a feature
)
normalization.adapt(x_train)

2023-02-17 20:34:02.326077: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [6]:
def create_model():
    model = Sequential([
        normalization,
        Conv2D(
            name="conv",
            filters=32,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding="same",
            activation='relu',
            input_shape=input_shape
        ),
        MaxPooling2D(
            name="maxpool",
            pool_size=(2, 2)
        ),
        Flatten(),
        Dense(
            name="full",
            units=100,
            activation="relu"
        ),
        Dense(
            name="label",
            units=number_of_classes,
            activation="softmax"
        )
    ])
    model.compile(
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        optimizer='adam',
        metrics=['accuracy']
    )
    return model

In [7]:
model = KerasClassifier(model=create_model, verbose=2)

In [8]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(x_train, y_train)

PicklingError: Could not pickle the task to send it to the workers.

# Normal Case

No error without the adapted layer.

In [9]:
max_value = float(np.max(x_train))
x_train_normed, x_test_normed = x_train/max_value, x_test/max_value

# Model

In [10]:
def create_model_with_normalization_layer():
    model = Sequential([
        Conv2D(
            name="conv",
            filters=32,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding="same",
            activation='relu',
            input_shape=input_shape
        ),
        MaxPooling2D(
            name="maxpool",
            pool_size=(2, 2)
        ),
        Flatten(),
        Dense(
            name="full",
            units=100,
            activation="relu"
        ),
        Dense(
            name="label",
            units=number_of_classes,
            activation="softmax"
        )
    ])
    model.compile(
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        optimizer='adam',
        metrics=['accuracy']
    )
    return model

In [11]:
model = KerasClassifier(model=create_model_with_normalization_layer, verbose=2)

In [12]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(x_train, y_train)

2023-02-17 20:34:35.216055: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-17 20:34:35.217841: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-17 20:34:35.221679: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-17 20:34:35.223022: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-17 20:34:35.226140: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-17 20:34:35.235263: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1/3
Epoch 1/2
Epoch 1/3
Epoch 1/2
Epoch 1/3
Epoch 1/2
1042/1042 - 19s - loss: 4.2843 - accuracy: 0.3465 - 19s/epoch - 18ms/step
Epoch 2/3
1042/1042 - 19s - loss: 5.1563 - accuracy: 0.1821 - 19s/epoch - 18ms/step
Epoch 2/2
1042/1042 - 19s - loss: 3.8695 - accuracy: 0.2710 - 19s/epoch - 18ms/step
Epoch 2/3
1042/1042 - 19s - loss: 4.6497 - accuracy: 0.3139 - 19s/epoch - 18ms/step
Epoch 2/2
1042/1042 - 19s - loss: 5.8426 - accuracy: 0.1595 - 19s/epoch - 18ms/step
Epoch 2/2
1042/1042 - 19s - loss: 4.5681 - accuracy: 0.2115 - 19s/epoch - 18ms/step
Epoch 2/3
1042/1042 - 19s - loss: 1.4724 - accuracy: 0.4860 - 19s/epoch - 18ms/step
Epoch 3/3
1042/1042 - 19s - loss: 1.8964 - accuracy: 0.3038 - 19s/epoch - 18ms/step
1042/1042 - 19s - loss: 1.6692 - accuracy: 0.4215 - 19s/epoch - 18ms/step
Epoch 3/3
1042/1042 - 19s - loss: 1.5876 - accuracy: 0.4373 - 19s/epoch - 18ms/step
1042/1042 - 19s - loss: 1.8214 - accuracy: 0.3668 - 19s/epoch - 18ms/step
Epoch 3/3
1042/1042 - 19s - loss: 2.0591 - acc

In [13]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.462920 using {'batch_size': 32, 'epochs': 3}
0.354919 (0.063444) with: {'batch_size': 32, 'epochs': 2}
0.462920 (0.011277) with: {'batch_size': 32, 'epochs': 3}
