In [1]:
# prepare data

from tensorflow.keras.datasets import cifar10

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

labels = [
    "Airplane",
    "Car",
    "Bird",
    "Cat",
    "Deer",
    "Dog",
    "Frog",
    "Horse",
    "Ship",
    "Truck",
]
label_to_index = {label: i for i, label in enumerate(labels)}

# normalize
# x_train, x_test = x_train / 255.0, x_test / 255.0

2025-02-01 01:12:59.488749: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-01 01:12:59.491360: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-01 01:12:59.498151: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738339979.508762  716220 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738339979.511949  716220 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-01 01:12:59.524413: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

In [2]:
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam

def provide_model(optimizer=Adam(learning_rate=0.0005)):
    input_layer = Input(shape=(32, 32, 3))

    x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_layer)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)

    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    output = Dense(10, activation="softmax")(x)

    model = Model(input_layer, output)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

W0000 00:00:1738339982.468742  716220 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [3]:
import numpy as np
import hashlib
import json


def hash_data(src: any) -> str:  # same as haio_hash
    return hashlib.md5(json.dumps(src, sort_keys=True).encode()).hexdigest()


data_to_label_index = {hash_data(img.tolist()): y_train[i] for i, img in enumerate(x_train)}


def provide_label(img_list) -> np.ndarray:
    return np.array([data_to_label_index[hash_data(img.tolist())] for img in img_list])

In [4]:
# fine-tuning

#  データの準備
data_size = 10000
x_train = x_train[:data_size]  # 最初の10,000枚だけを使う

# Active Learningの設定
initial_size = 1000  # 最初に使用するデータ数
query_size = 100  # 追加するデータ数
n_steps = 40  # ステップ数 (最大で90)

# 最初の1,000枚を使用する
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# 残りの9,000枚を未使用として保持
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# モデルを提供して初期学習
model = provide_model()
model.fit(x_labeled, y_labeled, epochs=10, verbose=1)

# Active Learningのループ
for step in range(n_steps):
    # 未使用データに対して予測を行う
    predictions = model.predict(x_unlabeled / 255.0)
    # 予測の不確実性を計算（確信度の低い順に選ぶ）
    uncertainties = np.max(predictions, axis=1)
    query_indices = np.argsort(uncertainties)[:query_size]

    # 新しく選ばれたデータをラベル付きデータに追加
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # 未使用データから選ばれたデータを除去
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    # モデルをFine-Tuning
    model.fit(x_labeled / 255.0, y_labeled, epochs=10, batch_size=32, verbose=1)

    # テストセットでの評価
    test_loss, test_accuracy = model.evaluate(x_test / 255.0, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.2284 - loss: 2.3098
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.6479 - loss: 1.0636
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8468 - loss: 0.5425
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9476 - loss: 0.2816
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9888 - loss: 0.1190
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9993 - loss: 0.0630
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9999 - loss: 0.0375
Epoch 8/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0227
Epoch 9/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━

2025-02-01 01:13:50.549644: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 122880000 exceeds 10% of free system memory.


Step 1/40 - Test Accuracy: 0.1002
[1m279/279[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Epoch 1/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9451 - loss: 0.2565
Epoch 2/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9410 - loss: 0.1909
Epoch 3/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9921 - loss: 0.0538
Epoch 4/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0108
Epoch 5/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0034
Epoch 6/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0026
Epoch 7/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0020
Epoch 8/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

2025-02-01 01:13:58.218432: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 122880000 exceeds 10% of free system memory.


Step 2/40 - Test Accuracy: 0.2103
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Epoch 1/10
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9547 - loss: 0.1601
Epoch 2/10
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9546 - loss: 0.1509
Epoch 3/10
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9938 - loss: 0.0429
Epoch 4/10
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9992 - loss: 0.0111
Epoch 5/10
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0044
Epoch 6/10
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0026
Epoch 7/10
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0014
Epoch 8/10
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

2025-02-01 01:14:05.830523: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 122880000 exceeds 10% of free system memory.


Step 3/40 - Test Accuracy: 0.4645
[1m272/272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Epoch 1/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9326 - loss: 0.2675
Epoch 2/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9500 - loss: 0.1492
Epoch 3/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9878 - loss: 0.0484
Epoch 4/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9994 - loss: 0.0064
Epoch 5/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0025
Epoch 6/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0015
Epoch 7/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0010
Epoch 8/10
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

2025-02-01 01:14:13.856674: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 122880000 exceeds 10% of free system memory.


Step 4/40 - Test Accuracy: 0.4705
[1m269/269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Epoch 1/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9366 - loss: 0.2142
Epoch 2/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9725 - loss: 0.1044
Epoch 3/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9908 - loss: 0.0371
Epoch 4/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9997 - loss: 0.0055
Epoch 5/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0020
Epoch 6/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0014
Epoch 7/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0010
Epoch 8/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

2025-02-01 01:14:22.242046: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 122880000 exceeds 10% of free system memory.


Step 5/40 - Test Accuracy: 0.4730
[1m266/266[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9479 - loss: 0.1994
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9455 - loss: 0.1525
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9873 - loss: 0.0521
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9947 - loss: 0.0174
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.9969 - loss: 0.0065
Epoch 6/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0013
Epoch 7/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 1.0000 - loss: 0.0010
Epoch 8/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [5]:
# eval

test_loss, test_acc = model.evaluate(x_test / 255.0, y_test, verbose=2)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)

313/313 - 1s - 3ms/step - accuracy: 0.5628 - loss: 3.7008


Test accuracy: 0.5627999901771545
Test loss: 3.700786828994751


In [None]:
# re-training

# データの準備
data_size = 10000
x_train = x_train[:data_size]  # 最初の10,000枚だけを使う

# Active Learningの設定
initial_size = 1000  # 最初に使用するデータ数
query_size = 100  # 追加するデータ数
n_steps = 40  # ステップ数 (最大で90)

# 最初の1,000枚を使用する
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# 残りの9,000枚を未使用として保持
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# モデルを提供して初期学習
model = provide_model()
model.fit(x_labeled, y_labeled, epochs=10, verbose=1)

# Active Learningのループ
for step in range(n_steps):
    # 未使用データに対して予測を行う
    predictions = model.predict(x_unlabeled / 255.0)
    # 予測の不確実性を計算（確信度の低い順に選ぶ）
    uncertainties = np.max(predictions, axis=1)
    query_indices = np.argsort(uncertainties)[:query_size]

    # 新しく選ばれたデータをラベル付きデータに追加
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # 未使用データから選ばれたデータを除去
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    model = provide_model()
    # モデルをFine-Tuning
    model.fit(x_labeled / 255.0, y_labeled, epochs=10, batch_size=32, verbose=1)

    # テストセットでの評価
    test_loss, test_accuracy = model.evaluate(x_test / 255.0, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

In [None]:
# eval

test_loss, test_acc = model.evaluate(x_test / 255.0, y_test, verbose=2)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)

In [None]:
# re-training

# データの準備
data_size = 10000
x_train = x_train[:data_size]  # 最初の10,000枚だけを使う

# Active Learningの設定
initial_size = 1000  # 最初に使用するデータ数
query_size = 100  # 追加するデータ数
n_steps = 40  # ステップ数 (最大で90)

# 最初の1,000枚を使用する
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# 残りの9,000枚を未使用として保持
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# モデルを提供して初期学習
model = provide_model()
model.fit(x_labeled, y_labeled, epochs=10, verbose=1)

# Active Learningのループ
for step in range(n_steps):
    # 未使用データに対して予測を行う
    predictions = model.predict(x_unlabeled / 255.0)
    # 予測の不確実性を計算（確信度の低い順に選ぶ）
    uncertainties = np.max(predictions, axis=1)
    query_indices = np.argsort(uncertainties)[:query_size]

    # 新しく選ばれたデータをラベル付きデータに追加
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # 未使用データから選ばれたデータを除去
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    model = provide_model()
    # モデルをFine-Tuning
    model.fit(x_labeled / 255.0, y_labeled, epochs=20, batch_size=32, verbose=1)

    # テストセットでの評価
    test_loss, test_accuracy = model.evaluate(x_test / 255.0, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

In [None]:
# eval

test_loss, test_acc = model.evaluate(x_test / 255.0, y_test, verbose=2)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)

In [None]:
# re-training
# epoch 50

# データの準備
data_size = 10000
x_train = x_train[:data_size]  # 最初の10,000枚だけを使う

# Active Learningの設定
initial_size = 1000  # 最初に使用するデータ数
query_size = 100  # 追加するデータ数
n_steps = 40  # ステップ数 (最大で90)

# 最初の1,000枚を使用する
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# 残りの9,000枚を未使用として保持
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# モデルを提供して初期学習
model = provide_model()
model.fit(x_labeled, y_labeled, epochs=10, verbose=1)

# Active Learningのループ
for step in range(n_steps):
    # 未使用データに対して予測を行う
    predictions = model.predict(x_unlabeled / 255.0)
    # 予測の不確実性を計算（確信度の低い順に選ぶ）
    uncertainties = np.max(predictions, axis=1)
    query_indices = np.argsort(uncertainties)[:query_size]

    # 新しく選ばれたデータをラベル付きデータに追加
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # 未使用データから選ばれたデータを除去
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    model = provide_model()
    # モデルをFine-Tuning
    model.fit(x_labeled / 255.0, y_labeled, epochs=50, batch_size=32, verbose=1)

    # テストセットでの評価
    test_loss, test_accuracy = model.evaluate(x_test / 255.0, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

In [None]:
# eval

test_loss, test_acc = model.evaluate(x_test / 255.0, y_test, verbose=2)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)