In [1]:
# prepare data

from tensorflow.keras.datasets import cifar10

# Load the CIFAR-10 training and test splits through the Keras dataset helper.
# The arrays are kept exactly as returned; no scaling is applied at this point.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Display names for the ten classes; a name's position in this list is the
# integer stored for it in `label_to_index`.
labels = [
    "Airplane", "Car", "Bird", "Cat", "Deer",
    "Dog", "Frog", "Horse", "Ship", "Truck",
]
# Reverse lookup: class name -> position in `labels`.
label_to_index = dict(zip(labels, range(len(labels))))

# Normalization is not applied here; the training and evaluation cells divide by 255.0 at the point of use.
# x_train, x_test = x_train / 255.0, x_test / 255.0

In [2]:
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam

def provide_model(optimizer="adam"):
    """Build and compile a small CNN classifier for 32x32 three-channel inputs.

    The network stacks three 3x3, same-padded ReLU convolutions (32, 64 and 64
    filters), each followed by batch normalization; the first two stages are
    additionally followed by 2x2 max pooling. The feature map is then flattened
    and passed through a 64-unit ReLU dense layer and a 10-way softmax output.

    Args:
        optimizer: Optimizer forwarded unchanged to ``Model.compile``.

    Returns:
        The compiled model, using sparse categorical cross-entropy as the loss
        and accuracy as the tracked metric.
    """
    # (number of filters, whether max pooling follows) for each conv stage, in order.
    conv_stages = ((32, True), (64, True), (64, False))

    input_layer = Input(shape=(32, 32, 3))

    features = input_layer
    for n_filters, pool_after in conv_stages:
        features = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(features)
        features = BatchNormalization()(features)
        if pool_after:
            features = MaxPooling2D((2, 2))(features)

    flattened = Flatten()(features)
    hidden = Dense(64, activation='relu')(flattened)
    class_probabilities = Dense(10, activation="softmax")(hidden)

    classifier = Model(input_layer, class_probabilities)
    classifier.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [3]:
import numpy as np
import hashlib
import json


def hash_data(src: any) -> str:  # same as haio_hash
    """Return the hexadecimal MD5 digest of ``src`` serialized as JSON.

    ``src`` is dumped with ``sort_keys=True``, so dictionaries that differ only
    in key order produce the same digest. ``src`` must be JSON-serializable;
    otherwise ``json.dumps`` raises ``TypeError``.
    """
    serialized = json.dumps(src, sort_keys=True)
    digest = hashlib.md5(serialized.encode())
    return digest.hexdigest()


# Lookup table: content hash of each training image -> the matching entry of y_train.
# It is built from whatever x_train holds when this cell runs. Images with
# identical pixel data share one key, so the entry seen last wins.
data_to_label_index = {
    hash_data(img.tolist()): label_entry
    for img, label_entry in zip(x_train, y_train)
}


def provide_label(img_list) -> np.ndarray:
    """Look up the stored label for every image in ``img_list``.

    Each image is converted with ``.tolist()``, hashed with ``hash_data`` and
    used as a key into ``data_to_label_index``.

    Args:
        img_list: Iterable of array-like images exposing ``.tolist()``.

    Returns:
        An array holding the looked-up label entries, in input order.

    Raises:
        KeyError: If an image's hash is not present in ``data_to_label_index``.
    """
    found = []
    for img in img_list:
        key = hash_data(img.tolist())
        found.append(data_to_label_index[key])
    return np.array(found)

In [4]:
# fine-tuning
# Active learning where ONE model is kept for the whole run: after each batch
# of newly labeled samples the same model is trained further.

# Data preparation
data_size = 10000
x_train = x_train[:data_size]  # use only the first 10,000 images

# Active-learning settings
initial_size = 1000  # number of samples labeled up front
query_size = 100  # number of samples added per step
n_steps = 40  # number of steps (at most 90 with the sizes above)

# Start with the first 1,000 images as the labeled set
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# Keep the remaining 9,000 images as the unlabeled pool
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# The test set never changes, so scale it once instead of once per step.
x_test_scaled = x_test / 255.0

# Build the model and run the initial training
model = provide_model()
model.fit(x_labeled / 255.0, y_labeled, epochs=10, verbose=1)

# Active-learning loop
for step in range(n_steps):
    # Stop cleanly once every pool sample has been labeled (only reachable
    # when n_steps is raised past what the pool can supply).
    if len(x_unlabeled) == 0:
        print(f"Step {step+1}/{n_steps} - unlabeled pool exhausted, stopping early")
        break

    # Predict class probabilities for the unlabeled pool
    predictions = model.predict(x_unlabeled / 255.0)
    # Top-class probability = the model's confidence. Sorting ascending and
    # taking the first `query_size` entries picks the LEAST confident samples.
    confidences = np.max(predictions, axis=1)
    query_indices = np.argsort(confidences)[:query_size]

    # Move the selected samples into the labeled set
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # Remove the selected samples from the unlabeled pool
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    # Fine-tune the existing model on the enlarged labeled set
    model.fit(x_labeled / 255.0, y_labeled, epochs=10, batch_size=32, verbose=1)

    # Evaluate on the test set
    test_loss, test_accuracy = model.evaluate(x_test_scaled, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 43ms/step - accuracy: 0.2223 - loss: 2.4175
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - accuracy: 0.5465 - loss: 1.2982
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.7501 - loss: 0.7862
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - accuracy: 0.8843 - loss: 0.4031
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - accuracy: 0.9753 - loss: 0.1536
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 52ms/step - accuracy: 0.9972 - loss: 0.0636
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 1.0000 - loss: 0.0253
Epoch 8/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - accuracy: 1.0000 - loss: 0.0124
Epoch 9/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━

In [5]:
# eval
# Score the current `model` on the scaled test set and print both metrics.

test_loss, test_acc = model.evaluate(
    x=x_test / 255.0,
    y=y_test,
    verbose=2,
)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)

313/313 - 5s - 16ms/step - accuracy: 0.5367 - loss: 4.2783


Test accuracy: 0.5367000102996826
Test loss: 4.278298854827881


In [6]:
# retrain
# Active learning where a FRESH model is built and trained from scratch
# (10 epochs) after every batch of newly labeled samples.

# Data preparation
data_size = 10000
x_train = x_train[:data_size]  # use only the first 10,000 images

# Active-learning settings
initial_size = 1000  # number of samples labeled up front
query_size = 100  # number of samples added per step
n_steps = 40  # number of steps (at most 90 with the sizes above)

# Start with the first 1,000 images as the labeled set
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# Keep the remaining 9,000 images as the unlabeled pool
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# The test set never changes, so scale it once instead of once per step.
x_test_scaled = x_test / 255.0

# Build the model and run the initial training
model = provide_model()
model.fit(x_labeled / 255.0, y_labeled, epochs=10, verbose=1)

# Active-learning loop
for step in range(n_steps):
    # Stop cleanly once every pool sample has been labeled (only reachable
    # when n_steps is raised past what the pool can supply).
    if len(x_unlabeled) == 0:
        print(f"Step {step+1}/{n_steps} - unlabeled pool exhausted, stopping early")
        break

    # Predict class probabilities for the unlabeled pool, using the model
    # trained in the previous step (or the initial model on the first step)
    predictions = model.predict(x_unlabeled / 255.0)
    # Top-class probability = the model's confidence. Sorting ascending and
    # taking the first `query_size` entries picks the LEAST confident samples.
    confidences = np.max(predictions, axis=1)
    query_indices = np.argsort(confidences)[:query_size]

    # Move the selected samples into the labeled set
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # Remove the selected samples from the unlabeled pool
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    # Re-train from scratch: discard the previous model and fit a new one on
    # the enlarged labeled set (this is not fine-tuning)
    model = provide_model()
    model.fit(x_labeled / 255.0, y_labeled, epochs=10, batch_size=32, verbose=1)

    # Evaluate on the test set
    test_loss, test_accuracy = model.evaluate(x_test_scaled, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 72ms/step - accuracy: 0.2510 - loss: 2.3857
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 67ms/step - accuracy: 0.5956 - loss: 1.2297
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step - accuracy: 0.7632 - loss: 0.7381
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 56ms/step - accuracy: 0.8847 - loss: 0.3997
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step - accuracy: 0.9613 - loss: 0.1773
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 57ms/step - accuracy: 0.9977 - loss: 0.0523
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 59ms/step - accuracy: 0.9981 - loss: 0.0305
Epoch 8/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 54ms/step - accuracy: 1.0000 - loss: 0.0133
Epoch 9/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━

In [7]:
# eval
# Score the current `model` on the scaled test set and print both metrics.

test_loss, test_acc = model.evaluate(
    x=x_test / 255.0,
    y=y_test,
    verbose=2,
)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)

313/313 - 2s - 6ms/step - accuracy: 0.5630 - loss: 1.9702


Test accuracy: 0.5630000233650208
Test loss: 1.9701793193817139


In [10]:
# re-training
# Active learning where a FRESH model is built and trained from scratch
# (20 epochs) after every batch of newly labeled samples.

# Data preparation
data_size = 10000
x_train = x_train[:data_size]  # use only the first 10,000 images

# Active-learning settings
initial_size = 1000  # number of samples labeled up front
query_size = 100  # number of samples added per step
n_steps = 40  # number of steps (at most 90 with the sizes above)

# Start with the first 1,000 images as the labeled set
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# Keep the remaining 9,000 images as the unlabeled pool
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# The test set never changes, so scale it once instead of once per step.
x_test_scaled = x_test / 255.0

# Build the model and run the initial training
model = provide_model()
model.fit(x_labeled / 255.0, y_labeled, epochs=10, verbose=1)

# Active-learning loop
for step in range(n_steps):
    # Stop cleanly once every pool sample has been labeled (only reachable
    # when n_steps is raised past what the pool can supply).
    if len(x_unlabeled) == 0:
        print(f"Step {step+1}/{n_steps} - unlabeled pool exhausted, stopping early")
        break

    # Predict class probabilities for the unlabeled pool, using the model
    # trained in the previous step (or the initial model on the first step)
    predictions = model.predict(x_unlabeled / 255.0)
    # Top-class probability = the model's confidence. Sorting ascending and
    # taking the first `query_size` entries picks the LEAST confident samples.
    confidences = np.max(predictions, axis=1)
    query_indices = np.argsort(confidences)[:query_size]

    # Move the selected samples into the labeled set
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # Remove the selected samples from the unlabeled pool
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    # Re-train from scratch: discard the previous model and fit a new one on
    # the enlarged labeled set (this is not fine-tuning)
    model = provide_model()
    model.fit(x_labeled / 255.0, y_labeled, epochs=20, batch_size=32, verbose=1)

    # Evaluate on the test set
    test_loss, test_accuracy = model.evaluate(x_test_scaled, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.1970 - loss: 2.5244
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.5209 - loss: 1.2940
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.7837 - loss: 0.6877
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.8976 - loss: 0.3591
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.9787 - loss: 0.1466
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.9888 - loss: 0.0746
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.9993 - loss: 0.0305
Epoch 8/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 1.0000 - loss: 0.0186
Epoch 9/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━

In [11]:
# eval
# Score the current `model` on the scaled test set and print both metrics.

test_loss, test_acc = model.evaluate(
    x=x_test / 255.0,
    y=y_test,
    verbose=2,
)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)

313/313 - 2s - 6ms/step - accuracy: 0.5822 - loss: 2.1203


Test accuracy: 0.5821999907493591
Test loss: 2.120327949523926


In [None]:
# re-training
# epoch 50
# Active learning where a FRESH model is built and trained from scratch
# (50 epochs) after every batch of newly labeled samples.

# Data preparation
data_size = 10000
x_train = x_train[:data_size]  # use only the first 10,000 images

# Active-learning settings
initial_size = 1000  # number of samples labeled up front
query_size = 100  # number of samples added per step
n_steps = 40  # number of steps (at most 90 with the sizes above)

# Start with the first 1,000 images as the labeled set
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# Keep the remaining 9,000 images as the unlabeled pool
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# The test set never changes, so scale it once instead of once per step.
x_test_scaled = x_test / 255.0

# Build the model and run the initial training
model = provide_model()
model.fit(x_labeled / 255.0, y_labeled, epochs=10, verbose=1)

# Active-learning loop
for step in range(n_steps):
    # Stop cleanly once every pool sample has been labeled (only reachable
    # when n_steps is raised past what the pool can supply).
    if len(x_unlabeled) == 0:
        print(f"Step {step+1}/{n_steps} - unlabeled pool exhausted, stopping early")
        break

    # Predict class probabilities for the unlabeled pool, using the model
    # trained in the previous step (or the initial model on the first step)
    predictions = model.predict(x_unlabeled / 255.0)
    # Top-class probability = the model's confidence. Sorting ascending and
    # taking the first `query_size` entries picks the LEAST confident samples.
    confidences = np.max(predictions, axis=1)
    query_indices = np.argsort(confidences)[:query_size]

    # Move the selected samples into the labeled set
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # Remove the selected samples from the unlabeled pool
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    # Re-train from scratch: discard the previous model and fit a new one on
    # the enlarged labeled set (this is not fine-tuning)
    model = provide_model()
    model.fit(x_labeled / 255.0, y_labeled, epochs=50, batch_size=32, verbose=1)

    # Evaluate on the test set
    test_loss, test_accuracy = model.evaluate(x_test_scaled, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

In [None]:
# eval
# Score the current `model` on the scaled test set and print both metrics.

test_loss, test_acc = model.evaluate(
    x=x_test / 255.0,
    y=y_test,
    verbose=2,
)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)