In [1]:
# prepare data

from tensorflow.keras.datasets import cifar10

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

labels = [
    "Airplane",
    "Car",
    "Bird",
    "Cat",
    "Deer",
    "Dog",
    "Frog",
    "Horse",
    "Ship",
    "Truck",
]
label_to_index = {label: i for i, label in enumerate(labels)}

# normalize
# x_train, x_test = x_train / 255.0, x_test / 255.0

In [2]:
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

def provide_model(optimizer='adam'):
    input = Input(shape=(32, 32, 3))

    x = Conv2D(32, (3, 3), activation='relu')(input)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)

    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    output = Dense(10, activation="softmax")(x)

    model = Model(input, output)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

In [3]:
import numpy as np
import hashlib
import json


def hash_data(src: any) -> str:  # same as haio_hash
    return hashlib.md5(json.dumps(src, sort_keys=True).encode()).hexdigest()


data_to_label_index = {hash_data(img.tolist()): y_train[i] for i, img in enumerate(x_train)}


def provide_label(img_list) -> np.ndarray:
    return np.array([data_to_label_index[hash_data(img.tolist())] for img in img_list])

In [None]:
# fine-tuning

#  データの準備
data_size = 10000
x_train = x_train[:data_size]  # 最初の10,000枚だけを使う

# Active Learningの設定
initial_size = 1000  # 最初に使用するデータ数
query_size = 100  # 追加するデータ数
n_steps = 40  # ステップ数 (最大で90)

# 最初の1,000枚を使用する
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# 残りの9,000枚を未使用として保持
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# モデルを提供して初期学習
model = provide_model()
model.fit(x_labeled, y_labeled, epochs=10, verbose=1)

# Active Learningのループ
for step in range(n_steps):
    # 未使用データに対して予測を行う
    predictions = model.predict(x_unlabeled / 255.0)
    # 予測の不確実性を計算（確信度の低い順に選ぶ）
    uncertainties = np.max(predictions, axis=1)
    query_indices = np.argsort(uncertainties)[:query_size]

    # 新しく選ばれたデータをラベル付きデータに追加
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # 未使用データから選ばれたデータを除去
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    # モデルをFine-Tuning
    model.fit(x_labeled / 255.0, y_labeled, epochs=10, batch_size=32, verbose=1)

    # テストセットでの評価
    test_loss, test_accuracy = model.evaluate(x_test / 255.0, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.1299 - loss: 16.5832
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.1815 - loss: 2.2214
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.2717 - loss: 2.1123
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.3244 - loss: 1.9412
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.3829 - loss: 1.6878
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4728 - loss: 1.4615
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5498 - loss: 1.2305
Epoch 8/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6149 - loss: 1.1105
Epoch 9/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [5]:
# eval

test_loss, test_acc = model.evaluate(x_test / 255.0, y_test, verbose=2)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)

313/313 - 1s - 4ms/step - accuracy: 0.5058 - loss: 4.6802


Test accuracy: 0.5058000087738037
Test loss: 4.680164813995361


In [None]:
# re-training

# データの準備
data_size = 10000
x_train = x_train[:data_size]  # 最初の10,000枚だけを使う

# Active Learningの設定
initial_size = 1000  # 最初に使用するデータ数
query_size = 100  # 追加するデータ数
n_steps = 40  # ステップ数 (最大で90)

# 最初の1,000枚を使用する
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# 残りの9,000枚を未使用として保持
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# モデルを提供して初期学習
model = provide_model()
model.fit(x_labeled, y_labeled, epochs=10, verbose=1)

# Active Learningのループ
for step in range(n_steps):
    # 未使用データに対して予測を行う
    predictions = model.predict(x_unlabeled / 255.0)
    # 予測の不確実性を計算（確信度の低い順に選ぶ）
    uncertainties = np.max(predictions, axis=1)
    query_indices = np.argsort(uncertainties)[:query_size]

    # 新しく選ばれたデータをラベル付きデータに追加
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # 未使用データから選ばれたデータを除去
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    model = provide_model()
    # モデルをFine-Tuning
    model.fit(x_labeled / 255.0, y_labeled, epochs=10, batch_size=32, verbose=1)

    # テストセットでの評価
    test_loss, test_accuracy = model.evaluate(x_test / 255.0, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - accuracy: 0.1056 - loss: 13.3335
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.1886 - loss: 2.2510
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.2694 - loss: 2.0330
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.3521 - loss: 1.8584
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.4251 - loss: 1.6893
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.4535 - loss: 1.4920
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.5010 - loss: 1.4398
Epoch 8/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.5824 - loss: 1.1580
Epoch 9/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━

In [7]:
# eval

test_loss, test_acc = model.evaluate(x_test / 255.0, y_test, verbose=2)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)

313/313 - 1s - 5ms/step - accuracy: 0.5216 - loss: 1.3465


Test accuracy: 0.5216000080108643
Test loss: 1.3464887142181396


In [8]:
# re-training

# データの準備
data_size = 10000
x_train = x_train[:data_size]  # 最初の10,000枚だけを使う

# Active Learningの設定
initial_size = 1000  # 最初に使用するデータ数
query_size = 100  # 追加するデータ数
n_steps = 40  # ステップ数 (最大で90)

# 最初の1,000枚を使用する
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# 残りの9,000枚を未使用として保持
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# モデルを提供して初期学習
model = provide_model()
model.fit(x_labeled, y_labeled, epochs=10, verbose=1)

# Active Learningのループ
for step in range(n_steps):
    # 未使用データに対して予測を行う
    predictions = model.predict(x_unlabeled / 255.0)
    # 予測の不確実性を計算（確信度の低い順に選ぶ）
    uncertainties = np.max(predictions, axis=1)
    query_indices = np.argsort(uncertainties)[:query_size]

    # 新しく選ばれたデータをラベル付きデータに追加
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # 未使用データから選ばれたデータを除去
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    model = provide_model()
    # モデルをFine-Tuning
    model.fit(x_labeled / 255.0, y_labeled, epochs=20, batch_size=32, verbose=1)

    # テストセットでの評価
    test_loss, test_accuracy = model.evaluate(x_test / 255.0, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.1289 - loss: 11.3610
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.2096 - loss: 2.1995
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.3223 - loss: 1.8801
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4590 - loss: 1.6336
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.5286 - loss: 1.4049
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6208 - loss: 1.1245
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7012 - loss: 0.9254
Epoch 8/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7632 - loss: 0.7260
Epoch 9/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [9]:
# eval

test_loss, test_acc = model.evaluate(x_test / 255.0, y_test, verbose=2)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)

313/313 - 1s - 4ms/step - accuracy: 0.5317 - loss: 1.9011


Test accuracy: 0.5317000150680542
Test loss: 1.9010968208312988


In [10]:
# re-training
# epoch 50

# データの準備
data_size = 10000
x_train = x_train[:data_size]  # 最初の10,000枚だけを使う

# Active Learningの設定
initial_size = 1000  # 最初に使用するデータ数
query_size = 100  # 追加するデータ数
n_steps = 40  # ステップ数 (最大で90)

# 最初の1,000枚を使用する
initial_indices = np.arange(initial_size)
x_labeled = x_train[initial_indices]
y_labeled = np.array(provide_label(x_labeled))

# 残りの9,000枚を未使用として保持
remaining_indices = np.arange(initial_size, data_size)
x_unlabeled = x_train[remaining_indices]

# モデルを提供して初期学習
model = provide_model()
model.fit(x_labeled, y_labeled, epochs=10, verbose=1)

# Active Learningのループ
for step in range(n_steps):
    # 未使用データに対して予測を行う
    predictions = model.predict(x_unlabeled / 255.0)
    # 予測の不確実性を計算（確信度の低い順に選ぶ）
    uncertainties = np.max(predictions, axis=1)
    query_indices = np.argsort(uncertainties)[:query_size]

    # 新しく選ばれたデータをラベル付きデータに追加
    new_samples = x_unlabeled[query_indices]
    new_labels = np.array(provide_label(new_samples))

    x_labeled = np.concatenate([x_labeled, new_samples])
    y_labeled = np.concatenate([y_labeled, new_labels])

    # 未使用データから選ばれたデータを除去
    x_unlabeled = np.delete(x_unlabeled, query_indices, axis=0)

    model = provide_model()
    # モデルをFine-Tuning
    model.fit(x_labeled / 255.0, y_labeled, epochs=50, batch_size=32, verbose=1)

    # テストセットでの評価
    test_loss, test_accuracy = model.evaluate(x_test / 255.0, y_test, verbose=0)
    print(f"Step {step+1}/{n_steps} - Test Accuracy: {test_accuracy:.4f}")

Epoch 1/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.1108 - loss: 18.2873
Epoch 2/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.1420 - loss: 2.3344
Epoch 3/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.2942 - loss: 2.0237
Epoch 4/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.3906 - loss: 1.7250
Epoch 5/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4833 - loss: 1.4764
Epoch 6/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.5874 - loss: 1.1981
Epoch 7/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.6293 - loss: 1.0278
Epoch 8/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7497 - loss: 0.8177
Epoch 9/10
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━

In [11]:
# eval

test_loss, test_acc = model.evaluate(x_test / 255.0, y_test, verbose=2)
print("\n")
print("Test accuracy:", test_acc)
print("Test loss:", test_loss)

313/313 - 3s - 9ms/step - accuracy: 0.5521 - loss: 4.1269


Test accuracy: 0.5521000027656555
Test loss: 4.126890659332275
