<a href="https://colab.research.google.com/github/nalgo-intern/team-a-2025-summer-08-04/blob/ryotaro_branch/one_digits.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pathlib
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from scipy.ndimage import rotate, shift
# Improved training-data generation helpers.
# Applies data augmentation to a single image.
def _augment_image(image):
    """Return a randomly rotated, shifted and brightness-scaled copy of one image.

    The input array is not modified. The random parameters are drawn from
    the global NumPy RNG: rotation angle in [-10, 10) degrees, a two-axis
    shift in [-3, 3) pixels, and a brightness factor in [0.7, 1.3).
    Areas uncovered by the rotation or shift are filled with 0, and the
    result is clipped to the 0-255 range.
    """
    # Draw every random parameter up front (same order as they are applied).
    rotation_deg = np.random.uniform(-10, 10)
    offset = np.random.uniform(-3, 3, size=2)
    gain = np.random.uniform(0.7, 1.3)

    # Work on a float32 copy so interpolation and scaling are not done in
    # the input's own dtype.
    work = image.astype(np.float32, copy=True)
    work = rotate(work, rotation_deg, reshape=False, mode='constant', cval=0)
    work = shift(work, offset, mode='constant', cval=0)

    # Keep the scaled pixel values inside 0-255.
    return np.clip(work * gain, 0, 255)

def make_double_images(images, labels, num_samples, augmentation=True):
    """Build a synthetic dataset of one- and two-digit number images.

    For every sample two random indices are drawn. The label at the first
    index is the tens digit:

    * tens digit != 0: the two images are (optionally) augmented, joined
      side by side, resized to 28x28 and labelled ``tens * 10 + ones``.
    * tens digit == 0: only the second image is used, (optionally)
      augmented, and labelled with its own digit.

    Pixel values are divided by 255.0 in both cases.

    Args:
        images: indexable collection of digit images
            (assumed to be 28x28 MNIST arrays; confirm against the caller).
        labels: digit labels aligned with ``images``.
        num_samples: number of samples to generate.
        augmentation: when true, ``_augment_image`` is applied to every
            source image.

    Returns:
        Tuple ``(images_array, labels_array)`` of NumPy arrays.
    """
    out_images = []
    out_labels = []
    pool_size = len(images)

    for _ in range(num_samples):
        tens_idx = np.random.randint(0, pool_size)
        ones_idx = np.random.randint(0, pool_size)

        tens_digit = labels[tens_idx]
        ones_digit = labels[ones_idx]

        if tens_digit != 0:
            # Two-digit number: left image is the tens digit, right the ones.
            left = images[tens_idx]
            right = images[ones_idx]
            if augmentation:
                left = _augment_image(left)
                right = _augment_image(right)

            # Join horizontally, then squeeze the double-width picture back
            # to 28x28 (tf.image.resize needs a trailing channel axis).
            pair = np.concatenate([left, right], axis=1)
            shrunk = tf.image.resize(pair[..., np.newaxis], [28, 28], method='bilinear')

            sample = np.squeeze(shrunk) / 255.0
            target = tens_digit * 10 + ones_digit
        else:
            # One-digit number: use the second image on its own.
            sample = images[ones_idx]
            if augmentation:
                sample = _augment_image(sample)
            sample = sample / 255.0
            target = ones_digit

        out_images.append(sample)
        out_labels.append(target)

    return np.array(out_images), np.array(out_labels)

# Data-loading entry point.
def load_double_data_final(num_train_samples=80000, num_test_samples=20000):
    """Load MNIST and derive the synthetic train and test sets from it.

    Args:
        num_train_samples: number of samples generated from the MNIST
            training split.
        num_test_samples: number of samples generated from the MNIST
            test split.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` as produced by
        ``make_double_images``.
    """
    train_split, test_split = mnist.load_data()

    # NOTE(review): augmentation is enabled for the test split as well;
    # confirm that evaluating on augmented data is intended.
    train_pair = make_double_images(*train_split, num_train_samples, augmentation=True)
    test_pair = make_double_images(*test_split, num_test_samples, augmentation=True)
    return train_pair, test_pair

In [None]:
# Report the library versions used for this run.
print(f"numpy : {np.__version__}")
print(f"pandas : {pd.__version__}")
print(f"tensorflow : {tf.__version__}")
print(f"matplotlib : {matplotlib.__version__}")

In [None]:
# Training-progress log file (set up for Colab runs).
CSV_FILE_PATH = "trainlog.csv"
# touch() creates the file when it is missing and leaves an existing file's
# content alone, so no separate existence check is needed.
pathlib.Path(CSV_FILE_PATH).touch(exist_ok=True)

In [None]:
# Load the data: build the synthetic train/test sets with the default sample counts.
(X_train, y_train),(X_test, y_test) = load_double_data_final()

In [None]:
# Show the array shapes of every split.
print(f"X_train :  {X_train.shape}")
print(f"y_train :  {y_train.shape}")
print(f"X_test :  {X_test.shape}")
print(f"y_test :  {y_test.shape}")

In [None]:
# Visual check of a few training samples: print the label, then show the image.
for i in [1, 10, 100]:
    print(f"y_train (i={i}): ", y_train[i])
    print(f"X_train (i={i}): ")
    plt.imshow(X_train[i], cmap='gray')
    plt.show()

In [None]:
# Check the input value range (make_double_images divides pixel values by 255.0).
print("X_train min", X_train.min())
print("X_train max", X_train.max())

In [None]:
# Fully connected classifier over flattened 28x28 inputs, built layer by layer.
# NOTE(review): the activation layers are named 'gelu_*' but apply ReLU;
# the names are kept unchanged here - confirm which one was intended.
model = tf.keras.models.Sequential()

# Input: flatten the 28x28 image.
# If the two digits are split and fed as 28x14 inputs, change input_shape.
model.add(tf.keras.layers.Flatten(input_shape=(28, 28), name='input'))

# Hidden block 1
model.add(tf.keras.layers.Dense(512, name='dense_1'))
model.add(tf.keras.layers.Activation(tf.nn.relu, name='gelu_1'))

# Hidden block 2, followed by dropout
model.add(tf.keras.layers.Dense(256, name='dense_2'))
model.add(tf.keras.layers.Activation(tf.nn.relu, name='gelu_2'))
model.add(tf.keras.layers.Dropout(0.2))

# Hidden block 3
model.add(tf.keras.layers.Dense(256, name='dense_3'))
model.add(tf.keras.layers.Activation(tf.nn.relu, name='gelu_3'))

# Output: one unit per number 0-99, turned into probabilities by softmax.
model.add(tf.keras.layers.Dense(100, name='dense_4'))
model.add(tf.keras.layers.Activation(tf.nn.softmax, name='softmax'))

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [None]:
# Compile settings: Adam optimizer, sparse categorical cross-entropy loss
# (the labels are integer class ids, not one-hot vectors), accuracy metric.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Training callbacks: log the per-epoch metrics to CSV_FILE_PATH.
callbacks = [tf.keras.callbacks.CSVLogger(CSV_FILE_PATH)]

In [None]:
# Training: 30 epochs with batches of 100 samples. The test split is passed
# as validation data and the callbacks list is passed through to fit().
history = model.fit(X_train, y_train,
                    batch_size=100,
                    epochs=30,
                    verbose=1,
                    validation_data=(X_test, y_test),
                    callbacks=callbacks)

In [None]:
# Evaluation: report loss and accuracy on the training split, then the test split.
train_loss, train_acc = model.evaluate(X_train, y_train, verbose=1)
print(f"loss(train): {train_loss:.4}")
print(f"accuracy(train): {train_acc:.4}")

print()

test_loss, test_acc = model.evaluate(X_test, y_test, verbose=1)
print(f"loss(test): {test_loss:.4}")
print(f"accuracy(test): {test_acc:.4}")

In [None]:
# Load the per-epoch log stored at CSV_FILE_PATH (the file given to CSVLogger)
# and preview its first rows.
df = pd.read_csv(CSV_FILE_PATH)
df.head()

In [None]:
# Pull the logged series out of the DataFrame as NumPy arrays.
# Note: train_acc / train_loss / test_acc / test_loss are rebound here from
# the evaluation scalars to per-epoch arrays.
epochs, train_acc, train_loss, test_acc, test_loss = (
    df[column].values
    for column in ("epoch", "accuracy", "loss", "val_accuracy", "val_loss")
)

In [None]:
# Plot training vs. validation (test split) accuracy over the logged epochs.
plt.plot(epochs, train_acc, label="train data")
plt.plot(epochs, test_acc, label="test data")
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.legend(loc="lower right")
plt.show()

In [None]:
# Grid of test-set predictions with a correct/incorrect marker per image.
import matplotlib.pyplot as plt
import numpy as np

ROW = 6
COLUMN = 8
num_images = ROW * COLUMN
predictions = model.predict(X_test[:num_images])

# Predicted class = index of the largest output probability.
y_preds = np.argmax(predictions, axis=1)

fig = plt.figure(figsize=(12, 8))

# Iterate over the predictions actually returned, so a test set smaller than
# the grid leaves cells empty instead of raising IndexError.
for i, y_pred in enumerate(y_preds):
    y_true = y_test[i]
    result = "True" if y_true == y_pred else "False"  # correct / incorrect

    plt.subplot(ROW, COLUMN, i + 1)
    plt.imshow(X_test[i], cmap='gray')
    plt.title(f"No.{i} - {result}\ny_true:{y_true}, y_pred:{y_pred}")
    plt.axis("off")

fig.tight_layout()
# Use plt.show() like the other cells: Figure.show() emits a warning on
# non-GUI backends such as the inline notebook backend.
plt.show()

In [None]:
# Bar chart of the model's output probabilities for one test sample.
# The more the probability mass is spread over many classes, the poorer
# the prediction.
prediction_probabilities = predictions[8]
class_labels = np.arange(len(prediction_probabilities))
predicted_class = np.argmax(prediction_probabilities)

plt.figure(figsize=(10, 6))
bars = plt.bar(class_labels, prediction_probabilities, color='skyblue', label='Probability')
# Highlight the bar of the most probable class.
bars[predicted_class].set_color('royalblue')

# Axis setup: one tick per class label, probabilities on a fixed 0-1 scale.
plt.xticks(class_labels)
plt.ylim(0, 1)
plt.xlabel('Class Label')
plt.ylabel('Probability')
plt.grid(axis='y', linestyle='--', alpha=0.7)

plt.legend()
plt.tight_layout()


In [None]:
# Gini impurity of each prediction: 1 - sum of squared class probabilities
# (0 when all probability sits on one class, larger when it is spread out).
# Useful when moving from one-digit to two-digit estimation;
# not needed when the number is predicted directly.
import numpy as np

squared_probs = np.square(predictions)
sum_squared_probs = np.sum(squared_probs, axis=1)
gini_coefficients = 1 - sum_squared_probs

# Plain-list copy of the per-sample values, used for the plot below.
gini_list = list(gini_coefficients)

plt.plot(gini_list)

In [None]:
# Save the trained model to disk.
# NOTE(review): the '.h5' suffix presumably selects the HDF5 format; newer
# Keras releases recommend '.keras' - confirm what downstream loaders expect
# before changing it.
ins_path = 'trained_model_v2.h5'
model.save(ins_path)