In [None]:
#!pip3 install tensorflow
#!pip install protobuf

# AIの中間層の可視化

In [None]:
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input

## MNISTデータの読み込み

embeddingsを取得するために、元データが必要。

In [None]:
# Load the MNIST images and labels (training and test splits).
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize both image arrays by dividing the pixel values by 255.0.
train_images, test_images = train_images / 255.0, test_images / 255.0

# Sanity-check the shapes of the loaded arrays.
print("Training data shape:", train_images.shape)
print("Testing data shape:", test_images.shape)


## モデルの読み込み

In [None]:
# Load the previously saved model and print its layer summary.
MODEL_PATH = 'model.keras'  # or 'model.h5'
loaded_model = tf.keras.models.load_model(MODEL_PATH)
loaded_model.summary()

### このモデルでは、どのような文字の間違いが多いかを確認
あわせて、モデルが正しく読み込めていることも確認する。

In [None]:
import numpy as np
from collections import Counter

def display_predictions(model, input_data, true_labels):
    """Print how often each (actual, predicted) label pair is misclassified.

    Args:
        model: Object whose ``predict(input_data)`` returns one row of class
            scores per sample.
        input_data: Samples handed unchanged to ``model.predict``.
        true_labels: Sequence of ground-truth class indices, one per sample.

    Returns:
        None. One line per mismatched pair is printed, most frequent first.
    """
    # The predicted class is the index of the highest score in each row.
    predicted_labels = np.argmax(model.predict(input_data), axis=1)

    # Tally every (actual, predicted) pair in which the two labels disagree.
    mismatches = Counter(
        (actual, predicted_labels[position])
        for position, actual in enumerate(true_labels)
        if actual != predicted_labels[position]
    )

    # Report the confusions in descending order of frequency.
    for (actual, predicted), count in mismatches.most_common():
        print(f"Mismatched Pair: Actual Label {actual} | Predicted Label {predicted} | Count: {count}")

# Evaluate on the full test split: every test image with its label.
sample_data, sample_labels = test_images, test_labels

# Print the misclassified (actual, predicted) label pairs for the loaded model.
display_predictions(model=loaded_model, input_data=sample_data, true_labels=sample_labels)

## 埋め込みembeddings (各層のベクトル)の取得
### 各層のembeddings取得用関数定義

In [None]:
# Helper that extracts the embeddings (outputs) of a named layer.
def get_embeddings_by_layer_name(model, input_data, layer_name):
    """Return the output of the layer called ``layer_name`` for ``input_data``.

    A sub-model that shares ``model``'s input and ends at the requested layer
    is built, and its predictions on ``input_data`` are returned.

    Args:
        model: Model providing ``input`` and ``get_layer(name)``.
        input_data: Samples passed to the sub-model's ``predict``.
        layer_name: Name of the layer whose output is wanted.

    Returns:
        Whatever ``predict`` of the sub-model returns for ``input_data``.
    """
    source_tensor = model.input
    target_tensor = model.get_layer(layer_name).output
    truncated_model = Model(inputs=source_tensor, outputs=target_tensor)
    return truncated_model.predict(input_data)

### embeddingsの取得

In [None]:
# Embeddings of the training images, one array per layer of interest.
# Per the original notes: 'dense_1' = first Dense layer, 'dense_2' = second
# Dense layer, 'output' = output layer (verify against loaded_model.summary()).
embeddings_train_dense_1, embeddings_train_dense_2, embeddings_train_output = (
    get_embeddings_by_layer_name(loaded_model, train_images, layer_name)
    for layer_name in ('dense_1', 'dense_2', 'output')
)

In [None]:
# Embeddings of the test (validation) images, one array per layer of interest.
# Per the original notes: 'dense_1' = first Dense layer, 'dense_2' = second
# Dense layer, 'output' = output layer (verify against loaded_model.summary()).
embeddings_test_dense_1, embeddings_test_dense_2, embeddings_test_output = (
    get_embeddings_by_layer_name(loaded_model, test_images, layer_name)
    for layer_name in ('dense_1', 'dense_2', 'output')
)

## toorPIAによる中間層のembeddingsの可視化

In [None]:
import toorpia.utils as tp

### 訓練データのembeddingsの解析

In [None]:
%%time
# Visualize the training-data embeddings with toorPIA.

# Options shared by every fit_transform call. The two normalization switches
# below are kept commented out, i.e. they are not passed:
#    'item_normalization': True,
#    'vector_normalization': True,
common_options = dict(randomSeed=0)

# One fit per layer, each with its own working_dir.
results_train_dense_1 = tp.fit_transform(
    embeddings_train_dense_1,
    working_dir='analysis_dense_1',
    **common_options,
)
results_train_dense_2 = tp.fit_transform(
    embeddings_train_dense_2,
    working_dir='analysis_dense_2',
    **common_options,
)
results_train_output = tp.fit_transform(
    embeddings_train_output,
    working_dir='analysis_output',
    **common_options,
)

### 検証データのembeddingsの解析

In [None]:
%%time
# Visualize the test-data embeddings with toorPIA.
# Relies on `common_options` and on the per-layer working_dir values used for
# the training-data analysis, so that analysis has to run first.
# NOTE(review): addplot=True presumably adds these points to the map already
# stored in each working_dir -- confirm against the toorPIA documentation.
results_test_dense_1 = tp.fit_transform(
    embeddings_test_dense_1,
    addplot=True,
    working_dir='analysis_dense_1',
    **common_options,
)
results_test_dense_2 = tp.fit_transform(
    embeddings_test_dense_2,
    addplot=True,
    working_dir='analysis_dense_2',
    **common_options,
)
results_test_output = tp.fit_transform(
    embeddings_test_output,
    addplot=True,
    working_dir='analysis_output',
    **common_options,
)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Side-by-side toorPIA maps of the 'dense_1' embeddings:
# left = training data, right = test data drawn over the training map.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 12))

# Name the layer on the figure itself: the panel titles alone are the same
# for every layer, so the per-layer figures would otherwise be indistinguishable.
fig.suptitle("toorPIA map of 'dense_1' embeddings", fontsize=20)

# Columns 0 and 1 of each toorPIA result are used as the plot coordinates.
x_train, y_train = results_train_dense_1[:, 0], results_train_dense_1[:, 1]
x_test, y_test = results_test_dense_1[:, 0], results_test_dense_1[:, 1]

# One marker shape per digit label 0-9.
markers = ['o', 's', '^', 'v', '<', '>', 'p', '*', 'h', 'D']

# Left panel: training points, one scatter series per digit label.
for i in range(10):
    idx = np.where(train_labels == i)
    ax1.scatter(x_train[idx], y_train[idx], c=[plt.cm.jet(i/9)], marker=markers[i], label=str(i), alpha=0.2, s=10)

# Right panel: all training points in light gray as the background map ...
ax2.scatter(x_train, y_train, label='base', c='lightgray', alpha=0.2, s=10)

# ... with the test points on top, one scatter series per digit label.
for i in range(10):
    # Labels are cut to the number of plotted test points before matching.
    idx = np.where(test_labels[0:len(x_test)] == i)
    ax2.scatter(x_test[idx], y_test[idx], c=[plt.cm.jet(i/9)], marker=markers[i], label=str(i), alpha=0.5, s=25)

# Same framing for both panels so they can be compared directly.
for panel, panel_title in ((ax1, 'Training Data'), (ax2, 'Test Data')):
    panel.set_xlim(-4, 4)
    panel.set_ylim(-4, 4)
    panel.set_aspect('equal')
    panel.legend()
    panel.set_title(panel_title)

# Show the combined plot
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Side-by-side toorPIA maps of the 'dense_2' embeddings:
# left = training data, right = test data drawn over the training map.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 12))

# Name the layer on the figure itself: the panel titles alone are the same
# for every layer, so the per-layer figures would otherwise be indistinguishable.
fig.suptitle("toorPIA map of 'dense_2' embeddings", fontsize=20)

# Columns 0 and 1 of each toorPIA result are used as the plot coordinates.
x_train, y_train = results_train_dense_2[:, 0], results_train_dense_2[:, 1]
x_test, y_test = results_test_dense_2[:, 0], results_test_dense_2[:, 1]

# One marker shape per digit label 0-9.
markers = ['o', 's', '^', 'v', '<', '>', 'p', '*', 'h', 'D']

# Left panel: training points, one scatter series per digit label.
for i in range(10):
    idx = np.where(train_labels == i)
    ax1.scatter(x_train[idx], y_train[idx], c=[plt.cm.jet(i/9)], marker=markers[i], label=str(i), alpha=0.2, s=10)

# Right panel: all training points in light gray as the background map ...
ax2.scatter(x_train, y_train, label='base', c='lightgray', alpha=0.2, s=10)

# ... with the test points on top, one scatter series per digit label.
for i in range(10):
    # Labels are cut to the number of plotted test points before matching.
    idx = np.where(test_labels[0:len(x_test)] == i)
    ax2.scatter(x_test[idx], y_test[idx], c=[plt.cm.jet(i/9)], marker=markers[i], label=str(i), alpha=0.5, s=25)

# Same framing for both panels so they can be compared directly.
for panel, panel_title in ((ax1, 'Training Data'), (ax2, 'Test Data')):
    panel.set_xlim(-4, 4)
    panel.set_ylim(-4, 4)
    panel.set_aspect('equal')
    panel.legend()
    panel.set_title(panel_title)

# Show the combined plot
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Side-by-side toorPIA maps of the 'output' layer embeddings:
# left = training data, right = test data drawn over the training map.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 12))

# Name the layer on the figure itself: the panel titles alone are the same
# for every layer, so the per-layer figures would otherwise be indistinguishable.
fig.suptitle("toorPIA map of 'output' embeddings", fontsize=20)

# Columns 0 and 1 of each toorPIA result are used as the plot coordinates.
x_train, y_train = results_train_output[:, 0], results_train_output[:, 1]
x_test, y_test = results_test_output[:, 0], results_test_output[:, 1]

# One marker shape per digit label 0-9.
markers = ['o', 's', '^', 'v', '<', '>', 'p', '*', 'h', 'D']

# Left panel: training points, one scatter series per digit label.
for i in range(10):
    idx = np.where(train_labels == i)
    ax1.scatter(x_train[idx], y_train[idx], c=[plt.cm.jet(i/9)], marker=markers[i], label=str(i), alpha=0.2, s=10)

# Right panel: all training points in light gray as the background map ...
ax2.scatter(x_train, y_train, label='base', c='lightgray', alpha=0.2, s=10)

# ... with the test points on top, one scatter series per digit label.
for i in range(10):
    # Labels are cut to the number of plotted test points before matching.
    idx = np.where(test_labels[0:len(x_test)] == i)
    ax2.scatter(x_test[idx], y_test[idx], c=[plt.cm.jet(i/9)], marker=markers[i], label=str(i), alpha=0.5, s=25)

# Same framing for both panels so they can be compared directly.
for panel, panel_title in ((ax1, 'Training Data'), (ax2, 'Test Data')):
    panel.set_xlim(-4, 4)
    panel.set_ylim(-4, 4)
    panel.set_aspect('equal')
    panel.legend()
    panel.set_title(panel_title)

# Show the combined plot
plt.show()