# Simple MNIST convnet

**Author:** [fchollet](https://twitter.com/fchollet)<br>
**Date created:** 2015/06/19<br>
**Last modified:** 2020/04/21<br>
**Description:** A simple convnet that achieves ~99% test accuracy on MNIST.

## Setup

In [1]:
!unzip audio_spectrogram_2.zip

Archive:  audio_spectrogram_2.zip
   creating: Electric/
   creating: Folk/
   creating: HipHop/
   creating: International/
   creating: Latin/
  inflating: Electric/001066.mp3     
  inflating: Electric/001073.mp3     
  inflating: Electric/001075.mp3     
  inflating: Electric/000615.mp3     
  inflating: Electric/001069.mp3     
  inflating: Folk/000141.mp3         
  inflating: Folk/000194.mp3         
  inflating: Folk/000193.mp3         
  inflating: Folk/000140.mp3         
  inflating: Folk/000190.mp3         
  inflating: HipHop/000695.mp3       
  inflating: HipHop/000676.mp3       
  inflating: HipHop/000005.mp3       
  inflating: HipHop/000694.mp3       
  inflating: HipHop/000002.mp3       
  inflating: International/001681.mp3  
  inflating: International/001682.mp3  
  inflating: International/001082.mp3  
  inflating: International/000853.mp3  
  inflating: International/001680.mp3  
  inflating: Latin/000704.mp3        
  inflating: Latin/000708.mp3        
  inflati

In [6]:
import numpy as np
import keras
from keras import layers

## Prepare the data

In [7]:
# Dataset constants: ten digit classes, 28x28 images with a single channel.
num_classes = 10
input_shape = (28, 28, 1)

# Fetch MNIST; it arrives already divided into train and test portions.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Convert pixels to float32 in [0, 1] and append a trailing channel axis so
# that every image has shape (28, 28, 1).
x_train = np.expand_dims(x_train.astype("float32") / 255, -1)
x_test = np.expand_dims(x_test.astype("float32") / 255, -1)

# Report the resulting array shape and the number of samples per split.
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")


# One-hot encode the integer class labels.
y_train = keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes=num_classes)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [2]:
!pwd

/content


In [24]:
import tensorflow as tf

# Build the training dataset from the sub-directories of the working directory.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory="./",
    label_mode='categorical',  # one-hot encoded labels
    batch_size=5,
    image_size=(250, 250),  # resize to the resolution the CNN expects
)

# Capture the class names now; they are read directly from the dataset object
# returned above, before any further transformation is applied to it.
# NOTE(review): the recorded run lists 'sample_data' as a sixth class next to
# the five genre folders - consider pointing `directory` at a dedicated folder.
class_names = train_ds.class_names
print(class_names)

Found 25 files belonging to 6 classes.
['Electric', 'Folk', 'HipHop', 'International', 'Latin', 'sample_data']


In [20]:
# Rescale pixel values from the 0-255 range to the 0-1 range.
normalization_layer = tf.keras.layers.Rescaling(scale=1.0 / 255)

# Apply the rescaling to the images only; labels pass through untouched.
train_ds = train_ds.map(lambda images, labels: (normalization_layer(images), labels))
# val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))

# Cache, shuffle and prefetch the pipeline to improve input performance.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
# val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [21]:
from tensorflow.keras import layers, models

# Small CNN for 250x250 three-channel inputs: two convolution/pooling stages
# followed by a dense classifier head.
model = models.Sequential([
    layers.Input(shape=(250, 250, 3)),
    layers.Conv2D(32, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # One output unit per discovered class, so the head always matches the
    # width of the one-hot labels instead of relying on a hard-coded count.
    layers.Dense(len(class_names), activation='softmax'),
])



In [22]:
# Configure training: cross-entropy over one-hot labels, Adam optimizer,
# and accuracy reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for ten passes over the dataset; the returned History object is the
# cell's displayed value.
model.fit(x=train_ds, epochs=10)

Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.1044 - loss: 8.5594
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.2150 - loss: 2.5023
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.6172 - loss: 1.2553
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8394 - loss: 0.9819
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8467 - loss: 0.5862
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8361 - loss: 0.5595
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8850 - loss: 0.3327
Epoch 8/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9072 - loss: 0.1867
Epoch 9/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x7b883818d610>

In [16]:
# Path of the image to classify.
img_path = 'Electric_000615.png'

# Load the image, resized to the model's 250x250 input resolution.
img = tf.keras.preprocessing.image.load_img(path=img_path, target_size=(250, 250))

# Convert the loaded image into a numeric array.
img_array = tf.keras.preprocessing.image.img_to_array(img)

# Prepend a batch axis (needed even for a single image), e.g.
# (250, 250, 3) -> (1, 250, 250, 3), and scale by 1/255 to mirror the
# normalization applied to the training data.
img_array = np.expand_dims(img_array, axis=0) / 255.0

In [23]:
# Run the model on the prepared batch; each output row holds per-class
# probabilities (e.g. [[0.1, 0.9]]).
predictions = model.predict(img_array)

# Index of the highest-probability class for the first (only) image.
predicted_class_index = predictions[0].argmax()

# Report the winning index and the class name it maps to.
print("予測されたクラスインデックス:", predicted_class_index)
print("予測クラス名:", class_names[predicted_class_index])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 250ms/step
予測されたクラスインデックス: 0
予測クラス名: Electric


## Build the model

In [8]:
# MNIST convnet: two 3x3 convolution + 2x2 max-pooling stages, then the
# flattened features pass through dropout into a softmax classifier.
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(filters=32, kernel_size=3, activation="relu"),
        layers.MaxPooling2D(pool_size=2),
        layers.Conv2D(filters=64, kernel_size=3, activation="relu"),
        layers.MaxPooling2D(pool_size=2),
        layers.Flatten(),
        layers.Dropout(rate=0.5),
        layers.Dense(units=num_classes, activation="softmax"),
    ]
)

# Print the layer-by-layer overview of the model.
model.summary()

## Train the model

In [None]:
# Training hyper-parameters.
batch_size = 128
epochs = 15

# Configure the optimizer, loss and reported metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Fit on the training split, holding out 10% of it for validation.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

## Evaluate the trained model

In [None]:
# Evaluate silently on the held-out test split; with the single "accuracy"
# metric configured at compile time, score holds the loss then the accuracy.
score = model.evaluate(x=x_test, y=y_test, verbose=0)

# Report both values.
print("Test loss:", score[0])
print("Test accuracy:", score[1])