# 100 Sports Image Classification

## データのインポート

In [None]:
# Optional dataset download via kagglehub, left disabled; the cells below read
# the data from a local directory (see `data_path`) instead.
# import kagglehub
# path = kagglehub.dataset_download("gpiosenka/sports-classification")
# print("Path to dataset files:", path)

## 設定値

In [None]:
# Number of training epochs.
# Rule of thumb: quick smoke test 10-30, lightweight model 30-50, full training 50-100.
EPOCHS = 10

# Edge length (pixels) the images are resized to. 224 is the usual size for
# transfer learning; around 128 is a good balance for a hand-built ConvNet.
IMG_SIZE = 128

# Mini-batch size. 32 saves memory but trains slower; 64 trains faster and
# gives more stable learning.
BATCH_SIZE = 64

# Number of units in the output layer.
OUTPUT_LAYER_SIZE = 10

## データの表示

In [None]:
import pandas as pd

# Alternative location when the dataset was fetched through kagglehub:
# data_path = './kagglehub_cache/datasets/gpiosenka/sports-classification/versions/9/'
# `data_path` keeps its trailing slash because later cells build paths by
# plain string concatenation.
data_path = "./test_data/"
csv_path = f"{data_path}sports.csv"

# Load the index CSV and preview its first rows.
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,class id,filepaths,labels,data set
0,0,train/air hockey/001.jpg,air hockey,train
1,0,train/air hockey/002.jpg,air hockey,train
2,0,train/air hockey/003.jpg,air hockey,train
3,0,train/air hockey/004.jpg,air hockey,train
4,0,train/air hockey/005.jpg,air hockey,train


### カテゴリ列のユニーク値チェック

In [None]:
# Columns whose distinct values (and their frequencies) should be reported.
categorical_columns = ["data set"]

for col in categorical_columns:
    # Silently skip names that are not present in the frame.
    if col not in df.columns:
        continue

    unique_values = df[col].value_counts()
    print(f"{col} 列のユニーク値 ({len(unique_values)} 個):")
    for value, count in unique_values.items():
        print(f"  {value}: {count} 件")

data set 列のユニーク値 (3 個):
  train: 13493 件
  test: 500 件
  valid: 500 件


## データの分離

In [None]:
# Slice the index frame into one frame per value of the "data set" column,
# in the order train / test / valid.
df_train, df_test, df_valid = (
    df[df["data set"] == split_name] for split_name in ("train", "test", "valid")
)

# Preview the training rows.
display(df_train.head())

Unnamed: 0,class id,filepaths,labels,data set
0,0,train/air hockey/001.jpg,air hockey,train
1,0,train/air hockey/002.jpg,air hockey,train
2,0,train/air hockey/003.jpg,air hockey,train
3,0,train/air hockey/004.jpg,air hockey,train
4,0,train/air hockey/005.jpg,air hockey,train


## CNN

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

### データの前処理

In [None]:
def _make_directory_iterator(datagen, subset_dir, shuffle):
    """Create a directory iterator for one dataset split.

    Args:
        datagen: The ``ImageDataGenerator`` that preprocesses the images.
        subset_dir: Sub-directory of ``data_path`` holding the split
            (one folder per class).
        shuffle: Whether batches are drawn in random order.

    Returns:
        The iterator returned by ``datagen.flow_from_directory`` yielding
        ``(IMG_SIZE, IMG_SIZE)`` images in batches of ``BATCH_SIZE`` with
        one-hot ("categorical") labels.
    """
    return datagen.flow_from_directory(
        data_path + subset_dir,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode="categorical",
        shuffle=shuffle,
    )


# Training data: rescale to [0, 1] and apply random augmentation.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True
)
train_set = _make_directory_iterator(train_datagen, "train", shuffle=True)

# Validation / test data: rescale only. Random shear/zoom/flip on evaluation
# images makes the reported metrics noisy and non-reproducible, so it is
# restricted to the training split. Shuffling is disabled so that predictions
# line up with the iterator's `.classes` / `.filenames` order.
valid_datagen = ImageDataGenerator(rescale=1.0 / 255)
valid_set = _make_directory_iterator(valid_datagen, "valid", shuffle=False)

test_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_set = _make_directory_iterator(test_datagen, "test", shuffle=False)

Found 1289 images belonging to 10 classes.
Found 50 images belonging to 10 classes.
Found 50 images belonging to 10 classes.


### CNNの構築

In [None]:
# Small ConvNet: two conv/max-pool stages followed by a dense classifier head.
cnn = tf.keras.models.Sequential()

# Declare the input shape with an explicit Input object instead of passing
# `input_shape=` to the first Conv2D; Keras warns about the latter for
# Sequential models.
cnn.add(tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)))

# Feature extractor.
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation="relu"))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))
cnn.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation="relu"))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))

# Classifier head: flatten -> dense -> dropout -> softmax over the classes.
cnn.add(tf.keras.layers.Flatten())
cnn.add(tf.keras.layers.Dense(units=128, activation="relu"))
cnn.add(tf.keras.layers.Dropout(0.3))
cnn.add(tf.keras.layers.Dense(units=OUTPUT_LAYER_SIZE, activation="softmax"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Top-k accuracy metrics to track alongside plain accuracy: (k, metric name).
top_k_specs = ((3, "top3_acc"), (5, "top5_acc"))
top_k_metrics = [
    tf.keras.metrics.TopKCategoricalAccuracy(k=k, name=metric_name)
    for k, metric_name in top_k_specs
]

# Adam optimizer with categorical cross-entropy, matching the one-hot labels
# produced by the "categorical" generators.
cnn.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=[tf.keras.metrics.CategoricalAccuracy(name="accuracy"), *top_k_metrics],
)

In [None]:
# Train for EPOCHS epochs, evaluating on the validation split after each one.
# The returned History object is kept in `history`.
history = cnn.fit(
    train_set,
    epochs=EPOCHS,
    validation_data=valid_set,
)

  self._warn_if_super_not_called()


Epoch 1/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 569ms/step - accuracy: 0.0867 - loss: 2.9443 - top3_acc: 0.2989 - top5_acc: 0.4997

  self._warn_if_super_not_called()


[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 610ms/step - accuracy: 0.1040 - loss: 2.6012 - top3_acc: 0.3142 - top5_acc: 0.5252 - val_accuracy: 0.1000 - val_loss: 2.2567 - val_top3_acc: 0.3000 - val_top5_acc: 0.5600
Epoch 2/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 552ms/step - accuracy: 0.1389 - loss: 2.2046 - top3_acc: 0.3949 - top5_acc: 0.6144 - val_accuracy: 0.2600 - val_loss: 2.0923 - val_top3_acc: 0.4400 - val_top5_acc: 0.6000
Epoch 3/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 553ms/step - accuracy: 0.2816 - loss: 1.9736 - top3_acc: 0.5919 - top5_acc: 0.7665 - val_accuracy: 0.3600 - val_loss: 1.7225 - val_top3_acc: 0.7400 - val_top5_acc: 0.8800
Epoch 4/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 555ms/step - accuracy: 0.4081 - loss: 1.7343 - top3_acc: 0.7269 - top5_acc: 0.8619 - val_accuracy: 0.5600 - val_loss: 1.3866 - val_top3_acc: 0.8000 - val_top5_acc: 0.8800
Epoch 5/10
[1m21/21[0

In [None]:
# Evaluate on the validation split and print every metric by name.
# `return_dict=True` is used instead of zipping with `cnn.metrics_names`:
# on recent Keras versions `metrics_names` is ["loss", "compile_metrics"], so
# the zip mislabelled accuracy and silently dropped the top-3 / top-5 values.
results = cnn.evaluate(valid_set, return_dict=True)
for name, value in results.items():
    print(f"{name}: {value:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step - accuracy: 0.7400 - loss: 0.7875 - top3_acc: 0.9200 - top5_acc: 0.9400
loss: 0.7875
compile_metrics: 0.7400


In [None]:
import os
import json

# Persist the class-name -> index mapping next to the model so predictions
# can be decoded later.
class_indices = train_set.class_indices
model_path = "./ml"
os.makedirs(model_path, exist_ok=True)

# The JSON is written with ensure_ascii=False, so non-ASCII class names are
# emitted verbatim; open the file as UTF-8 explicitly rather than relying on
# the platform default encoding (which can raise UnicodeEncodeError).
with open(
    os.path.join(model_path, "class_indices.json"), "w", encoding="utf-8"
) as f:
    json.dump(class_indices, f, indent=2, ensure_ascii=False)
print("クラスインデックス（class_indices）を JSON 出力しました。")

# Save the trained model; the file name records the number of epochs used.
model_filename = f"model_{EPOCHS}epochs.h5"
cnn.save(os.path.join(model_path, model_filename))
print(f"モデルを保存しました: {model_filename}")



クラスインデックス（class_indices）を JSON 出力しました。
モデルを保存しました: model_10epochs.h5
