# 100 Sports Image Classification

## データのインポート

In [1]:
# 初回のみ実行

# import kagglehub

# # Download latest version
# path = kagglehub.dataset_download("gpiosenka/sports-classification")

# print("Path to dataset files:", path)

## データの表示

In [2]:
import pandas as pd

data_path = './kagglehub_cache/datasets/gpiosenka/sports-classification/versions/9/'
csv_path = data_path + 'sports.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,class id,filepaths,labels,data set
0,0,train/air hockey/001.jpg,air hockey,train
1,0,train/air hockey/002.jpg,air hockey,train
2,0,train/air hockey/003.jpg,air hockey,train
3,0,train/air hockey/004.jpg,air hockey,train
4,0,train/air hockey/005.jpg,air hockey,train


### カテゴリ列のユニーク値チェック

In [3]:
# categorical_columns = ["labels",'data set']
categorical_columns = ['data set']
for col in categorical_columns:
    if col in df.columns:
        unique_values = df[col].value_counts()
        print(f"{col} 列のユニーク値 ({len(unique_values)} 個):")
        for value, count in unique_values.items():
            print(f"  {value}: {count} 件")

data set 列のユニーク値 (3 個):
  train: 13493 件
  test: 500 件
  valid: 500 件


## データの分離

In [4]:
df_train = df[df['data set'] == 'train']
df_test = df[df['data set'] == 'test']
df_valid = df[df['data set'] == 'valid']

display(df_train.head())
# display(df_test.head())
# display(df_valid.head())

Unnamed: 0,class id,filepaths,labels,data set
0,0,train/air hockey/001.jpg,air hockey,train
1,0,train/air hockey/002.jpg,air hockey,train
2,0,train/air hockey/003.jpg,air hockey,train
3,0,train/air hockey/004.jpg,air hockey,train
4,0,train/air hockey/005.jpg,air hockey,train


## CNN

In [5]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

2025-07-25 09:02:48.593384: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-25 09:02:48.629355: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-25 09:02:49.128261: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-25 09:02:49.912151: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753434170.320448    1515 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753434170.41

### データの前処理

#### 訓練用データセットの前処理

In [6]:
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)
train_set = train_datagen.flow_from_directory(data_path + 'train',
                                                 target_size = (64, 64),
                                                 batch_size = 32,
                                                 class_mode = 'categorical')
# 2分類の場合は class_mode = 'binary' を指定
# 多分類の場合は class_mode = 'categorical' を指定

Found 13492 images belonging to 100 classes.


#### 検証用データセットの前処理

In [7]:
valid_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)
valid_set = valid_datagen.flow_from_directory(data_path + 'valid',
                                                 target_size = (64, 64),
                                                 batch_size = 32,
                                                 class_mode = 'categorical')

Found 500 images belonging to 100 classes.


#### テストデータセットの前処理

In [8]:
test_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)
test_set = test_datagen.flow_from_directory(data_path + 'test',
                                                 target_size = (64, 64),
                                                 batch_size = 32,
                                                 class_mode = 'categorical')

Found 500 images belonging to 100 classes.


### CNNの構築

#### イニシャライズ

In [9]:
cnn = tf.keras.models.Sequential()

#### 畳み込みandプーリング

In [10]:
# 一層目
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[64, 64, 3]))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))

# 二層目
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-07-25 09:03:07.518790: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


#### Flattening

In [11]:
cnn.add(tf.keras.layers.Flatten())
cnn.add(tf.keras.layers.Dense(units=128, activation='relu'))

#### 出力層の追加

In [12]:
cnn.add(tf.keras.layers.Dense(units=100, activation='softmax'))
# 2分類の場合は units=1, activation='sigmoid' を指定
# 多分類の場合は units=[number], activation='softmax' を指定

### モデル学習

#### モデルのコンパイルと訓練

In [13]:
cnn.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
# 2分類の場合は loss = 'binary_crossentropy' を指定
# 多分類の場合は loss = 'categorical_crossentropy' を指定

#### モデルの訓練

In [14]:
cnn.fit(x = train_set, validation_data = valid_set, epochs = 25)

  self._warn_if_super_not_called()


Epoch 1/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 534ms/step - accuracy: 0.0373 - loss: 4.4352

  self._warn_if_super_not_called()


[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 556ms/step - accuracy: 0.0374 - loss: 4.4346 - val_accuracy: 0.1240 - val_loss: 3.7009
Epoch 2/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 427ms/step - accuracy: 0.1467 - loss: 3.6057 - val_accuracy: 0.2240 - val_loss: 3.1756
Epoch 3/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 346ms/step - accuracy: 0.2415 - loss: 3.0656 - val_accuracy: 0.3020 - val_loss: 2.7728
Epoch 4/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 357ms/step - accuracy: 0.3055 - loss: 2.7746 - val_accuracy: 0.3580 - val_loss: 2.6482
Epoch 5/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 427ms/step - accuracy: 0.3474 - loss: 2.5693 - val_accuracy: 0.3600 - val_loss: 2.5259
Epoch 6/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 453ms/step - accuracy: 0.3781 - loss: 2.4411 - val_accuracy: 0.4020 - val_loss: 2.4072
Epoch 7/25
[1m

<keras.src.callbacks.history.History at 0x7ff22a2ebc20>

## 結果の出力

In [15]:
import os
import json

# クラス名のマッピング（index → label）
class_indices = train_set.class_indices

# 保存用ディレクトリ
model_path = './ml'
os.makedirs(model_path, exist_ok=True)

# class_indices を JSON に保存（例: { 'air hockey': 0, 'archery': 1, ... }）
with open(os.path.join(model_path, "class_indices.json"), "w") as f:
    json.dump(class_indices, f, indent=2, ensure_ascii=False)
print("クラスインデックス（class_indices）を JSON 出力しました。")

# モデル保存
cnn.save(os.path.join(model_path, 'model.h5'))
print("モデルを保存しました。")




クラスインデックス（class_indices）を JSON 出力しました。
モデルを保存しました。
