<a href="https://colab.research.google.com/github/knsg16/Yatagarasu-Eye/blob/main/yatagarasu_eye.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# JSONデータの解析

In [1]:
import json
from google.colab import drive

# google driveのマウント
drive.mount('/content/drive')

# JSONファイルの読み込み
with open('/content/drive/MyDrive/Yatagarasu-Eye/annotations/instances_default.json', 'r') as file:
    data = json.load(file)

# 画像ファイル名とラベルの抽出
image_labels = {img['file_name']: any(ann['image_id'] == img['id'] and ann['category_id'] == 1 for ann in data['annotations']) for img in data['images']}


Mounted at /content/drive


# 画像データの前処理

In [2]:
import cv2
import numpy as np
import os
from concurrent.futures import ThreadPoolExecutor

base_path = '/content/drive/MyDrive/Yatagarasu-Eye/images'

def load_and_preprocess_image(file_name):
    img_path = os.path.join(base_path, file_name)
    img = cv2.imread(img_path)
    img = cv2.resize(img, (1280, 720))
    img = img / 255.0
    return img

# 並列処理を使用して画像を前処理
with ThreadPoolExecutor(max_workers=10) as executor:
    preprocessed_images = {file_name: executor.submit(load_and_preprocess_image, file_name) for file_name in image_labels.keys()}

# 結果を収集
preprocessed_images = {file_name: future.result() for file_name, future in preprocessed_images.items()}


# シーケンスデータの作成

In [3]:
sequence_length = 30  # 連続するフレーム数
sequences = []
labels = []

file_names = list(preprocessed_images.keys())
for i in range(len(file_names) - sequence_length):
    sequence = [preprocessed_images[file_names[j]] for j in range(i, i + sequence_length)]
    label = image_labels[file_names[i + sequence_length - 1]]
    sequences.append(sequence)
    labels.append(label)


# トレーニングデータの準備

In [4]:
import tensorflow as tf


def data_generator():
    for seq, label in zip(sequences, labels):
        yield seq, label

# データセットの作成
output_signature = (
    tf.TensorSpec(shape=(30, 720, 1280, 3), dtype=tf.float32),  # バッチ次元を除去
    tf.TensorSpec(shape=(), dtype=tf.int32),  # バッチ次元を除去
)
dataset = tf.data.Dataset.from_generator(data_generator, output_signature=output_signature)

# バッチサイズを8に設定
dataset = dataset.batch(2)

# モデルの作成

In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Conv3D(8, kernel_size=(3, 3, 3), activation='relu', input_shape=(30, 720, 1280, 3)),
    layers.MaxPooling3D(pool_size=(2, 2, 2)),
    layers.Conv3D(16, kernel_size=(3, 3, 3), activation='relu'),
    layers.MaxPooling3D(pool_size=(2, 2, 2)),
    # 他のConv3DレイヤーやMaxPooling3Dレイヤーを追加
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# モデルのトレーニング

In [12]:
for x_batch, y_batch in dataset.take(1):
    # このバッチの形状を印刷
    print('x_batch shape:', x_batch.shape)
    print('y_batch shape:', y_batch.shape)



x_batch shape: (8, 30, 720, 1280, 3)
y_batch shape: (8,)


In [6]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(dataset, epochs=20)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [8]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 28, 718, 1278, 8   656       
                             )                                   
                                                                 
 max_pooling3d (MaxPooling3  (None, 14, 359, 639, 8)   0         
 D)                                                              
                                                                 
 conv3d_1 (Conv3D)           (None, 12, 357, 637, 16   3472      
                             )                                   
                                                                 
 max_pooling3d_1 (MaxPoolin  (None, 6, 178, 318, 16)   0         
 g3D)                                                            
                                                                 
 flatten (Flatten)           (None, 5433984)           0

In [9]:
import tensorflow as tf

# モデルを.h5形式で保存
model.save('model.h5')

  saving_api.save_model(


# アップロードされた動画に対して、ハンドの判定を行い、その結果を動画に書き込んで表示するプログラム

In [12]:
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
from IPython.display import display, HTML
from base64 import b64encode
from google.colab import files
import tensorflow as tf
import os


# 学習済みのモデルをロード
model = tf.keras.models.load_model('/content/model.h5')

# ユーザーに動画をアップロードさせる
uploaded = files.upload()
video_path = next(iter(uploaded))

# 動画の読み込みと書き出しの準備
cap = cv2.VideoCapture(video_path)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, 20.0, (int(cap.get(3)), int(cap.get(4))))

# モデル予測用のフレームを保存するリスト
frames = []

# 動画が終わるまでフレームを読み取り続ける
while(cap.isOpened()):
    ret, frame = cap.read()
    if ret:
        # フレームのサイズを変更（モデルに合わせる）
        frame_resized = cv2.resize(frame, (1280, 720))  # モデルの入力サイズに合わせる
        frames.append(frame_resized)

        # 30フレーム溜まったらモデルの予測を実行
        if len(frames) == 30:
            input_data = np.expand_dims(frames, axis=0)
            prediction = model.predict(input_data)

            # 予測結果に基づいてラベルを決定
            print(prediction[0][0])
            label = "Hand" if prediction[0][0] > 0.5 else "No Hand"

            # ラベルをフレームに表示
            cv2.putText(frame, label, (10,60), cv2.FONT_HERSHEY_SIMPLEX, 2.0, (0,0,255), 2, cv2.LINE_AA)

            # ラベルが表示されたフレームを書き出し
            out.write(frame)

            # 最も古いフレームを削除
            frames.pop(0)
    else:
        break

# 動画の読み取りと書き出しを終了
cap.release()
out.release()

# AVIファイルをMP4ファイルに変換
!ffmpeg -i output.avi output.mp4

# MP4ファイルを表示・再生
mp4 = open('output.mp4','rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
display(HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url))




Saving hand05.mp4 to hand05.mp4
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsn