<a href="https://colab.research.google.com/github/philipp-ding/AI/blob/main/LoadModelAndPredictSignLanguageCV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Projektrealisierung:

**Aufgabe:**
Erstellung einer Applikation mit Kamera Input, die Zeichensprache von nicht-Hörenden / nicht-Sprechenden Personen konvertiert und diese anderen Meeting-Teilnehmern als Untertitel ausgibt. Bereitstellung eines Frontends zu Demonstrationszwecken und einer API, um die Modelle in andere Applikationen wie Teams oder Discord zu integrieren.

How to execute this notebook:
1. Go to https://drive.google.com/file/d/1be8Cai-xqnSQKJQmnVxAuEFmm_xU1N9Z/view?usp=sharing (the zip file with the 100 glosses) and add a shortcut to this file to your Google Drive, so that it is available once the Drive is mounted.
2. Go to https://drive.google.com/file/d/1EIE3FUYi_hvIxAEqxaxxmrHlE1GDmcND/view?usp=sharing (the WASL100.json file) and add a shortcut to this file to your Google Drive as well.
3. Run the cells of this notebook from top to bottom.


# 1 Sign Language Detection

## 1.1 Imports

In [None]:
# The way this tutorial uses the `TimeDistributed` layer requires TF>=2.10
!pip install -U "tensorflow>=2.10.0"

Collecting tensorflow>=2.10.0
  Downloading tensorflow-2.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (524.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m524.1/524.1 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting keras<2.14,>=2.13.1 (from tensorflow>=2.10.0)
  Downloading keras-2.13.1-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m32.8 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard<2.14,>=2.13 (from tensorflow>=2.10.0)
  Downloading tensorboard-2.13.0-py3-none-any.whl (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m26.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-estimator<2.14,>=2.13.0 (from tensorflow>=2.10.0)
  Downloading tensorflow_estimator-2.13.0-py2.py3-none-any.whl (440 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m440.8/440.8 kB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
Collecti

In [None]:
!pip install --upgrade pip



In [None]:
!pip install remotezip tqdm opencv-python
!pip install -q git+https://github.com/tensorflow/docs

Collecting remotezip
  Downloading remotezip-0.12.1.tar.gz (7.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: remotezip
  Building wheel for remotezip (setup.py) ... [?25l[?25hdone
  Created wheel for remotezip: filename=remotezip-0.12.1-py3-none-any.whl size=7933 sha256=e483ffbc7306b2b1ed1734733c2c9534726c18b85a32d596b678819a1bc7c4ba
  Stored in directory: /root/.cache/pip/wheels/fc/76/04/beed1a6df4eb7430ee13c3900746edd517e5e597298d1f73f3
Successfully built remotezip
Installing collected packages: remotezip
Successfully installed remotezip-0.12.1
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone


In [None]:
import tqdm
import random
import pathlib
import itertools
import collections

import os
import cv2
import numpy as np
import remotezip as rz

import tensorflow as tf

# Some modules to display an animation using imageio.
import imageio
from IPython import display
from urllib import request
from tensorflow_docs.vis import embed

## 1.2 Setup Test Dir

In [None]:
# Directory on the mounted Drive that FrameGenerator scans; it looks for videos
# one level down, in one sub-folder per class (`<class>/<video>.mp4`).
video_path_for_generator = "/content/drive/MyDrive/test"
# video_file_path = video_path_for_generator +
# NOTE(review): this value is later passed to cv2.VideoCapture (which needs a
# single video file) and also wrapped in pathlib.Path for a second
# FrameGenerator (which needs a directory of class folders) -- confirm which
# of the two is intended.
video_file_path = "/content/test_video/"        # "/content/africa_basketball_sleep.mp4"
video_path_for_generator = pathlib.Path(video_path_for_generator)

In [None]:
# connect to google drive --> allows to store kaggle.json API token under the main folder in the drive,
#   so that it doesn't have to be uploaded every time
# Alternatively the kaggle.json can be uploaded under /content/

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 1.4 preprocess video

In [None]:
def format_frames(frame, output_size):
  """
    Convert a single video frame to float32 and letterbox it to a fixed size.

    Args:
      frame: Image taken from a video.
      output_size: Target size, unpacked into `tf.image.resize_with_pad`
        as (target_height, target_width).

    Return:
      The float32 frame, resized with its aspect ratio kept and padded to
      the requested size.
  """
  as_float = tf.image.convert_image_dtype(frame, tf.float32)
  return tf.image.resize_with_pad(as_float, *output_size)

In [None]:
def frames_from_video_file(video_path, n_frames, output_size = (224,224), frame_step = 15):
  """
    Sample `n_frames` frames, `frame_step` source frames apart, from a video.

    The sampling window starts at a random position when the video is long
    enough, otherwise at frame 0. Positions that lie past the end of the
    video are filled with all-zero frames.

    Args:
      video_path: File path to the video.
      n_frames: Number of frames to be created per video file.
      output_size: Pixel size of the output frame image, passed to `format_frames`.
      frame_step: Number of source frames between two sampled frames.

    Return:
      A NumPy array of frames in the shape of (n_frames, height, width, channels),
      with the channel axis reversed (OpenCV's BGR order becomes RGB).

    Raises:
      ValueError: If not even the first frame can be read from `video_path`.
  """
  src = cv2.VideoCapture(str(video_path))
  try:
    # OpenCV reports the frame count as a float; random.randint needs integers.
    video_length = int(src.get(cv2.CAP_PROP_FRAME_COUNT))

    need_length = 1 + (n_frames - 1) * frame_step

    if need_length > video_length:
      start = 0
    else:
      max_start = video_length - need_length
      # randint includes its upper bound, so max_start is the last start
      # position from which the whole window still fits into the video.
      start = random.randint(0, max_start)

    src.set(cv2.CAP_PROP_POS_FRAMES, start)
    # ret is a boolean indicating whether read was successful, frame is the image itself
    ret, frame = src.read()
    if not ret:
      raise ValueError(f"Could not read any frame from video file: {video_path}")
    result = [format_frames(frame, output_size)]

    for _ in range(n_frames - 1):
      # Skip ahead; only the last frame read in this inner loop is kept.
      for _ in range(frame_step):
        ret, frame = src.read()
      if ret:
        result.append(format_frames(frame, output_size))
      else:
        result.append(np.zeros_like(result[0]))
  finally:
    # Release the capture even if decoding or formatting fails.
    src.release()

  return np.array(result)[..., [2, 1, 0]]

## 1.5 Create Dataset Generator

In [None]:
class FrameGenerator:
  """Callable that yields (frames, label) pairs for the videos below a directory."""

  def __init__(self, path, n_frames, training = False):
    """ Scan `path` for class folders and remember the sampling settings.

      Args:
        path: pathlib.Path of the directory holding one sub-folder per class.
        n_frames: Number of frames to sample from every video.
        training: When True, the videos are visited in shuffled order.
    """
    self.path = path
    self.n_frames = n_frames
    self.training = training
    # Class ids are the positions of the folder names in sorted order.
    folder_names = {entry.name for entry in self.path.iterdir() if entry.is_dir()}
    self.class_names = sorted(folder_names)
    self.class_ids_for_name = {name: idx for idx, name in enumerate(self.class_names)}

  def get_files_and_class_names(self):
    """Return all `<class>/<video>.mp4` paths and, in parallel, their class folder names."""
    video_paths = list(self.path.glob('*/*.mp4'))
    classes = []
    for video in video_paths:
      classes.append(video.parent.name)
    return video_paths, classes

  def __call__(self):
    """Yield (frames, class id) for every video, shuffled when `training` is set."""
    video_paths, classes = self.get_files_and_class_names()
    pairs = list(zip(video_paths, classes))

    if self.training:
      random.shuffle(pairs)

    for video, class_name in pairs:
      # Frames are decoded first, then the folder name is encoded as its id.
      yield frames_from_video_file(video, self.n_frames), self.class_ids_for_name[class_name]

In [None]:
# Create the test dataset (not a training set: the generator is built with
# training=False, so the videos are not shuffled).
# Each element is a (frames, label) pair: float32 frames with 3 channels and a
# scalar int16 class id; 10 frames are requested per video.
output_signature = (tf.TensorSpec(shape = (None, None, None, 3), dtype = tf.float32),
                    tf.TensorSpec(shape = (), dtype = tf.int16))
test_ds =  tf.data.Dataset.from_generator(FrameGenerator(video_path_for_generator, 10, training=False),
                                          output_signature = output_signature)


In [None]:
# Second generator over the same directory. In the visible cells it is only
# used to read the list of class folders (`fg.class_names`).
fg = FrameGenerator(video_path_for_generator, 10, training=True)

In [None]:
# Group two videos per batch.
# NOTE: this rebinds `test_ds`, so running the cell a second time batches the
# already batched dataset again.
test_ds = test_ds.batch(2)

# Pull a single batch to check the tensor shapes.
test_frames, test_labels = next(iter(test_ds))

# The messages say "validation", but the data comes from the test dataset.
print(f'Shape of validation set of frames: {test_frames.shape}')
print(f'Shape of validation labels: {test_labels.shape}')

Shape of validation set of frames: (2, 10, 224, 224, 3)
Shape of validation labels: (2,)


# 2 Modelltraining

## 2.1 Training setup

In [None]:
# pip install -q git+https://github.com/tensorflow/docs

In [None]:
# Number of class folders found in the test directory.
# NOTE(review): not referenced again in the visible cells; the model below is
# built with 100 outputs regardless of this value.
class_num = len(fg.class_names)

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed

In [None]:
def preprocess_3d_data(data):
    """Randomly crop and horizontally flip a batch of videos.

    Args:
        data: Batch of videos. The crop size below has five entries, so the
            tensor is treated as (batch, frames, height, width, channels) and
            must be at least 10 x 224 x 224 x 3 per video.

    Returns:
        Tensor of shape (batch, 10, 224, 224, 3).
    """
    # Randomly crop the video. A single crop window is drawn for the whole batch.
    crop_size = (10, 224, 224, 3)
    batch_size = tf.shape(data)[0]
    data = tf.image.random_crop(data, size=(batch_size, *crop_size))

    # Randomly flip the video horizontally: one coin toss per video, so that
    # every frame of a clip keeps the same orientation.
    # (tf.image.random_flip_left_right decides separately for every image of a
    # 4-D input, which would mirror individual frames of the same video.)
    coin_shape = tf.stack([batch_size, 1, 1, 1, 1])
    flip_video = tf.random.uniform(coin_shape) < 0.5
    data = tf.where(flip_video, tf.reverse(data, axis=[3]), data)

    # Add more data augmentation techniques here if needed.

    return data

# Create a random 4-D tensor of shape (10, 240, 240, 3) standing in for one video.
# NOTE(review): `video_data` is not used by any visible cell. preprocess_3d_data
# builds a five-element crop size, so this tensor would need a leading batch
# axis before it could be passed in.
video_data = tf.random.normal((10, 240, 240, 3))

# # Apply the preprocessing function to the video data.
# processed_data = preprocess_3d_data(video_data)

In [None]:
from tensorflow.keras.layers import Layer

class DataAugmentationLayer(Layer):
    """Keras layer that applies `preprocess_3d_data` during training only.

    Outside of training the layer returns a deterministic centre crop of the
    same output size, so repeated predictions on the same video agree.
    """

    # (frames, height, width, channels) of the layer output; has to match the
    # crop size used inside `preprocess_3d_data`.
    _CROP_SIZE = (10, 224, 224, 3)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, inputs, training=None):
        """Augment randomly when `training` is truthy, centre-crop otherwise.

        Args:
            inputs: Batch of videos, indexed as (batch, frames, height, width, channels).
            training: Flag supplied by Keras (True inside `fit`, False for
                `predict`/`evaluate`).
        """
        if training:
            return preprocess_3d_data(inputs)
        return self._center_crop(inputs)

    def _center_crop(self, inputs):
        """Cut the central `_CROP_SIZE` window out of every video."""
        frames, height, width, _ = self._CROP_SIZE
        dynamic_shape = tf.shape(inputs)
        # Prefer the static dimensions so the output shape stays known to the
        # following layers; fall back to the runtime shape otherwise.
        in_height = inputs.shape[2] if inputs.shape[2] is not None else dynamic_shape[2]
        in_width = inputs.shape[3] if inputs.shape[3] is not None else dynamic_shape[3]
        top = (in_height - height) // 2
        left = (in_width - width) // 2
        return inputs[:, :frames, top:top + height, left:left + width, :]

In [None]:
from typing import Tuple
def create_preprocessing_layer(input_shape: Tuple = (10, 224, 224, 3), rescaling = True):
    """Create the model input and the preprocessing stack applied to it.

    Args:
        input_shape: Shape of one video without the batch axis.
        rescaling: When True, the preprocessed frames are multiplied by 255.

    Returns:
        Tuple of (Keras input tensor, preprocessed tensor).
    """
    video_input = tf.keras.Input(shape=input_shape)
    # Pad the second and third axes by 4 on each side; the first axis is not padded.
    padded = ZeroPadding3D(padding=((0, 0),(4,4),(4,4)))(video_input)
    augmented = DataAugmentationLayer()(padded)
    if not rescaling:
        return video_input, augmented
    return video_input, tf.keras.layers.Rescaling(scale=255)(augmented)

In [None]:
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dropout, \
ZeroPadding3D, ZeroPadding2D, RandomRotation, RandomCrop,RandomFlip, RandomZoom, BatchNormalization

In [None]:
def create_efficient_net_model(base_model_trainable: bool = True, rescaling: bool = True, num_classes: int = 100):
  """Build the video classifier: EfficientNetB0 applied to every frame.

  Args:
    base_model_trainable: Whether the EfficientNetB0 weights are trainable.
    rescaling: Forwarded to `create_preprocessing_layer`.
    num_classes: Number of output units. Defaults to 100, the size of the
      head this notebook has always used.

  Returns:
    A `tf.keras.Model` taking videos of shape (10, 224, 224, 3). The Dense
    head has no activation, so the outputs are logits.
  """
  net = tf.keras.applications.EfficientNetB0(include_top = False)
  net.trainable = base_model_trainable

  input_shape = (10, 224, 224, 3)
  video_input, x = create_preprocessing_layer(input_shape, rescaling = rescaling)

  # Run the image backbone on every frame, classify each position, then
  # average the scores over the frame and spatial axes.
  x = tf.keras.layers.TimeDistributed(net)(x)
  x = tf.keras.layers.Dense(num_classes)(x)
  x = tf.keras.layers.GlobalAveragePooling3D()(x)
  return tf.keras.Model(inputs=video_input, outputs=x)

model = create_efficient_net_model(base_model_trainable = True, rescaling = True)

# 3 Detection and Recognition with Pre-Trained model


## 3.1 Top 100 classes

In [None]:
# Gloss names in alphabetical order.
# NOTE(review): presumably index i is the name of class id i of the recognition
# model (FrameGenerator also numbers classes by sorted folder name) -- confirm
# against the training setup.
CLASS_NAMES = ['accident', 'africa', 'all', 'apple', 'basketball', 'bed', 'before', 'bird', 'birthday', 'black', 'blue', 'book', 'bowling', 'brown', 'but', 'can', 'candy', 'chair', 'change', 'cheat', 'city', 'clothes', 'color', 'computer', 'cook', 'cool', 'corn', 'cousin', 'cow', 'dance', 'dark', 'deaf', 'decide', 'doctor', 'dog', 'drink', 'eat', 'enjoy', 'family', 'fine', 'finish', 'fish', 'forget', 'full', 'give', 'go', 'graduate', 'hat', 'hearing', 'help', 'hot', 'how', 'jacket', 'kiss', 'language', 'last', 'later', 'letter', 'like', 'man', 'many', 'medicine', 'meet', 'mother', 'need', 'no', 'now', 'orange', 'paint', 'paper', 'pink', 'pizza', 'play', 'pull', 'purple', 'right', 'same', 'school', 'secretary', 'shirt', 'short', 'son', 'study', 'table', 'tall', 'tell', 'thanksgiving', 'thin', 'thursday', 'time', 'walk', 'want', 'what', 'white', 'who', 'woman', 'work', 'wrong', 'year', 'yes']

## 3.2 Detection Functions


In [None]:
! pip install moviepy
! pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (33.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.8/33.8 MB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.4.6-py3-none-any.whl (31 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.1 sounddevice-0.4.6


In [None]:
# import detection librarys
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
from moviepy.editor import VideoFileClip
from moviepy.video.fx.all import crop

mp_face_mesh = mp.solutions.face_mesh

In [None]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a BGR frame.

    Args:
        image: Frame in BGR channel order.
        model: Object with a `process(image)` method, e.g. a Holistic instance.

    Returns:
        Tuple of (frame converted back to BGR, result of `model.process`).
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # The array is marked read-only only for the duration of the model call.
    rgb_image.flags.writeable = False
    results = model.process(rgb_image)
    rgb_image.flags.writeable = True
    return cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR), results


def draw_landmarks(image, results):
    """Draw the left and right hand landmarks of `results` onto `image`.

    Args:
        image: Frame to draw on.
        results: Detection result exposing `left_hand_landmarks` and
            `right_hand_landmarks`.

    Returns:
        The image after drawing. If the drawing call returns a value it
        replaces `image`; otherwise the object passed in is returned.
    """
    mp_holistic = mp.solutions.holistic  # Holistic model
    mp_drawing = mp.solutions.drawing_utils  # Drawing utilities

    # Left hand first, then right hand, both with the same drawing style.
    for hand_attribute in ("left_hand_landmarks", "right_hand_landmarks"):
        drawn = mp_drawing.draw_landmarks(
            image,
            landmark_list=getattr(results, hand_attribute),
            connections=mp_holistic.HAND_CONNECTIONS,
            landmark_drawing_spec=mp_drawing.DrawingSpec(
                color=(232, 254, 255), thickness=1, circle_radius=4
            ),
            connection_drawing_spec=mp_drawing.DrawingSpec(
                color=(255, 249, 161), thickness=2, circle_radius=2
            ),
        )
        if drawn is not None:
            image = drawn

    return image


def cut_videos(start_frame, end_frame, video_path, counter, min_x, max_x, min_y, max_y ):
    """Copy a frame range of a video into a new file and return its crop box.

    The frames are written uncropped; the pixel crop box is only computed and
    returned so that the caller can crop the clip afterwards.

    Args:
        start_frame: Frame position at which copying starts.
        end_frame: Last frame number of the range (inclusive).
        video_path: Path of the source video.
        counter: Number appended to the file name of the new clip.
        min_x, max_x, min_y, max_y: Crop box as fractions of the frame
            width (x) and height (y).

    Returns:
        A dict `{output_path: [x1, y1, x2, y2]}` with the crop box in pixels.

    Raises:
        ValueError: If the frame range lies beyond the end of the video.
    """
    cap_temp = cv2.VideoCapture(video_path)
    output = None
    try:
        # Get the frames per second (fps), frame count and size of the video
        fps = cap_temp.get(cv2.CAP_PROP_FPS)
        frame_count = cap_temp.get(cv2.CAP_PROP_FRAME_COUNT)
        width = int(cap_temp.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap_temp.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Check if the specified frames are within the video's range.
        # Raising keeps the notebook kernel alive, unlike exit().
        if start_frame > frame_count or end_frame > frame_count:
            raise ValueError(
                f"Invalid frame range {start_frame}-{end_frame} for a video with {frame_count} frames."
            )

        # "<name><counter><ext>"; splitext also copes with dots in directory names.
        root, extension = os.path.splitext(video_path)
        output_path = root + str(counter) + extension

        # Set the start frame position
        cap_temp.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        # Create a VideoWriter object to write the extracted frames
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        output = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        # Read and write the frames within the specified range
        current_frame = start_frame
        while current_frame <= end_frame:
            ret, frame = cap_temp.read()
            if not ret:
                break
            output.write(frame)
            current_frame += 1
    finally:
        cap_temp.release()
        # The writer has to be released too, otherwise the clip may not be
        # finalised on disk before it is read again.
        if output is not None:
            output.release()

    return {output_path: [int(min_x*width), int(min_y*height), int(max_x*width), int(max_y*height)]}
    # cap.release()
    # clip = VideoFileClip(output_path)
    # new_clip = crop(clip, x1=int(min_x*width), y1=int(min_y*height), x2=int(max_x*width), y2=int(max_y*height))
    # target_path_cropped = output_path.split(".")[0] + "cropped." + output_path.split(".")[1]
    # print(new_clip.write_videofile(target_path_cropped, codec='mpeg4', audio=False))


def filter_min_max(lst):
    """Return the bounding box of a sequence of points.

    Args:
        lst: Sequence of objects with numeric `x` and `y` attributes.

    Returns:
        `[min_x, max_x, min_y, max_y]`.
    """
    xs = []
    for point in lst:
        xs.append(point.x)
    ys = []
    for point in lst:
        ys.append(point.y)
    return [min(xs), max(xs), min(ys), max(ys)]


def get_min_max_of_face(frame):
    """Return the bounding box of the first face found in a frame.

    A new FaceMesh instance (static image mode, at most one face) is created
    for every call.

    Args:
        frame: Image handed to `FaceMesh.process` unchanged. MediaPipe expects
            RGB input and no colour conversion happens here, so callers have
            to pass RGB.

    Returns:
        `[min_x, max_x, min_y, max_y]` of the face landmarks, or four `None`
        values when no face is detected.
    """
    with mp_face_mesh.FaceMesh(static_image_mode=True,
                               max_num_faces=1,
                               # refine_landmarks=True,
                               min_detection_confidence=0.5) as face_mesh:
        results = face_mesh.process(frame)

    detected_faces = results.multi_face_landmarks
    if not detected_faces:
        return [None, None, None, None]
    return filter_min_max(list(detected_faces[0].landmark))


def create_empty_dataframe():
    """Return an empty DataFrame with the bounding-box columns of both hands and the face."""
    left_hand_columns = ["left_hand_x_min", "left_hand_x_max", "left_hand_y_min", "left_hand_y_max"]
    right_hand_columns = ["right_hand_x_min", "right_hand_x_max", "right_hand_y_min", "right_hand_y_max"]
    face_columns = ["face_x_min", "face_x_max", "face_y_min", "face_y_max"]
    return pd.DataFrame(columns=left_hand_columns + right_hand_columns + face_columns)


def crop_videos_from_dataframe(df: pd.DataFrame, cap):
    """Compute a normalised crop box around the face and hands of one segment.

    The box is horizontally symmetric around the mean face centre, wide enough
    to contain every detected hand/face extreme, enlarged by a margin of 0.1
    on every side and clamped to the range [0, 1].

    Args:
        df: One row per frame with the columns of `create_empty_dataframe`;
            missing detections are None/NaN.
        cap: Unused; kept so that existing callers keep working.

    Returns:
        Tuple `(min_x, max_x, min_y, max_y)` as fractions of the frame width
        and height. When nothing was detected at all, the whole frame
        `(0.0, 1.0, 0.0, 1.0)` is returned.
    """
    margin = 0.1

    def detected(axis, bound, parts=("left_hand", "face", "right_hand")):
        # All non-missing values of e.g. "<part>_x_min" over the given parts.
        # Columns can have object dtype (None entries), hence the coercion.
        arrays = [
            pd.to_numeric(df[f"{part}_{axis}_{bound}"], errors="coerce").to_numpy(dtype=float)
            for part in parts
        ]
        values = np.concatenate(arrays)
        return values[~np.isnan(values)]

    x_lows, x_highs = detected("x", "min"), detected("x", "max")
    y_lows, y_highs = detected("y", "min"), detected("y", "max")
    if 0 in (x_lows.size, x_highs.size, y_lows.size, y_highs.size):
        # Nothing to crop around: keep the whole frame instead of returning NaN.
        print(0.0, 1.0, 0.0, 1.0)
        return 0.0, 1.0, 0.0, 1.0

    min_x, max_x = float(x_lows.min()), float(x_highs.max())
    min_y, max_y = float(y_lows.min()), float(y_highs.max())

    face_lows = detected("x", "min", parts=("face",))
    face_highs = detected("x", "max", parts=("face",))
    if face_lows.size and face_highs.size:
        face_middle = (float(face_lows.mean()) + float(face_highs.mean())) / 2
    else:
        # No face in this segment: centre the box on the detected hands.
        face_middle = (min_x + max_x) / 2
    distance = max(face_middle - min_x, max_x - face_middle)

    # Return the enlarged and clamped bounds for both axes.
    min_x = max(0.0, face_middle - distance - margin)
    max_x = min(1.0, face_middle + distance + margin)
    min_y = max(0.0, min_y - margin)
    max_y = min(1.0, max_y + margin)
    print(min_x, max_x, min_y, max_y)
    return min_x, max_x, min_y, max_y

In [None]:
# video_path = pathlib.Path(video_path)

# Split the input video into one clip per sign: a segment starts with the
# first frame that shows a hand and is closed once enough hand-free frames
# were counted and the segment is longer than one second. Every clip is then
# cropped to the region around the face and hands.
cap = cv2.VideoCapture(video_file_path)

fps = cap.get(cv2.CAP_PROP_FPS)

# Hand-free frames (about half a second) needed before a segment may be closed.
threshold_counter_no_hand = fps // 2

video_times = []          # (start_frame, end_frame) of every detected segment
crop_video = []           # one {clip_path: [x1, y1, x2, y2]} dict per segment
video_postion_rows = []   # per-frame bounding boxes since the last closed segment
start_time_temp = None
counter_no_hand = None
# end_time_temp = None
frame_counter = 0


def _close_segment(start_frame, end_frame, position_rows):
  """Record a finished segment, write its clip and remember its crop box."""
  video_times.append((start_frame, end_frame))
  # Build the DataFrame once per segment instead of concatenating per frame.
  positions = pd.DataFrame(position_rows, columns=create_empty_dataframe().columns)
  min_x, max_x, min_y, max_y = crop_videos_from_dataframe(positions, cap)
  crop_video.append(cut_videos(start_frame, end_frame, video_file_path, len(video_times), min_x, max_x, min_y, max_y))


# Check if the video opened successfully
if not cap.isOpened():
  print("Error opening video stream or file")

with mp.solutions.holistic.Holistic(
        min_detection_confidence=0.5, min_tracking_confidence=0.5
    ) as holistic:
  # Read until video is completed
  while cap.isOpened():
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
      break
    frame_counter += 1

    # Make detections
    image, results = mediapipe_detection(frame, holistic)

    # Bounding boxes of this frame: left hand, right hand, face (None = not found)
    if results.left_hand_landmarks is not None:
      frame_positions = filter_min_max(list(results.left_hand_landmarks.landmark))
    else:
      frame_positions = [None, None, None, None]
    if results.right_hand_landmarks is not None:
      frame_positions.extend(filter_min_max(list(results.right_hand_landmarks.landmark)))
    else:
      frame_positions.extend([None, None, None, None])
    # OpenCV delivers BGR frames, MediaPipe expects RGB.
    frame_positions.extend(get_min_max_of_face(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
    video_postion_rows.append(frame_positions)

    if results.right_hand_landmarks is None and results.left_hand_landmarks is None:
      if counter_no_hand is not None:
        # NOTE(review): the counter is not reset when hands reappear, so it
        # counts all hand-free frames of the segment, not only consecutive
        # ones -- confirm this is intended.
        counter_no_hand += 1
        if counter_no_hand > threshold_counter_no_hand and (frame_counter - start_time_temp) > fps:
          _close_segment(start_time_temp, frame_counter, video_postion_rows)
          video_postion_rows = []
          start_time_temp = None
          counter_no_hand = None
    elif start_time_temp is None:
      # First frame with a visible hand opens a new segment.
      start_time_temp = frame_counter
      counter_no_hand = 0

    if image is not None:
      image = draw_landmarks(image, results)

    # # Display the resulting frame
    # cv2.imshow('Image', image)


    # # Press Q on keyboard to  exit
    # if cv2.waitKey(25) & 0xFF == ord('q'):
    #   break

# Close a segment that is still open when the video ends.
if start_time_temp is not None and (frame_counter - start_time_temp) > fps:
  _close_segment(start_time_temp, frame_counter, video_postion_rows)
  video_postion_rows = []

# When everything done, release the video capture object
cap.release()

# Crop every clip to its bounding box and store it as "<clip>cropped<ext>".
for item in crop_video:
  for key, value in item.items():
    root, extension = os.path.splitext(key)
    target_path_cropped = root + "cropped" + extension
    clip = VideoFileClip(key)
    try:
      new_clip = crop(clip, x1=value[0], y1=value[1], x2=value[2], y2=value[3])
      new_clip.write_videofile(target_path_cropped, audio=False)   # , codec='mpeg4'
    finally:
      # Release the reader that moviepy keeps open for the clip.
      clip.close()

Error opening video stream or file


## 3.3. Load Recognition Model

- The model checkpoint has to be stored in the root directory of your Google Drive.
- To do this, copy the checkpoint folder and choose "Meine Ablage" (My Drive) as the destination.

In [None]:
# Rebuild the architecture; the trained weights are loaded in the next cell.
model = create_efficient_net_model()

# NOTE(review): `checkpoint_path` is not used by the visible cells, and the
# weights loaded afterwards come from a different folder ("..._2_epochs") --
# confirm which checkpoint is intended.
checkpoint_path = pathlib.Path("/content/drive/MyDrive/checkpoints_EfficientNetB0_15epochs")

# from_logits=True fits the model's Dense head, which has no activation.
model.compile(optimizer = 'adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
              metrics=['accuracy'])

# NOTE(review): one epoch on a single test batch before the weights are loaded;
# presumably this only creates the model/optimizer variables so that the
# checkpoint can be restored -- confirm.
model.fit(test_ds.take(1),
          epochs = 1)



<keras.src.callbacks.History at 0x7f46d4acd600>

In [None]:
# Loads the weights from a hard-coded location on the mounted Google Drive.
# NOTE(review): this folder name differs from `checkpoint_path` defined in the
# previous cell -- confirm which one is correct.
model.load_weights("/content/drive/MyDrive/checkpoints_EfficientNetB0_2_epochs/my_checkpoint")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f46e8d21930>

In [None]:
def to_gif(images):
  """Write the frames to ./animation.gif (10 fps) and return an embedded preview.

  Args:
    images: Array of frames; values are multiplied by 255 and clipped to
      0..255 before being stored as uint8.
  """
  gif_path = './animation.gif'
  scaled = images * 255
  converted_images = np.clip(scaled, 0, 255).astype(np.uint8)
  imageio.mimsave(gif_path, converted_images, fps=10)
  return embed.embed_file(gif_path)

In [None]:
from IPython.display import display
from IPython.display import Video

# Create an instance of FrameGenerator for the chosen dataset
chosen_fg = FrameGenerator(pathlib.Path(video_file_path), 10)

# Select a random video and its true label. Only the selected video is
# decoded, instead of loading every video of the dataset first.
video_paths, video_class_names = chosen_fg.get_files_and_class_names()
if not video_paths:
    raise FileNotFoundError(f"No '<class>/<video>.mp4' files found under {video_file_path}")
random_video_path, true_class_name = random.choice(list(zip(video_paths, video_class_names)))
true_label = chosen_fg.class_ids_for_name[true_class_name]
chosen_frames = frames_from_video_file(random_video_path, n_frames = 10)

# model prediction
chosen_frames_expanded = np.expand_dims(chosen_frames, axis=0)
predicted_label = np.argmax(model.predict(chosen_frames_expanded), axis=-1)

# Get the predicted label's class name.
# NOTE(review): assumes CLASS_NAMES is in the label order the model was
# trained with -- confirm.
predicted_class_name = CLASS_NAMES[predicted_label[0]]

print(f"True label: {true_label} ({true_class_name})")
print(f"Predicted label: {predicted_label[0]} ({predicted_class_name})")

# Display exactly the frames the prediction was made on.
sample_video = chosen_frames
to_gif(sample_video)

FileNotFoundError: ignored

In [None]:
# display(Video("/content/africa_basketball_sleep.mp4", embed=True))