In [2]:
import tqdm
import random
import pathlib
import itertools
import collections

import os
import cv2
import numpy as np
import remotezip as rz

import tensorflow as tf

In [3]:
URL = 'https://storage.googleapis.com/thumos14_files/UCF101_videos.zip'

In [4]:
def list_files_from_zip_url(zip_url):

  files = []
  with rz.RemoteZip(zip_url) as zip:
    for zip_info in zip.infolist():
      files.append(zip_info.filename)
  return files

In [5]:
files = list_files_from_zip_url(URL)
files = [f for f in files if f.endswith('.avi')]
files[:10]

['UCF101/v_ApplyEyeMakeup_g01_c01.avi',
 'UCF101/v_ApplyEyeMakeup_g01_c02.avi',
 'UCF101/v_ApplyEyeMakeup_g01_c03.avi',
 'UCF101/v_ApplyEyeMakeup_g01_c04.avi',
 'UCF101/v_ApplyEyeMakeup_g01_c05.avi',
 'UCF101/v_ApplyEyeMakeup_g01_c06.avi',
 'UCF101/v_ApplyEyeMakeup_g02_c01.avi',
 'UCF101/v_ApplyEyeMakeup_g02_c02.avi',
 'UCF101/v_ApplyEyeMakeup_g02_c03.avi',
 'UCF101/v_ApplyEyeMakeup_g02_c04.avi']

In [6]:
def get_class(fname):

  return fname.split('_')[-3]

In [7]:
def get_files_per_class(files):

  files_for_class = collections.defaultdict(list)
  for fname in files:
    class_name = get_class(fname)
    files_for_class[class_name].append(fname)
  return files_for_class

In [8]:
NUM_CLASSES = 101
FILES_PER_CLASS = 50

In [9]:
files_for_class = get_files_per_class(files)
classes = list(files_for_class.keys())

In [10]:
print('Num classes:', len(classes))
print('Num videos for class[0]:', len(files_for_class[classes[0]]))

Num classes: 101
Num videos for class[0]: 145


In [11]:
def select_subset_of_classes(files_for_class, classes, files_per_class):

  files_subset = dict()

  for class_name in classes:
    class_files = files_for_class[class_name]
    files_subset[class_name] = class_files[:files_per_class]

  return files_subset

In [12]:
files_subset = select_subset_of_classes(files_for_class, classes[:NUM_CLASSES], FILES_PER_CLASS)
list(files_subset.keys())

['ApplyEyeMakeup',
 'ApplyLipstick',
 'Archery',
 'BabyCrawling',
 'BalanceBeam',
 'BandMarching',
 'BaseballPitch',
 'BasketballDunk',
 'Basketball',
 'BenchPress',
 'Biking',
 'Billiards',
 'BlowDryHair',
 'BlowingCandles',
 'BodyWeightSquats',
 'Bowling',
 'BoxingPunchingBag',
 'BoxingSpeedBag',
 'BreastStroke',
 'BrushingTeeth',
 'CleanAndJerk',
 'CliffDiving',
 'CricketBowling',
 'CricketShot',
 'CuttingInKitchen',
 'Diving',
 'Drumming',
 'Fencing',
 'FieldHockeyPenalty',
 'FloorGymnastics',
 'FrisbeeCatch',
 'FrontCrawl',
 'GolfSwing',
 'Haircut',
 'Hammering',
 'HammerThrow',
 'HandstandPushups',
 'HandstandWalking',
 'HeadMassage',
 'HighJump',
 'HorseRace',
 'HorseRiding',
 'HulaHoop',
 'IceDancing',
 'JavelinThrow',
 'JugglingBalls',
 'JumpingJack',
 'JumpRope',
 'Kayaking',
 'Knitting',
 'LongJump',
 'Lunges',
 'MilitaryParade',
 'Mixing',
 'MoppingFloor',
 'Nunchucks',
 'ParallelBars',
 'PizzaTossing',
 'PlayingCello',
 'PlayingDaf',
 'PlayingDhol',
 'PlayingFlute',
 'Play

In [13]:
def download_from_zip(zip_url, to_dir, file_names):

  with rz.RemoteZip(zip_url) as zip:
    for fn in tqdm.tqdm(file_names):
      class_name = get_class(fn)
      zip.extract(fn, str(to_dir / class_name))
      unzipped_file = to_dir / class_name / fn

      fn = pathlib.Path(fn).parts[-1]
      output_file = to_dir / class_name / fn
      unzipped_file.rename(output_file)

In [14]:
def split_class_lists(files_for_class, count):

  split_files = []
  remainder = {}
  for cls in files_for_class:
    split_files.extend(files_for_class[cls][:count])
    remainder[cls] = files_for_class[cls][count:]
  return split_files, remainder

In [15]:
def download_ufc_101_subset(zip_url, num_classes, splits, download_dir):

  files = list_files_from_zip_url(zip_url)
  for f in files:
    path = os.path.normpath(f)
    tokens = path.split(os.sep)
    if len(tokens) <= 2:
      files.remove(f) # Remove that item from the list if it does not have a filename
  
  files_for_class = get_files_per_class(files)

  classes = list(files_for_class.keys())[:num_classes]

  for cls in classes:
    random.shuffle(files_for_class[cls])
    
  # Only use the number of classes you want in the dictionary
  files_for_class = {x: files_for_class[x] for x in classes}

  dirs = {}
  for split_name, split_count in splits.items():
    print(split_name, ":")
    split_dir = download_dir / split_name
    split_files, files_for_class = split_class_lists(files_for_class, split_count)
    download_from_zip(zip_url, split_dir, split_files)
    dirs[split_name] = split_dir

  return dirs

In [18]:
#로컬 실행 시 이미 subset을 생성한 경우 하단 코드 말고 해당 코드 실행
donwload_path = pathlib.Path('./UCF101_subset/')
download_dir = pathlib.Path('./UCF101_subset/')
video_count_train = len(list(donwload_path.glob('train/*/*.avi')))
video_count_val = len(list(donwload_path.glob('val/*/*.avi')))
video_count_test = len(list(donwload_path.glob('test/*/*.avi')))
from pathlib import Path

subset_paths = {
    'train': Path('UCF101_subset/train'),
    'val': Path('UCF101_subset/val'),
    'test': Path('UCF101_subset/test')
}


video_total = video_count_train + video_count_val + video_count_test
print(f"Total videos: {video_total}")

Total videos: 5050


In [18]:
download_dir = pathlib.Path('./UCF101_subset/')
subset_paths = download_ufc_101_subset(URL,
                                       num_classes = NUM_CLASSES,
                                       splits = {"train": 30, "val": 10, "test": 10},
                                       download_dir = download_dir)

train :



  0%|          | 0/300 [00:00<?, ?it/s]


  0%|          | 1/300 [00:00<00:58,  5.14it/s]


  1%|          | 2/300 [00:00<00:51,  5.82it/s]


  1%|          | 3/300 [00:00<00:59,  5.03it/s]


  1%|▏         | 4/300 [00:00<01:06,  4.44it/s]


  2%|▏         | 6/300 [00:01<00:52,  5.64it/s]


  3%|▎         | 8/300 [00:01<00:40,  7.18it/s]


  3%|▎         | 9/300 [00:01<00:38,  7.47it/s]


  3%|▎         | 10/300 [00:01<00:49,  5.90it/s]


  4%|▎         | 11/300 [00:01<00:48,  5.92it/s]


  4%|▍         | 12/300 [00:02<00:47,  6.10it/s]


  4%|▍         | 13/300 [00:02<00:42,  6.75it/s]


  5%|▍         | 14/300 [00:02<00:50,  5.70it/s]


  5%|▌         | 16/300 [00:02<00:42,  6.64it/s]


  6%|▌         | 18/300 [00:02<00:33,  8.41it/s]


  7%|▋         | 20/300 [00:02<00:28,  9.77it/s]


  7%|▋         | 22/300 [00:03<00:25, 10.79it/s]


  8%|▊         | 24/300 [00:03<00:27, 10.18it/s]


  9%|▊         | 26/300 [00:03<00:29,  9.31it/s]


  9%|▉         | 28/300 [00:03<00:26, 10.39it/s]


 10%|█         | 30/300 [00:03<00:24, 10.97it/s]


 11%|█         | 32/300 [00:04<00:26, 10.16it/s]


 11%|█▏        | 34/300 [00:04<00:25, 10.25it/s]


 12%|█▏        | 36/300 [00:04<00:33,  7.86it/s]


 13%|█▎        | 38/300 [00:04<00:30,  8.69it/s]


 13%|█▎        | 40/300 [00:04<00:26,  9.83it/s]


 14%|█▍        | 42/300 [00:05<00:23, 11.05it/s]


 15%|█▍        | 44/300 [00:05<00:23, 11.10it/s]


 15%|█▌        | 46/300 [00:05<00:23, 10.79it/s]


 16%|█▌        | 48/300 [00:05<00:24, 10.27it/s]


 17%|█▋        | 50/300 [00:05<00:23, 10.49it/s]


 17%|█▋        | 52/300 [00:06<00:30,  8.07it/s]


 18%|█▊        | 54/300 [00:06<00:26,  9.27it/s]


 19%|█▊        | 56/300 [00:06<00:23, 10.29it/s]


 19%|█▉        | 58/300 [00:06<00:23, 10.43it/s]


 20%|██        | 60/300 [00:06<00:22, 10.85it/s]


 21%|██        | 62/300 [00:07<00:26,  9.02it/s]


 21%|██▏       | 64/300 [00:07<00:34,  6.83it/s]


 22%|██▏       | 66/300 [00:07<00:32,  7.29it/s]


 22%|██▏       | 67/300 [00:08<00:39,  5.95it/s]


 23%|██▎       | 69/300 [00:08<00:31,  7.32it/s]


 23%|██▎       | 70/300 [00:08<00:34,  6.75it/s]


 24%|██▎       | 71/300 [00:08<00:32,  7.10it/s]


 24%|██▍       | 72/300 [00:08<00:34,  6.70it/s]


 25%|██▍       | 74/300 [00:08<00:27,  8.29it/s]


 25%|██▌       | 76/300 [00:09<00:24,  9.25it/s]


 26%|██▌       | 78/300 [00:09<00:21, 10.38it/s]


 27%|██▋       | 80/300 [00:09<00:19, 11.18it/s]


 27%|██▋       | 82/300 [00:09<00:26,  8.19it/s]


 28%|██▊       | 84/300 [00:09<00:23,  9.12it/s]


 29%|██▊       | 86/300 [00:10<00:23,  9.28it/s]


 29%|██▉       | 88/300 [00:10<00:20, 10.10it/s]


 30%|███       | 90/300 [00:10<00:24,  8.43it/s]


 30%|███       | 91/300 [00:10<00:25,  8.32it/s]


 31%|███       | 92/300 [00:11<00:29,  7.03it/s]


 31%|███       | 93/300 [00:11<00:31,  6.53it/s]


 31%|███▏      | 94/300 [00:11<00:31,  6.61it/s]


 32%|███▏      | 95/300 [00:11<00:30,  6.81it/s]


 32%|███▏      | 97/300 [00:11<00:25,  7.90it/s]


 33%|███▎      | 99/300 [00:11<00:21,  9.17it/s]


 34%|███▎      | 101/300 [00:12<00:24,  8.21it/s]


 34%|███▍      | 103/300 [00:12<00:19,  9.96it/s]


 35%|███▌      | 105/300 [00:12<00:17, 11.29it/s]


 36%|███▌      | 107/300 [00:12<00:17, 11.09it/s]


 36%|███▋      | 109/300 [00:12<00:18, 10.19it/s]


 37%|███▋      | 111/300 [00:12<00:16, 11.19it/s]


 38%|███▊      | 113/300 [00:13<00:16, 11.40it/s]


 38%|███▊      | 115/300 [00:13<00:15, 12.07it/s]


 39%|███▉      | 117/300 [00:13<00:23,  7.80it/s]


 40%|███▉      | 119/300 [00:13<00:20,  8.70it/s]


 40%|████      | 121/300 [00:14<00:22,  7.90it/s]


 41%|████      | 122/300 [00:14<00:24,  7.29it/s]


 41%|████      | 123/300 [00:14<00:25,  7.06it/s]


 41%|████▏     | 124/300 [00:14<00:32,  5.38it/s]


 42%|████▏     | 125/300 [00:15<00:36,  4.76it/s]


 42%|████▏     | 127/300 [00:15<00:27,  6.39it/s]


 43%|████▎     | 129/300 [00:15<00:20,  8.22it/s]


 44%|████▎     | 131/300 [00:15<00:19,  8.69it/s]


 44%|████▍     | 133/300 [00:15<00:17,  9.36it/s]


 45%|████▌     | 135/300 [00:15<00:15, 10.34it/s]


 46%|████▌     | 137/300 [00:16<00:14, 11.23it/s]


 46%|████▋     | 139/300 [00:16<00:15, 10.67it/s]


 47%|████▋     | 141/300 [00:16<00:13, 11.70it/s]


 48%|████▊     | 143/300 [00:16<00:14, 10.69it/s]


 48%|████▊     | 145/300 [00:16<00:15, 10.22it/s]


 49%|████▉     | 147/300 [00:17<00:15,  9.76it/s]


 50%|████▉     | 149/300 [00:17<00:14, 10.72it/s]


 50%|█████     | 151/300 [00:17<00:16,  9.23it/s]


 51%|█████     | 152/300 [00:17<00:18,  8.16it/s]


 51%|█████▏    | 154/300 [00:18<00:18,  7.74it/s]


 52%|█████▏    | 155/300 [00:18<00:19,  7.56it/s]


 52%|█████▏    | 156/300 [00:18<00:25,  5.60it/s]


 52%|█████▏    | 157/300 [00:18<00:25,  5.55it/s]


 53%|█████▎    | 158/300 [00:18<00:26,  5.41it/s]


 53%|█████▎    | 159/300 [00:19<00:29,  4.73it/s]


 53%|█████▎    | 160/300 [00:19<00:25,  5.41it/s]


 54%|█████▎    | 161/300 [00:19<00:24,  5.65it/s]


 54%|█████▍    | 163/300 [00:19<00:17,  7.65it/s]


 55%|█████▌    | 165/300 [00:19<00:16,  8.26it/s]


 55%|█████▌    | 166/300 [00:19<00:16,  8.25it/s]


 56%|█████▌    | 167/300 [00:20<00:20,  6.53it/s]


 56%|█████▌    | 168/300 [00:20<00:27,  4.76it/s]


 57%|█████▋    | 170/300 [00:20<00:21,  5.99it/s]


 57%|█████▋    | 172/300 [00:20<00:17,  7.40it/s]


 58%|█████▊    | 173/300 [00:21<00:16,  7.67it/s]


 58%|█████▊    | 175/300 [00:21<00:14,  8.65it/s]


 59%|█████▉    | 177/300 [00:21<00:13,  8.94it/s]


 59%|█████▉    | 178/300 [00:21<00:13,  8.75it/s]


 60%|██████    | 180/300 [00:21<00:12,  9.78it/s]


 60%|██████    | 181/300 [00:22<00:16,  7.37it/s]


 61%|██████    | 183/300 [00:22<00:13,  8.80it/s]


 61%|██████▏   | 184/300 [00:22<00:18,  6.37it/s]


 62%|██████▏   | 185/300 [00:22<00:18,  6.26it/s]


 62%|██████▏   | 187/300 [00:22<00:14,  7.93it/s]


 63%|██████▎   | 188/300 [00:22<00:14,  7.96it/s]


 63%|██████▎   | 189/300 [00:23<00:13,  7.99it/s]


 64%|██████▎   | 191/300 [00:23<00:11,  9.80it/s]


 64%|██████▍   | 193/300 [00:23<00:10, 10.00it/s]


 65%|██████▌   | 195/300 [00:23<00:10, 10.32it/s]


 66%|██████▌   | 197/300 [00:23<00:09, 10.40it/s]


 66%|██████▋   | 199/300 [00:23<00:08, 11.73it/s]


 67%|██████▋   | 201/300 [00:24<00:08, 11.02it/s]


 68%|██████▊   | 203/300 [00:24<00:09,  9.78it/s]


 68%|██████▊   | 205/300 [00:24<00:09, 10.31it/s]


 69%|██████▉   | 207/300 [00:24<00:07, 11.70it/s]


 70%|██████▉   | 209/300 [00:24<00:07, 12.56it/s]


 70%|███████   | 211/300 [00:24<00:07, 11.97it/s]


 71%|███████   | 213/300 [00:25<00:08, 10.03it/s]


 72%|███████▏  | 215/300 [00:25<00:08,  9.93it/s]


 72%|███████▏  | 217/300 [00:25<00:09,  9.19it/s]


 73%|███████▎  | 219/300 [00:25<00:08,  9.45it/s]


 74%|███████▎  | 221/300 [00:26<00:08,  9.16it/s]


 74%|███████▍  | 222/300 [00:26<00:09,  8.55it/s]


 74%|███████▍  | 223/300 [00:26<00:09,  8.41it/s]


 75%|███████▌  | 225/300 [00:26<00:07, 10.09it/s]


 76%|███████▌  | 227/300 [00:26<00:06, 11.50it/s]


 76%|███████▋  | 229/300 [00:26<00:05, 11.92it/s]


 77%|███████▋  | 231/300 [00:26<00:05, 12.47it/s]


 78%|███████▊  | 233/300 [00:27<00:05, 11.90it/s]


 78%|███████▊  | 235/300 [00:27<00:08,  7.70it/s]


 79%|███████▉  | 237/300 [00:27<00:06,  9.28it/s]


 80%|███████▉  | 239/300 [00:27<00:05, 10.19it/s]


 80%|████████  | 241/300 [00:28<00:06,  9.47it/s]


 81%|████████  | 243/300 [00:28<00:05, 10.58it/s]


 82%|████████▏ | 245/300 [00:28<00:05, 10.94it/s]


 82%|████████▏ | 247/300 [00:28<00:04, 12.01it/s]


 83%|████████▎ | 249/300 [00:28<00:04, 12.63it/s]


 84%|████████▎ | 251/300 [00:28<00:04, 11.82it/s]


 84%|████████▍ | 253/300 [00:29<00:03, 12.32it/s]


 85%|████████▌ | 255/300 [00:29<00:03, 12.76it/s]


 86%|████████▌ | 257/300 [00:29<00:03, 13.03it/s]


 86%|████████▋ | 259/300 [00:29<00:02, 13.80it/s]


 87%|████████▋ | 261/300 [00:29<00:02, 14.39it/s]


 88%|████████▊ | 263/300 [00:29<00:02, 14.09it/s]


 88%|████████▊ | 265/300 [00:29<00:02, 13.53it/s]


 89%|████████▉ | 267/300 [00:30<00:02, 13.49it/s]


 90%|████████▉ | 269/300 [00:30<00:02, 14.15it/s]


 90%|█████████ | 271/300 [00:30<00:02, 13.98it/s]


 91%|█████████ | 273/300 [00:30<00:02, 10.01it/s]


 92%|█████████▏| 275/300 [00:30<00:02, 10.15it/s]


 92%|█████████▏| 277/300 [00:31<00:02,  9.25it/s]


 93%|█████████▎| 279/300 [00:31<00:01, 10.67it/s]


 94%|█████████▎| 281/300 [00:31<00:01, 10.39it/s]


 94%|█████████▍| 283/300 [00:31<00:01, 10.52it/s]


 95%|█████████▌| 285/300 [00:31<00:01, 11.80it/s]


 96%|█████████▌| 287/300 [00:31<00:01, 12.02it/s]


 96%|█████████▋| 289/300 [00:32<00:00, 12.99it/s]


 97%|█████████▋| 291/300 [00:32<00:00, 13.65it/s]


 98%|█████████▊| 293/300 [00:32<00:00, 13.17it/s]


 98%|█████████▊| 295/300 [00:32<00:00, 14.01it/s]


 99%|█████████▉| 297/300 [00:32<00:00, 14.88it/s]


100%|█████████▉| 299/300 [00:32<00:00, 14.43it/s]


100%|██████████| 300/300 [00:32<00:00,  9.16it/s]




val :



  0%|          | 0/100 [00:00<?, ?it/s]


  1%|          | 1/100 [00:00<00:26,  3.68it/s]


  2%|▏         | 2/100 [00:00<00:19,  5.00it/s]


  3%|▎         | 3/100 [00:00<00:16,  5.97it/s]


  5%|▌         | 5/100 [00:00<00:12,  7.53it/s]


  7%|▋         | 7/100 [00:00<00:09, 10.03it/s]


  9%|▉         | 9/100 [00:00<00:07, 12.10it/s]


 11%|█         | 11/100 [00:01<00:08, 10.35it/s]


 13%|█▎        | 13/100 [00:01<00:10,  8.60it/s]


 15%|█▌        | 15/100 [00:01<00:09,  8.70it/s]


 17%|█▋        | 17/100 [00:01<00:08,  9.76it/s]


 19%|█▉        | 19/100 [00:02<00:07, 10.85it/s]


 21%|██        | 21/100 [00:02<00:07, 10.46it/s]


 23%|██▎       | 23/100 [00:02<00:08,  9.17it/s]


 24%|██▍       | 24/100 [00:02<00:08,  8.64it/s]


 25%|██▌       | 25/100 [00:02<00:09,  7.55it/s]


 27%|██▋       | 27/100 [00:03<00:08,  8.84it/s]


 28%|██▊       | 28/100 [00:03<00:08,  8.38it/s]


 30%|███       | 30/100 [00:03<00:07,  9.85it/s]


 32%|███▏      | 32/100 [00:03<00:07,  8.70it/s]


 34%|███▍      | 34/100 [00:03<00:07,  8.68it/s]


 35%|███▌      | 35/100 [00:04<00:08,  7.83it/s]


 37%|███▋      | 37/100 [00:04<00:07,  8.34it/s]


 38%|███▊      | 38/100 [00:04<00:08,  7.67it/s]


 41%|████      | 41/100 [00:04<00:06,  9.79it/s]


 42%|████▏     | 42/100 [00:04<00:06,  9.20it/s]


 44%|████▍     | 44/100 [00:04<00:06,  9.06it/s]


 45%|████▌     | 45/100 [00:05<00:06,  8.80it/s]


 47%|████▋     | 47/100 [00:05<00:05, 10.40it/s]


 49%|████▉     | 49/100 [00:05<00:04, 11.86it/s]


 51%|█████     | 51/100 [00:05<00:04, 11.11it/s]


 53%|█████▎    | 53/100 [00:05<00:04,  9.41it/s]


 55%|█████▌    | 55/100 [00:06<00:05,  8.87it/s]


 57%|█████▋    | 57/100 [00:06<00:04,  9.54it/s]


 59%|█████▉    | 59/100 [00:06<00:04,  8.47it/s]


 60%|██████    | 60/100 [00:06<00:05,  7.17it/s]


 61%|██████    | 61/100 [00:06<00:05,  7.31it/s]


 62%|██████▏   | 62/100 [00:07<00:05,  7.11it/s]


 63%|██████▎   | 63/100 [00:07<00:05,  7.12it/s]


 65%|██████▌   | 65/100 [00:07<00:03,  9.27it/s]


 67%|██████▋   | 67/100 [00:07<00:03, 10.84it/s]


 69%|██████▉   | 69/100 [00:07<00:02, 11.53it/s]


 71%|███████   | 71/100 [00:07<00:02, 10.95it/s]


 73%|███████▎  | 73/100 [00:08<00:02, 10.14it/s]


 75%|███████▌  | 75/100 [00:08<00:02, 10.28it/s]


 77%|███████▋  | 77/100 [00:08<00:02,  9.69it/s]


 79%|███████▉  | 79/100 [00:08<00:01, 11.04it/s]


 81%|████████  | 81/100 [00:08<00:01, 10.60it/s]


 83%|████████▎ | 83/100 [00:09<00:01,  8.89it/s]


 85%|████████▌ | 85/100 [00:09<00:01,  8.87it/s]


 87%|████████▋ | 87/100 [00:09<00:01, 10.34it/s]


 89%|████████▉ | 89/100 [00:09<00:00, 11.74it/s]


 91%|█████████ | 91/100 [00:09<00:00, 10.75it/s]


 93%|█████████▎| 93/100 [00:10<00:00,  8.99it/s]


 95%|█████████▌| 95/100 [00:10<00:00, 10.58it/s]


 97%|█████████▋| 97/100 [00:10<00:00, 10.48it/s]


 99%|█████████▉| 99/100 [00:10<00:00,  9.80it/s]


100%|██████████| 100/100 [00:10<00:00,  9.28it/s]




test :



  0%|          | 0/100 [00:00<?, ?it/s]


  2%|▏         | 2/100 [00:00<00:06, 14.56it/s]


  4%|▍         | 4/100 [00:00<00:06, 13.72it/s]


  6%|▌         | 6/100 [00:00<00:07, 12.61it/s]


  8%|▊         | 8/100 [00:00<00:06, 14.45it/s]


 10%|█         | 10/100 [00:00<00:05, 15.40it/s]


 12%|█▏        | 12/100 [00:00<00:06, 14.62it/s]


 14%|█▍        | 14/100 [00:00<00:05, 14.79it/s]


 16%|█▌        | 16/100 [00:01<00:05, 15.20it/s]


 18%|█▊        | 18/100 [00:01<00:06, 12.69it/s]


 20%|██        | 20/100 [00:01<00:05, 13.41it/s]


 22%|██▏       | 22/100 [00:01<00:05, 13.27it/s]


 24%|██▍       | 24/100 [00:01<00:06, 12.57it/s]


 26%|██▌       | 26/100 [00:01<00:06, 12.01it/s]


 28%|██▊       | 28/100 [00:02<00:05, 13.19it/s]


 30%|███       | 30/100 [00:02<00:05, 13.78it/s]


 32%|███▏      | 32/100 [00:02<00:05, 13.00it/s]


 34%|███▍      | 34/100 [00:02<00:04, 13.31it/s]


 36%|███▌      | 36/100 [00:02<00:04, 13.40it/s]


 38%|███▊      | 38/100 [00:02<00:04, 12.76it/s]


 40%|████      | 40/100 [00:02<00:04, 13.47it/s]


 42%|████▏     | 42/100 [00:03<00:04, 13.69it/s]


 44%|████▍     | 44/100 [00:03<00:03, 14.20it/s]


 46%|████▌     | 46/100 [00:03<00:03, 14.21it/s]


 48%|████▊     | 48/100 [00:03<00:03, 13.98it/s]


 50%|█████     | 50/100 [00:03<00:03, 14.61it/s]


 52%|█████▏    | 52/100 [00:03<00:03, 12.75it/s]


 54%|█████▍    | 54/100 [00:04<00:04, 10.02it/s]


 56%|█████▌    | 56/100 [00:04<00:04, 10.31it/s]


 58%|█████▊    | 58/100 [00:04<00:03, 11.38it/s]


 60%|██████    | 60/100 [00:04<00:04,  9.10it/s]


 62%|██████▏   | 62/100 [00:04<00:03, 10.59it/s]


 64%|██████▍   | 64/100 [00:05<00:03, 11.32it/s]


 66%|██████▌   | 66/100 [00:05<00:03, 11.01it/s]


 68%|██████▊   | 68/100 [00:05<00:02, 11.34it/s]


 70%|███████   | 70/100 [00:05<00:02, 12.22it/s]


 72%|███████▏  | 72/100 [00:05<00:02, 12.78it/s]


 74%|███████▍  | 74/100 [00:05<00:02, 12.44it/s]


 76%|███████▌  | 76/100 [00:05<00:01, 13.46it/s]


 78%|███████▊  | 78/100 [00:06<00:01, 12.85it/s]


 80%|████████  | 80/100 [00:06<00:01, 13.87it/s]


 82%|████████▏ | 82/100 [00:06<00:01, 14.51it/s]


 84%|████████▍ | 84/100 [00:06<00:01, 13.89it/s]


 86%|████████▌ | 86/100 [00:06<00:00, 14.67it/s]


 88%|████████▊ | 88/100 [00:06<00:00, 14.88it/s]


 90%|█████████ | 90/100 [00:06<00:00, 15.02it/s]


 92%|█████████▏| 92/100 [00:07<00:00, 13.83it/s]


 94%|█████████▍| 94/100 [00:07<00:00, 13.82it/s]


 96%|█████████▌| 96/100 [00:07<00:00, 13.33it/s]


 98%|█████████▊| 98/100 [00:07<00:00, 12.62it/s]


100%|██████████| 100/100 [00:07<00:00, 12.55it/s]


100%|██████████| 100/100 [00:07<00:00, 12.92it/s]




In [19]:
video_count_train = len(list(download_dir.glob('train/*/*.avi')))
video_count_val = len(list(download_dir.glob('val/*/*.avi')))
video_count_test = len(list(download_dir.glob('test/*/*.avi')))
video_total = video_count_train + video_count_val + video_count_test
print(f"Total videos: {video_total}")

Total videos: 5050


In [20]:
def format_frames(frame, output_size):

  frame = tf.image.convert_image_dtype(frame, tf.float32)
  frame = tf.image.resize_with_pad(frame, *output_size)
  return frame

In [21]:
def frames_from_video_file(video_path, n_frames, output_size = (224,224), frame_step = 15):

  # Read each video frame by frame
  result = []
  src = cv2.VideoCapture(str(video_path))  

  video_length = src.get(cv2.CAP_PROP_FRAME_COUNT)

  need_length = 1 + (n_frames - 1) * frame_step

  if need_length > video_length:
    start = 0
  else:
    max_start = video_length - need_length
    start = random.randint(0, max_start + 1)

  src.set(cv2.CAP_PROP_POS_FRAMES, start)
  # ret is a boolean indicating whether read was successful, frame is the image itself
  ret, frame = src.read()
  result.append(format_frames(frame, output_size))

  for _ in range(n_frames - 1):
    for _ in range(frame_step):
      ret, frame = src.read()
    if ret:
      frame = format_frames(frame, output_size)
      result.append(frame)
    else:
      result.append(np.zeros_like(result[0]))
  src.release()
  result = np.array(result)[..., [2, 1, 0]]

  return result

In [22]:
class FrameGenerator:
  def __init__(self, path, n_frames):

    self.path = path
    self.n_frames = n_frames
    self.class_names = sorted(set(p.name for p in self.path.iterdir() if p.is_dir()))
    self.class_ids_for_name = dict((name, idx) for idx, name in enumerate(self.class_names))

  def get_files_and_class_names(self):
    video_paths = list(self.path.glob('*/*.avi'))
    classes = [p.parent.name for p in video_paths] 
    return video_paths, classes

  def __call__(self):
    video_paths, classes = self.get_files_and_class_names()

    pairs = list(zip(video_paths, classes))

    random.shuffle(pairs)

    for path, name in pairs:
      video_frames = frames_from_video_file(path, self.n_frames) 
      label = self.class_ids_for_name[name] # Encode labels
      yield video_frames, label

In [23]:
fg = FrameGenerator(subset_paths['train'], 10)

frames, label = next(fg())

print(f"Shape: {frames.shape}")
print(f"Label: {label}")

2024-03-04 08:51:47.967061: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2024-03-04 08:51:47.967082: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2024-03-04 08:51:47.967090: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2024-03-04 08:51:47.967265: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-03-04 08:51:47.967546: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Shape: (10, 224, 224, 3)
Label: 20


In [24]:
# Create the training set
output_signature = (tf.TensorSpec(shape = (None, None, None, 3), dtype = tf.float32),
                    tf.TensorSpec(shape = (), dtype = tf.int16))
train_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['train'], 10),
                                          output_signature = output_signature)

In [25]:
# Create the validation set
val_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['val'], 10),
                                        output_signature = output_signature)

In [26]:
# Print the shapes of the data
train_frames, train_labels = next(iter(train_ds))
print(f'Shape of training set of frames: {train_frames.shape}')
print(f'Shape of training labels: {train_labels.shape}')

val_frames, val_labels = next(iter(val_ds))
print(f'Shape of validation set of frames: {val_frames.shape}')
print(f'Shape of validation labels: {val_labels.shape}')

Shape of training set of frames: (10, 224, 224, 3)
Shape of training labels: ()
Shape of validation set of frames: (10, 224, 224, 3)
Shape of validation labels: ()


In [27]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size = AUTOTUNE)
val_ds = val_ds.cache().shuffle(1000).prefetch(buffer_size = AUTOTUNE)

In [28]:
train_ds = train_ds.batch(2)
val_ds = val_ds.batch(2)

train_frames, train_labels = next(iter(train_ds))
print(f'Shape of training set of frames: {train_frames.shape}')
print(f'Shape of training labels: {train_labels.shape}')

val_frames, val_labels = next(iter(val_ds))
print(f'Shape of validation set of frames: {val_frames.shape}')
print(f'Shape of validation labels: {val_labels.shape}')

2024-03-04 08:52:19.529061: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:9: Filling up shuffle buffer (this may take a while): 112 of 1000
2024-03-04 08:52:29.542893: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:9: Filling up shuffle buffer (this may take a while): 221 of 1000
2024-03-04 08:52:49.501235: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:9: Filling up shuffle buffer (this may take a while): 445 of 1000
2024-03-04 08:53:09.507420: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:9: Filling up shuffle buffer (this may take a while): 665 of 1000
2024-03-04 08:53:29.473956: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:9: Filling up shuffle buffer (this may take a while): 885 of 1000
2024-03-04 08:53:39.524814: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:9: Filling up shuffle buffer (this may take a while)

Shape of training set of frames: (2, 10, 224, 224, 3)
Shape of training labels: (2,)


2024-03-04 08:53:50.958122: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:12: Filling up shuffle buffer (this may take a while): 107 of 1000
2024-03-04 08:54:10.867196: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:12: Filling up shuffle buffer (this may take a while): 325 of 1000
2024-03-04 08:54:20.906134: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:12: Filling up shuffle buffer (this may take a while): 435 of 1000
2024-03-04 08:54:40.924379: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:12: Filling up shuffle buffer (this may take a while): 654 of 1000
2024-03-04 08:54:50.964332: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:12: Filling up shuffle buffer (this may take a while): 764 of 1000
2024-03-04 08:55:10.862159: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:12: Filling up shuffle buffer (this may take a 

Shape of validation set of frames: (2, 10, 224, 224, 3)
Shape of validation labels: (2,)


In [29]:
net = tf.keras.applications.EfficientNetB0(include_top = False)
net.trainable = False

model = tf.keras.Sequential([
    tf.keras.layers.Rescaling(scale=255),
    tf.keras.layers.TimeDistributed(net),
    tf.keras.layers.Dense(101),
    tf.keras.layers.GlobalAveragePooling3D()
])

model.compile(optimizer = 'adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
              metrics=['accuracy'])

history = model.fit(train_ds, 
          epochs = 10,
          validation_data = val_ds,
          callbacks = tf.keras.callbacks.EarlyStopping(patience = 2, monitor = 'val_loss'))

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
Epoch 1/10


2024-03-04 08:55:32.403051: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
2024-03-04 08:55:42.708599: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:9: Filling up shuffle buffer (this may take a while): 108 of 1000
2024-03-04 08:55:52.734134: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:9: Filling up shuffle buffer (this may take a while): 215 of 1000
2024-03-04 08:56:12.729725: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:9: Filling up shuffle buffer (this may take a while): 417 of 1000
2024-03-04 08:56:32.731944: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:9: Filling up shuffle buffer (this may take a while): 633 of 1000
2024-03-04 08:56:52.691580: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:9: Filling up shuffle buffer (this may take a while): 851 of 1000


   1515/Unknown - 871s 508ms/step - loss: 2.1241 - accuracy: 0.5370

2024-03-04 09:09:59.052470: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 10416043903271811642
2024-03-04 09:09:59.052685: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 15390455012631567379
2024-03-04 09:09:59.052898: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 18384978038703466429
2024-03-04 09:10:11.040113: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:12: Filling up shuffle buffer (this may take a while): 105 of 1000
2024-03-04 09:10:21.040258: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:12: Filling up shuffle buffer (this may take a while): 211 of 1000
2024-03-04 09:10:40.997974: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:12: Filling up shuffle buffer (this may take a while): 423 of 1000
2024-03-04 09:10:51.018964: I tensorflow

Epoch 2/10


2024-03-04 09:12:04.956932: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 3789756474715271013


Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10


In [31]:

# Create the validation set
test_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['test'], 10),
                                        output_signature = output_signature)
test_frames, test_labels = next(iter(test_ds))
print(f'Shape of validation set of frames: {test_frames.shape}')
print(f'Shape of validation labels: {test_frames.shape}')
test_ds = val_ds.cache().shuffle(1000).prefetch(buffer_size = AUTOTUNE)
test_frames, test_labels = next(iter(test_ds))
test_loss, test_accuracy = model.evaluate(test_ds)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')

Shape of validation set of frames: (10, 224, 224, 3)
Shape of validation labels: (10, 224, 224, 3)
Test Loss: 0.5251172184944153
Test Accuracy: 0.8782178163528442


2024-03-04 09:45:11.262620: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 3789756474715271013


In [30]:
model.save('ucf101_subset_model.h5')

  saving_api.save_model(
