In [1]:
### Yolo model ###
##################
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.0.137-py3-none-any.whl (605 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m605.5/605.5 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: ultralytics
Successfully installed ultralytics-8.0.137


In [2]:
### Imports ###
###############

import numpy as np
import pandas as pd
import cv2
import shutil
import os
from tqdm import tqdm


import torch
from ultralytics import YOLO
from PIL import Image

In [3]:
### Set data directory
##################

# connect to drive
from google.colab import drive
drive.mount('/content/drive')

# set data directory
data_dir = '/content/drive/MyDrive/0_Masterarbeit/2_Pipelines/Data'

Mounted at /content/drive


In [4]:
### Upload video and audio files ###
####################################

data_file = 'ferrari'

# copy zip files
shutil.copy(os.path.join(data_dir, f'Video_{data_file}.zip'), './')
shutil.copy(os.path.join(data_dir, f'Audio_{data_file}.zip'), './')

# create folders to unpack zip files to
os.makedirs('./Video')
os.makedirs('./Audio')

# unpack zip files
shutil.unpack_archive(f'./Video_{data_file}.zip', extract_dir = './Video')
shutil.unpack_archive(f'./Audio_{data_file}.zip', extract_dir = './Audio')

In [5]:
### Load model ###
##################

# model
yolo_model = YOLO('yolov8l.pt')

# coco classes
classes = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus',
           6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign',
           12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep',
           19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella',
           26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball',
           33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard',
           38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife',
           44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli',
           51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch',
           58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse',
           65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink',
           72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
           78: 'hair drier', 79: 'toothbrush'}


Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt to yolov8l.pt...
100%|██████████| 83.7M/83.7M [00:00<00:00, 371MB/s]


In [6]:
### Function to apply object detection to individual videos ###
###############################################################

def object_detection(video_file):

    # initialize video capturing object
    cap = cv2.VideoCapture(video_file)

    # extract fps to set interval between frames to be contidered
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # frame interval -> every n = 1 second, a frame is considered in prediction
    frame_interval = 1 * fps

    # initialize counter and dicts to store object probabilities and counts
    counter = 0
    obj_count_dict = {value:0 for _ , value in classes.items()}
    pr_dict = {value:0 for _ , value in classes.items()}

    # loop though video
    while True:
      ret, frame = cap.read()

      counter+=1

      if not ret:
        break

      if counter % frame_interval != 0:
        continue

      # crop faces from frames and apply emotion classification
      img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      im_pil = Image.fromarray(img_rgb)
      pr = yolo_model.predict(source = im_pil, conf = 0.5, save = False, verbose = None)
      for i, j in zip(pr[0].boxes.cpu().numpy().cls, pr[0].boxes.cpu(). numpy().conf):
        pr_dict[classes[i]] += j
        obj_count_dict[classes[i]] += 1

    # Release the video capture object and close the windows
    cap.release()
    cv2.destroyAllWindows()

    # calculate mean probabilites individually for detected objects
    result = np.array([a/b if b > 0 else a for a, b in zip([p for _, p in pr_dict.items()],[o for _, o in obj_count_dict.items()])])


    # return normalized probabilitiy vector
    if np.all(result == 0):
      return list(result)
    else:
      return list(result / result.sum())


In [7]:
### Extract for each video ###
##############################

video_id = []
object_probs = []
for video_file in tqdm(os.listdir('./Video')):

  video_id.append(video_file[:-4])
  object_probs.append(object_detection(os.path.join('./Video', video_file)))

100%|██████████| 182/182 [07:37<00:00,  2.51s/it]


In [8]:
### Create final dataframe for action detection ###
###################################################

objects_df = pd.DataFrame(object_probs)
objects_name_dict = {i:f"p_object_{c}" for i, (_, c) in enumerate(classes.items())}
objects_df = objects_df.rename(columns = objects_name_dict)
objects_df['video_id'] = video_id

In [9]:
### Save as csv file ###
########################
save_dir = '/content/drive/MyDrive/0_Masterarbeit/2_Pipelines/Feature_outputs'

objects_df.to_csv(f'./object_features_{data_file}.csv')
shutil.copy(f'./object_features_{data_file}.csv', save_dir)

'/content/drive/MyDrive/0_Masterarbeit/2_Pipelines/Feature_outputs/object_features_ferrari.csv'

In [10]:
objects_df

Unnamed: 0,p_object_person,p_object_bicycle,p_object_car,p_object_motorcycle,p_object_airplane,p_object_bus,p_object_train,p_object_truck,p_object_boat,p_object_traffic light,...,p_object_sink,p_object_refrigerator,p_object_book,p_object_clock,p_object_vase,p_object_scissors,p_object_teddy bear,p_object_hair drier,p_object_toothbrush,video_id
0,0.221253,0.0,0.000000,0.000000,0.19776,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.0,0.149793,0.0,0.0,0.0,2qjfQZ53oB0
1,0.000000,0.0,1.000000,0.000000,0.00000,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,_Hpq7xxtBGc
2,1.000000,0.0,0.000000,0.000000,0.00000,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,mCOEWzSqDqs
3,0.000000,0.0,0.387402,0.000000,0.00000,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,4bT_mfLmJyk
4,0.380678,0.0,0.343746,0.000000,0.00000,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,gvQp21gYTxI
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,0.279443,0.0,0.000000,0.275494,0.00000,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,Vn6AVLRp5CA
178,0.164496,0.0,0.207123,0.000000,0.00000,0.0,0.000000,0.128336,0.0,0.0,...,0.0,0.181561,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,MsKwouT2mKk
179,0.100032,0.0,0.112125,0.090899,0.00000,0.0,0.114292,0.089039,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.0,0.108108,0.0,0.0,0.0,Muf9p13RUcI
180,0.000000,0.0,0.513194,0.000000,0.00000,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,IsOLLqSiqU0
