# Recognition Data : Videos to Frames



In [46]:
#We load the libraries needed for the project
import cv2
import matplotlib.pyplot as plt
import boto3
from io import BytesIO
import io
import os
import multiprocessing as mp
import numpy as np
import urllib
import json
import pandas as pd
from loading_s3_data import loading_credentials, connect_s3, download_s3_data, get_signed_urls
from video_preprocessing import * 
from IPython.display import Image, display


In this notebook, we split every video in our preprocessed dataset into a sequence of frames and upload those frames to S3.

In [18]:
# Load the dataset CSV (one row per video) and preview its first rows.
df = pd.read_csv('../data/dataset_2.csv')
df.head()

Unnamed: 0,video_key,child_id,ASD,age,gender,VideoDuration,VideoFrameRate,VideoFrameHeight,VideoFrameWidth,Confidence1,...,EyesClosedConf,EyeGlasses,EyeGlassesConf,SunGlasses,SunGlassesConf,AgeLow,AgeHigh,Gender,GenderConf,Size
0,%2B07758486393/1637011476385/GuessWhat.mp4,+07758486393__daisy,1,0.0,Female,57.4,29.9,720.0,960.0,99.5,...,95.0,False,89.6,False,94.2,24.3,32.5,Female,95.8,0.264408
1,%2B07766544436/1626976603665/GuessWhat.mp4,+07766544436__andrew,1,11.0,Male,57.1,30.0,720.0,960.0,99.5,...,86.2,False,97.1,False,100.0,10.6,18.3,Female,98.5,0.030907
2,%2B07766544436/1626976728265/GuessWhat.mp4,+07766544436__andrew,1,11.0,Male,57.4,30.0,720.0,960.0,97.3,...,84.4,False,96.1,False,100.0,11.7,19.7,Female,91.3,0.022504
3,%2B11991436014/1567730951746/GuessWhat.mp4,+11991436014__Bernardo,1,3.0,Male,87.6,30.0,720.0,960.0,96.8,...,91.3,False,91.7,False,100.0,6.1,13.4,Female,93.6,0.406533
4,%2B13012528047/1587239689132/GuessWhat.mp4,+13012528047__dylan,1,8.0,Male,57.4,29.8,600.0,800.0,97.5,...,87.0,False,94.8,False,100.0,14.5,21.9,Male,93.9,0.031913


In [19]:
# Fetch the AWS key pair once and reuse it for both S3 handles: the low-level
# client and the resource-style interface.
AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY = loading_credentials()
_s3_auth = {
    'aws_access_key_id': AWS_ACCESS_KEY_ID,
    'aws_secret_access_key': AWS_SECRET_ACCESS_KEY,
}
client = boto3.client('s3', **_s3_auth)
resource = boto3.resource('s3', **_s3_auth)

# Bucket the videos are signed from, and bucket the extracted frames go to.
bucket_name = 'headsup-du1r3b78fy'
upload_bucket = 'bmi212marie'


In [56]:
# Swap the bucket *name* for a boto3 Bucket handle. The isinstance guard keeps
# this cell idempotent: without it, re-running the cell would feed the Bucket
# object back into resource.Bucket() as if it were a bucket name.
if isinstance(upload_bucket, str):
    upload_bucket = resource.Bucket(upload_bucket)

In [87]:
# Attach a signed URL to every video listed in the CSV, then report how many
# rows came back and preview the resulting table.
df_signed = get_signed_urls(
    'headsup-du1r3b78fy',
    '../data/dataset_2.csv',
    time_limit=20000,
)
print(len(df_signed))
df_signed.head()

2507


Unnamed: 0,video_key,child_id,ASD,age,gender,VideoDuration,VideoFrameRate,VideoFrameHeight,VideoFrameWidth,Confidence1,...,EyeGlasses,EyeGlassesConf,SunGlasses,SunGlassesConf,AgeLow,AgeHigh,Gender,GenderConf,Size,signed_url
0,%2B07758486393/1637011476385/GuessWhat.mp4,+07758486393__daisy,1,0.0,Female,57.4,29.9,720.0,960.0,99.5,...,False,89.6,False,94.2,24.3,32.5,Female,95.8,0.264408,https://headsup-du1r3b78fy.s3.amazonaws.com/%2...
1,%2B07766544436/1626976603665/GuessWhat.mp4,+07766544436__andrew,1,11.0,Male,57.1,30.0,720.0,960.0,99.5,...,False,97.1,False,100.0,10.6,18.3,Female,98.5,0.030907,https://headsup-du1r3b78fy.s3.amazonaws.com/%2...
2,%2B07766544436/1626976728265/GuessWhat.mp4,+07766544436__andrew,1,11.0,Male,57.4,30.0,720.0,960.0,97.3,...,False,96.1,False,100.0,11.7,19.7,Female,91.3,0.022504,https://headsup-du1r3b78fy.s3.amazonaws.com/%2...
3,%2B11991436014/1567730951746/GuessWhat.mp4,+11991436014__Bernardo,1,3.0,Male,87.6,30.0,720.0,960.0,96.8,...,False,91.7,False,100.0,6.1,13.4,Female,93.6,0.406533,https://headsup-du1r3b78fy.s3.amazonaws.com/%2...
4,%2B13012528047/1587239689132/GuessWhat.mp4,+13012528047__dylan,1,8.0,Male,57.4,29.8,600.0,800.0,97.5,...,False,94.8,False,100.0,14.5,21.9,Male,93.9,0.031913,https://headsup-du1r3b78fy.s3.amazonaws.com/%2...


In [88]:
def split(signed_url, video_key, upload_bucket, verbose=True):
    """Split a video into JPEG frames and upload every frame to S3.

    Frames are decoded one at a time, written to a temporary local file under
    ``data/frames_<video_key>/``, uploaded under the key
    ``frames/<video_key>/frameNNNN.jpg`` and then deleted locally.

    Args:
        signed_url (str): Signed URL for the video.
        video_key (str): Key for the video; used to build both the local
            temporary path and the destination S3 key.
        upload_bucket: Destination bucket object exposing
            ``upload_file(filename, key)`` (the notebook passes a boto3
            ``Bucket``).
        verbose (bool): When True (default), print per-frame progress as the
            original implementation did.
    Returns:
        None
    Raises:
        OSError: If a frame cannot be written to the temporary file.
    """
    # check_rotation is not defined in this notebook; presumably it comes from
    # the star import of video_preprocessing. A truthy result means every frame
    # is flipped vertically (flip code 0) before being saved.
    flip_flag = bool(check_rotation(signed_url))
    if flip_flag and verbose:
        print("Video is rotated.")

    # Create the folder that temporarily holds each frame before upload.
    temp_file_path = 'data/frames_' + str(video_key)
    os.makedirs(temp_file_path, exist_ok=True)

    vidcap = cv2.VideoCapture(signed_url)
    count = 0
    try:
        while True:
            success, image = vidcap.read()
            if not success:
                # End of stream (or the stream could not be opened): image is
                # None here, so it must not be flipped or written.
                break
            if verbose:
                print('Read a new frame:', success)

            # Flip the frame that is about to be saved, so the first frame is
            # corrected too and no flip is attempted after the last read.
            if flip_flag:
                image = cv2.flip(image, 0)

            k = f"frames/{video_key}/frame{count:04d}.jpg"
            filename = f"{temp_file_path}/frame{count:04d}.jpg"
            if verbose:
                print(k, filename)

            if not cv2.imwrite(filename, image):  # save frame as JPEG file
                raise OSError(f"Could not write frame to {filename}")
            try:
                upload_bucket.upload_file(filename, k)
            finally:
                # Never leave the temporary frame behind, even if the upload fails.
                os.remove(filename)

            if verbose:
                print(count)
            count += 1
    finally:
        # Release the decoder / network stream whatever happened above.
        vidcap.release()

In [89]:
def splitallFrames(signed_urls, allFilenames, upload_bucket, start_index=49):
    """Split a batch of videos into frames and upload them to ``upload_bucket``.

    Videos are processed sequentially, one ``split`` call per video.

    Args:
        signed_urls (list): List of signed urls for the videos.
        allFilenames (list): List of video keys, parallel to ``signed_urls``.
        upload_bucket: Destination bucket object, forwarded to ``split``.
        start_index (int): Index of the first video to process. The default of
            49 preserves the resume point that used to be hard-coded in the
            loop; pass 0 to process every video.
    Returns:
        None
    Raises:
        ValueError: If the two lists differ in length or ``start_index`` is
            negative.
    """
    if len(signed_urls) != len(allFilenames):
        raise ValueError(
            "signed_urls and allFilenames must have the same length "
            "({} != {})".format(len(signed_urls), len(allFilenames))
        )
    if start_index < 0:
        raise ValueError("start_index must be >= 0, got {}".format(start_index))

    for i in range(start_index, len(signed_urls)):
        #We get the signed url for the video
        signed_url = signed_urls[i]
        #We get the video key
        file_id = allFilenames[i]
        try:
            #We split the video into frames and upload them to the bucket
            split(signed_url, file_id, upload_bucket)
        except Exception:
            # Report where the run stopped so it can be resumed, then let the
            # original exception propagate unchanged.
            print("Video {} failed; resume with start_index={}".format(i, i))
            raise
        print("Video {} uploaded".format(i))

In [90]:
#We want to split the videos into frames and store them in the S3 bucket 'bmi212marie'
#This cell only prepares the inputs: the video keys, the matching signed urls
#and a Bucket handle for the destination bucket.
#NOTE(review): multiprocessing is imported at the top of the notebook but is not
#used by any visible code; splitallFrames processes the videos one after another.
bucket_name = 'headsup-du1r3b78fy'
upload_bucket = 'bmi212marie'
allFilenames = df_signed['video_key'].tolist()
signed_urls = df_signed['signed_url'].tolist()
upload_bucket = resource.Bucket(upload_bucket)


In [91]:
# Long-running step: every frame of every processed video is uploaded as its
# own S3 object. The output captured below ends in an S3UploadFailedError
# (RequestTimeTooSkewed), so this particular run did not finish.
splitallFrames(signed_urls, allFilenames, upload_bucket)

Read a new frame: True
frames/_REVIEWED/1_original_study/P11/1515090259141/HeadsUp.mp4/frame0000.jpg data/frames__REVIEWED/1_original_study/P11/1515090259141/HeadsUp.mp4/frame0000.jpg
0
Read a new frame: True
frames/_REVIEWED/1_original_study/P11/1515090259141/HeadsUp.mp4/frame0001.jpg data/frames__REVIEWED/1_original_study/P11/1515090259141/HeadsUp.mp4/frame0001.jpg
1
Read a new frame: True
frames/_REVIEWED/1_original_study/P11/1515090259141/HeadsUp.mp4/frame0002.jpg data/frames__REVIEWED/1_original_study/P11/1515090259141/HeadsUp.mp4/frame0002.jpg
2
Read a new frame: True
frames/_REVIEWED/1_original_study/P11/1515090259141/HeadsUp.mp4/frame0003.jpg data/frames__REVIEWED/1_original_study/P11/1515090259141/HeadsUp.mp4/frame0003.jpg
3
Read a new frame: True
frames/_REVIEWED/1_original_study/P11/1515090259141/HeadsUp.mp4/frame0004.jpg data/frames__REVIEWED/1_original_study/P11/1515090259141/HeadsUp.mp4/frame0004.jpg
4
Read a new frame: True
frames/_REVIEWED/1_original_study/P11/151509025

S3UploadFailedError: Failed to upload data/frames__REVIEWED/1_original_study/P13/1515091735149/HeadsUp.mp4/frame2042.jpg to bmi212marie/frames/_REVIEWED/1_original_study/P13/1515091735149/HeadsUp.mp4/frame2042.jpg: An error occurred (RequestTimeTooSkewed) when calling the PutObject operation: The difference between the request time and the current time is too large.