## Youtube Video and Processing

#### A short script that extracts short clips of accidents from YouTube dashcam videos

In [1]:
#### start up inline plotting, all usual packages in "startup", plus any extras
%matplotlib inline
%run startup
from skimage import transform
import cv2
import os
import skimage
from skimage import io
from sklearn import cluster, datasets
import pafy
import pickle



#### Set up function to download youtube videos

In [2]:
def download_youtube(fileurl, name):
    '''Download a (YouTube) video through pafy and save it as name + '.mp4'.

    fileurl = URL of the (youtube) video to be downloaded
    name = output filename without extension ('.mp4' is appended)

    Returns whatever the pafy stream's download() call returns.
    '''
    ### pafy reference: https://pythonhosted.org/pafy/#stream-methods and https://pypi.python.org/pypi/pafy
    stream = pafy.new(fileurl).getbest()             ### pafy object for the URL -> its best-resolution stream
    print("Size is %s" % stream.get_filesize())     ### report the file size before downloading
    target = name + '.mp4'                           ### saved under this filename
    return stream.download(target)

### Function to load the video frame by frame
Saves clips containing accidents (identified manually) to pickle files for model training.

In [15]:
def load_frames(videofile, name, tstart, save='yes', nframes=99, factors=(3, 3)):
    '''Cut fixed-length clips out of a downloaded video, downsample them, and pickle them for model training.

    videofile = filepath to the video to read in
    name = filename prefix for the pickles; each clip is written to name + str(frame counter after the clip) + '.pkl'
    tstart = collection of clip start positions. These are compared against the running frame counter,
             so they are frame indices (convert seconds to frames before passing them in).
    save = clips are pickled unless this is 'no' or False (default 'yes')
    nframes = number of frames grabbed after each start position (default 99, about 3.3 s if the video is 30 fps)
    factors = block size passed to skimage.transform.downscale_local_mean (default (3, 3))

    Returns the list of processed frames of the last complete clip, or an empty list if no clip was extracted.
    A clip that is cut short by the end of the video is discarded rather than saved.
    '''
    start_frames = set(tstart)             ### built once so the per-frame membership test does not rescan tstart
    frames = []                            ### last complete clip; stays empty if no start position is ever reached
    vidcap = cv2.VideoCapture(videofile)   ### open the video file to begin reading frames
    try:
        ### the first frame is read and dropped, so "count" below equals the index of the frame just read
        success, img = vidcap.read()
        count = 0

        while success:
            success, img = vidcap.read()   ### attempt to read in the next frame
            if not success:                ### end of video (or read error): nothing more to scan
                break
            count += 1
            if count not in start_frames:
                continue

            clip = []                      ### processed frames of the clip starting here
            for _ in range(nframes):
                success, img = vidcap.read()
                if not success:            ### video ended mid-clip; there is no image to process
                    break
                ### ref: https://www.safaribooksonline.com/library/view/programming-computer-vision/9781449341916/ch06.html
                ### NOTE(review): OpenCV delivers frames in BGR channel order while rgb2gray expects RGB, so the
                ### red/blue weights look swapped. Left as-is to stay consistent with clips already pickled - confirm.
                tmp = skimage.color.rgb2gray(array(img))                     ### grayscale image to reduce data
                tmp = skimage.transform.downscale_local_mean(tmp, factors)   ### downsample image
                clip.append(tmp)

            if not success:                ### incomplete clip: drop it and stop scanning
                break

            count += nframes               ### account for the frames we've just cycled through
            frames = clip
            ### print check (formatted so the output is the same on Python 2 and Python 3)
            print("%s %s %s" % (count, tstart, name + str(count)))
            if save not in ('no', False):
                with open(name + str(count) + '.pkl', "wb") as fh:   ### "with" closes the file even if dump fails
                    pickle.dump(frames, fh)
    finally:
        vidcap.release()                   ### always give the capture handle back

    return frames

In [16]:
### Video 1 - a compilation
#download_youtube('https://www.youtube.com/watch?v=9YI0Ct1V4LU', 'comp1')
### tstart  = array of start times, converted from minutes to seconds
#tstart = array([6, 44, 1*60+17, 1*60+36,  1*60+59, 2*60+19, 2*60+51, 3*60+54, 4*60+11, 4*60+26, 4*60+47, 5*60+6, 6*60+30])*30  ## tlen = (7*60+23)*30
#print tstart
#frames = load_frames('comp1.mp4', 'comp1', tstart)

In [17]:
### check how a few random frames look to check that video was read in okay
#for i in range(8,13):
#    imshow(frames[i][50], cmap='gray'); plt.show()

In [18]:
### Video 2 - a compilation
#download_youtube('https://www.youtube.com/watch?v=o4SJiAAMXo0', 'comp2')
#tstart = array([5, 16, 27, 1*60+14, 1*60+20, 2*60+27, 2*60+36, 3*60+4, 4*60+7, 4*60+18, 4*60+36, 5*60+17, 5*60+47, 
                #6*60+20, 7*60+3, 7*60+45, 8*60+13, 8*60+37, 9*60+32, 10*60+4, 10*60+28])*30  ## tlen = (7*60+23)*30
#print tstart
#frames = load_frames('comp2.mp4', 'comp2', tstart)

In [22]:
### Video 3 - a compilation
#download_youtube('https://www.youtube.com/watch?v=uXW7H1uonx8', 'comp3')
#tstart = array([15, 43, 53, 1*60+4, 1*60+13, 1*60+24, 1*60+47, 2*60+21, 2*60+31, 2*60+42, 2*60+55, 3*60+5, 3*60+27, 
#                4*60+6, 4*60+27, 4*60+39, 5*60+5, 5*60+22, 5*60+43, 5*60+57, 6*60+11, 6*60+18, 7*60+3, 7*60+35, 8*60+2, 
#                8*60+13, 8*60+27, 8*60+40, 10*60+30, 10*60+40, 11*60+8])*30
#print tstart
#frames = load_frames('comp3.mp4', 'comp3', tstart)

In [8]:
### Video 4 - a compilation
#download_youtube('https://www.youtube.com/watch?v=vj2HJ3DPsiM', 'comp4')
#tstart = array([40, 2*60+18, 2*60+31, 3*60+8, 3*60+46, 4*60+7, 4*60+56, 5*60+18, 5*60+39, 6*60+22, 6*60+40, 7*60+26, 
#                8*60+55, 9*60+5, 9*60+46])*30
#print tstart
#frames = load_frames('comp4.mp4', 'comp4', tstart)

In [21]:
### Video 5 - a compilation
#download_youtube('https://www.youtube.com/watch?v=kVl-6-A9ZO4', 'comp5')
#tstart = array([27, 55, 1*60+16, 1*60+51, 2*60, 2*60+11, 3*60+49, 4*60+28, 5*60+17, 6*60+31, 7*60+14, 8*60+27, 
#                8*60+41, 9*60+4, 9*60+26, 9*60+46])*30
#print tstart
#frames = load_frames('comp5.mp4', 'comp5', tstart)