In [None]:
!pip install pytube
!pip install imageio==2.4.1
!pip install youtube_comment_downloader

In [None]:
from pytube import YouTube, extract
from pytube.cli import on_progress
from moviepy.editor import *
import os, uuid, cv2, pandas as pd
from itertools import islice
from youtube_comment_downloader import *
from IPython.display import Markdown, display

In [None]:
def printmd(string):
    """Render ``string`` as Markdown in the notebook output area."""
    rendered = Markdown(string)
    display(rendered)

In [None]:
from google.colab import drive

# Absolute paths keep this cell idempotent: with relative paths a second run
# (after the working directory has already been changed) fails to find
# 'drive/MyDrive'.
DRIVE_MOUNT_POINT = '/content/drive'
PROJECT_DIR = os.path.join(DRIVE_MOUNT_POINT, 'MyDrive', 'scomp_project')

drive.mount(DRIVE_MOUNT_POINT)

# Create the project folder on first use and tell the user where the input
# CSV has to be placed.
if not os.path.exists(PROJECT_DIR):
    os.mkdir(PROJECT_DIR)
    printmd("**A folder called scomp_project has been made for you, Please insert the video data csv file into it and rename it to video_data_url.csv**")

# All later cells use paths relative to the project folder.
os.chdir(PROJECT_DIR)

In [None]:
def insert_header(path, headers):
    """Create the CSV file ``path`` containing only a header row.

    Nothing is done when ``path`` already exists, so calling this repeatedly
    never duplicates the header or touches existing rows.

    Args:
        path: Location of the CSV file to initialise.
        headers: Sequence of column names written as the header row.
    """
    # DictWriter is not among the names the notebook imports explicitly, so
    # import it here instead of relying on a star-import to provide it.
    from csv import DictWriter

    if os.path.exists(path):
        return

    # The ``with`` block closes the file; no explicit close() is needed.
    with open(path, 'a', encoding="utf-8", newline='') as f_object:
        DictWriter(f_object, fieldnames=headers).writeheader()

In [None]:
def createfolder(path):
    """Ensure that the directory ``path`` exists.

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    Args:
        path: Directory to create.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir(), and
    # makedirs also handles nested paths such as 'a/b/c'.
    os.makedirs(path, exist_ok=True)

In [None]:
def fetch_comments(link, max_comments=10, max_retries=5, retry_delay=1.0):
    """Return the text of the first comments of a video, sorted by popularity.

    Args:
        link: URL of the video whose comments are fetched.
        max_comments: Maximum number of comment texts to return.
        max_retries: How many times the download is attempted before giving up.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        A list with at most ``max_comments`` comment strings.

    Raises:
        RuntimeError: If every attempt failed; the last underlying error is
            attached as the cause.
    """
    import time

    last_error = None

    for attempt in range(1, max_retries + 1):
        try:
            downloader = YoutubeCommentDownloader()
            comments = downloader.get_comments_from_url(link, sort_by=SORT_BY_POPULAR)
            # The result is consumed inside the try block: if the downloader
            # hands back a lazy iterator, errors only surface while iterating
            # and would otherwise bypass the retry logic.
            return [comment['text'] for comment in islice(comments, max_comments)]
        except Exception as error:  # KeyboardInterrupt etc. still propagate
            last_error = error
            if attempt < max_retries:
                time.sleep(retry_delay)

    # Bounded retries: a permanently failing link must not loop forever.
    raise RuntimeError(
        'Could not fetch comments for ' + str(link) + ' after '
        + str(max_retries) + ' attempts'
    ) from last_error

In [None]:
def DownloadYoutube(link, id, video_data):
    """Download a video into the ``videos`` folder and collect its metadata.

    Args:
        link: URL of the video to download.
        id: Identifier stored under the ``'id'`` key of ``video_data``.
        video_data: Dict that is filled in place with the keys ``'id'``,
            ``'title'``, ``'description'`` and ``'comments'``.

    Returns:
        A ``(name, video_data)`` tuple where ``name`` is the randomly
        generated ``<uuid4>.mp4`` filename used for the download.
    """
    yt = YouTube(link, on_progress_callback=on_progress)

    # Metadata is gathered before the download starts.
    video_data['id'] = id
    video_data['title'] = yt.title
    video_data['description'] = yt.description
    video_data['comments'] = fetch_comments(link)

    best_stream = yt.streams.get_highest_resolution()

    # A random name avoids clashes between downloads.
    name = f'{uuid.uuid4()}.mp4'
    best_stream.download(output_path='videos', filename=name)
    print("Download is completed successfully")

    return name, video_data

In [None]:
def convert_to_seconds(time):
    """Convert a colon-separated timestamp into a whole number of seconds.

    Accepts ``ss``, ``mm:ss`` or ``hh:mm:ss``; every field must parse as an
    integer.

    Args:
        time: Timestamp string such as ``'1:30'`` or ``'01:02:03'``.

    Returns:
        The timestamp expressed in seconds as an ``int``.

    Raises:
        ValueError: If a field is not an integer.
        IndexError: If the timestamp has more than three fields.
    """
    # Weight of each field counted from the right: seconds, minutes, hours.
    unit_weights = (1, 60, 60 * 60)
    fields = time.split(':')

    return sum(
        int(field) * unit_weights[position]
        for position, field in enumerate(fields[::-1])
    )

In [None]:
def collect_images(path, new_filename, img_nos):
    """Save evenly spaced frames of a video as JPEG images.

    The video ``path + new_filename + '.mp4'`` is read twice: once to count
    its frames and once to write every ``capt_inc``-th frame to
    ``path + 'images/'``. Images are named ``<new_filename>_img<k>.jpg`` with
    ``k`` counting down from ``img_nos``.

    Args:
        path: Folder prefix of the video; it is concatenated as-is, so it is
            expected to end with a path separator.
        new_filename: Video filename without the ``.mp4`` extension.
        img_nos: Number of images requested.

    Returns:
        The number of images actually written, which can be lower than
        ``img_nos`` for very short or unreadable videos.
    """
    video_path = path + new_filename + ".mp4"
    requested = img_nos
    saved = 0

    createfolder(path + 'images')

    if img_nos > 0:
        # First pass: count the frames that can actually be decoded.
        totframe = 0
        cam = cv2.VideoCapture(video_path)
        try:
            while True:
                ret, _ = cam.read()
                if not ret:
                    break
                totframe += 1
        finally:
            cam.release()

        # At least one frame between captures: a zero interval (clip shorter
        # than img_nos + 1 frames) would stop capturing after the first frame.
        capt_inc = max(1, totframe // (img_nos + 1))
        capt_pnt = capt_inc
        curr_frame = 0

        # Second pass: write the frame found at every capture point.
        cam = cv2.VideoCapture(video_path)
        try:
            while img_nos > 0:
                ret, frame = cam.read()
                if not ret:
                    break

                if curr_frame == capt_pnt:
                    name = path + 'images/' + new_filename + '_img' + str(img_nos) + '.jpg'
                    if cv2.imwrite(name, frame):
                        saved += 1
                    capt_pnt += capt_inc
                    img_nos -= 1

                curr_frame += 1
        finally:
            # Released even if reading or writing raised. No windows are
            # created here, so there is nothing to destroy.
            cam.release()

    if saved < requested:
        print('Warning: only ' + str(saved) + ' of ' + str(requested) + ' requested images could be extracted')

    # Report what was really written rather than what was requested.
    print('Collected ' + str(saved) + ' images from video\n\n')

    return saved

In [None]:
def crop_out_video(filename, timefetch, emotion, id, index):
    """Cut a time range out of a downloaded video and extract images from it.

    The clip is written to ``cropped_videos/<emotion>/<uuid4>(<id>).mp4`` and
    ``collect_images`` is then asked for 6 images of that clip.

    Args:
        filename: Name of the source video inside the ``videos`` folder.
        timefetch: Time range as ``'<start> - <end>'``; both ends are parsed
            by ``convert_to_seconds``.
        emotion: Name of the sub-folder of ``cropped_videos`` to write to.
        id: Identifier appended (in parentheses) to the generated filename.
        index: Row number, used only in the progress message.

    Raises:
        ValueError: If ``timefetch`` does not contain a ``'-'`` separator.
    """
    timelimits = timefetch.split('-')
    if len(timelimits) < 2:
        # Clearer than the IndexError the missing separator would cause.
        raise ValueError("Timestamp must look like 'start-end', got: " + repr(timefetch))

    start_time = convert_to_seconds(timelimits[0].strip())
    end_time = convert_to_seconds(timelimits[1].strip())

    target_dir = 'cropped_videos/' + emotion + '/'
    new_filename = str(uuid.uuid4()) + '(' + id + ')'

    source = VideoFileClip('videos/' + filename)
    try:
        video = source.subclip(start_time, end_time)
        createfolder('cropped_videos/' + emotion)
        video.write_videofile(target_dir + new_filename + '.mp4')
    finally:
        # Close the clip even when cutting or writing fails, so the resources
        # it holds are not leaked across the rows of a long run.
        source.close()

    print('Video ' + str(index) + ' downloaded')

    collect_images(target_dir, new_filename, 6)

In [None]:
if __name__ == '__main__':

    # DictWriter is not among the names the notebook imports explicitly, so
    # import it here instead of relying on a star-import to provide it.
    from csv import DictWriter

    # Input table: one row per clip with the columns Emotion, Url, Timestamp.
    df = pd.read_csv('video_data_url.csv')

    field_names_post = ['id','title','description','comments']
    field_names_fail = ['Emotion', 'Url', 'Timestamp']

    # Maps a video id to the filename it was downloaded to, so a video that
    # appears in several rows is only downloaded once.
    link_y_map = {}

    createfolder('videos')
    createfolder('cropped_videos')

    insert_header('cropped_videos/posts_data.csv', field_names_post)
    insert_header('cropped_videos/failed_data.csv', field_names_fail)

    for index, row in df.iterrows():

        link = row['Url']
        emotion = row['Emotion']
        timestamp = row['Timestamp']

        try:
            # Parsed inside the try block so that a malformed URL is logged as
            # a failed row instead of aborting the whole run.
            video_id = extract.video_id(link)

            if video_id not in link_y_map:
                video_data = {}
                filename, video_data = DownloadYoutube(link, video_id, video_data)

                # Store the metadata as soon as the download succeeded. If it
                # were written only after cropping, a failed crop would lose
                # it for good, because later rows reuse the cached download.
                with open('cropped_videos/posts_data.csv', 'a', encoding="utf-8", newline='') as f_object:
                    DictWriter(f_object, fieldnames=field_names_post).writerow(video_data)

                link_y_map[video_id] = filename

            filename = link_y_map[video_id]

            if len(filename) > 0:
                crop_out_video(filename, timestamp, emotion, video_id, index)

        except Exception as error:
            # `Exception` rather than a bare except, so that interrupting the
            # kernel still stops the loop.
            print('!!!  Unable to process video no ' +str(index)+'  !!!')
            print('Reason: ' + repr(error))
            print('Copying information of the data to failed_data.csv\n\n')

            with open('cropped_videos/failed_data.csv', 'a', encoding="utf-8", newline='') as f_object:
                # extrasaction='ignore' keeps additional CSV columns from
                # raising inside the error handler itself.
                dictwriter_object = DictWriter(f_object, fieldnames=field_names_fail, extrasaction='ignore')
                dictwriter_object.writerow(row.to_dict())