In [11]:
import os
import pandas as pd
from pytubefix import Playlist as pl, YouTube as yt

## Data Collection and Weak Annotation Based on Playlist Complexity Level Estimation

In [2]:
# read the hand-classified playlist list and preview its first rows
playlist_df = pd.read_csv(filepath_or_buffer='classified_video_playlist_urls.csv')
playlist_df.head(5)

Unnamed: 0,Channel Name,Video Playlist URL,Complexity Level,No. of Extracted Video URLs
0,Homeschool Pop,https://youtube.com/playlist?list=PLNmq_T1B9lj...,Simple,12
1,Rachel Randazzo,https://youtube.com/playlist?list=PLGV8Dna9QsU...,Simple,8
2,Math and Science,https://youtube.com/playlist?list=PLnVYEpTNGNt...,Simple,12
3,The Learning Bees,https://youtube.com/playlist?list=PLgBd17cQkVt...,Simple,12
4,EasyElimu Learning Innovations,https://youtube.com/playlist?list=PL9jFeeMFHxs...,Simple,8


In [3]:
# playlist urls as a plain Python list, in the same order as the rows of playlist_df
playlist_urls = playlist_df['Video Playlist URL'].tolist()
playlist_urls

['https://youtube.com/playlist?list=PLNmq_T1B9ljIg4yu14mzNas7hvwM5aZ2l&si=tfhs-CC2r8ZCmZZ',
 'https://youtube.com/playlist?list=PLGV8Dna9QsUwSDK_agDCT-24X_SfCrduE&si=dQdC6OPe5i6LYB-W',
 'https://youtube.com/playlist?list=PLnVYEpTNGNtWNqjyerNHdzNHWema_0n2W&si=Grp6Z2eO2PLXw0jI',
 'https://youtube.com/playlist?list=PLgBd17cQkVtCbajlUEkmfbPS5s4OUF0KU&si=DRp5JTIaE74-arae',
 'https://youtube.com/playlist?list=PL9jFeeMFHxsM3epT4vRyuwuPN7QDk8PY7&si=RAHiA5hGvuglA379',
 'https://youtube.com/playlist?list=PLKnB100k_YZkaH9sZ7tkOdR6JEQRdde28&si=9QVugbAXoM5XoJ3S',
 'https://youtube.com/playlist?list=PLBlnK6fEyqRgKEkaJvAe8oA6m_bUqt-MC&si=6uD5zItGuKm5Tbnr',
 'https://youtube.com/playlist?list=PLOspHqNVtKAC-FUNMq8qjYVw6_semZHw0&si=zqKxBtXwPqAxYJe1',
 'https://youtube.com/playlist?list=PLU_DCVXL8MyOFBXC-dO214lr1Uibp4TCi&si=ww0YX7bFuGHrJwqt',
 'https://youtube.com/playlist?list=PLKi4WTp6PRGX8NtQpzliTlMceQT1ua24f&si=6czQRzScE8u5NeRe',
 'https://youtube.com/playlist?list=PL9jFeeMFHxsOaoR6toc3RJmY5aycYcmGj&

In [4]:
# extract the specified number of urls and names of videos that are under 11 minutes from each playlist
# only a certain number of urls are extracted from each playlist to include the different presentation styles followed by different channels in the dataset
MAX_VIDEO_LENGTH_SECONDS = 660  # 11 minutes

data = []

for n, p_url in enumerate(playlist_urls):
    p_url_object = pl(p_url)

    # playlist_urls was built from playlist_df in row order, so the n-th url belongs to the n-th row;
    # .iloc makes this lookup positional instead of relying on the index labels being 0..N-1,
    # and reading the row once avoids repeating the lookup for every video
    playlist_row = playlist_df.iloc[n]
    num_urls_to_extract = playlist_row['No. of Extracted Video URLs']
    complexity_level = playlist_row['Complexity Level']

    num_extracted_urls = 0

    for v_url in p_url_object.video_urls:
        # ensure the number of extracted urls does not exceed the amount specified in the playlist dataset
        if num_extracted_urls >= num_urls_to_extract:
            break

        v_url_object = yt(v_url)

        # ensure the video's duration is under 11 minutes; longer videos are skipped and do not count
        if v_url_object.length < MAX_VIDEO_LENGTH_SECONDS:
            # every video inherits the complexity level of its playlist (weak label)
            data.append([v_url_object.title, v_url, complexity_level])
            num_extracted_urls += 1

    print(f"Playlist {n+1}: Extracted successfully")

Playlist 1: Extracted successfully
Playlist 2: Extracted successfully
Playlist 3: Extracted successfully
Playlist 4: Extracted successfully
Playlist 5: Extracted successfully
Playlist 6: Extracted successfully
Playlist 7: Extracted successfully
Playlist 8: Extracted successfully
Playlist 9: Extracted successfully
Playlist 10: Extracted successfully
Playlist 11: Extracted successfully
Playlist 12: Extracted successfully
Playlist 13: Extracted successfully
Playlist 14: Extracted successfully
Playlist 15: Extracted successfully
Playlist 16: Extracted successfully
Playlist 17: Extracted successfully
Playlist 18: Extracted successfully
Playlist 19: Extracted successfully


In [5]:
# check if the extracted video names and urls are stored and classified properly
# each entry is a [video title, video url, complexity level] list appended by the extraction loop
data

[['Plant Parts and Functions for Kids',
  'https://www.youtube.com/watch?v=18amLZ9vfG8',
  'Simple'],
 ['Adjectives for Kids | Homeschool Pop',
  'https://www.youtube.com/watch?v=4jxXnHSbicY',
  'Simple'],
 ['Roman Numerals For Kids',
  'https://www.youtube.com/watch?v=NrnXsKYpLJM',
  'Simple'],
 ['Benjamin Franklin for Kids',
  'https://www.youtube.com/watch?v=5LcMXnZfFFY',
  'Simple'],
 ['Paul Revere for Kids',
  'https://www.youtube.com/watch?v=dmwcfaythY4',
  'Simple'],
 ['Thomas Jefferson for Kids',
  'https://www.youtube.com/watch?v=uL_6b-k7aVI',
  'Simple'],
 ['Davy Crockett for Kids',
  'https://www.youtube.com/watch?v=Pxt_uA8w-t4',
  'Simple'],
 ['Susan B. Anthony for Kids',
  'https://www.youtube.com/watch?v=v23V-cHPpz4',
  'Simple'],
 ['George Washington Carver for Kids',
  'https://www.youtube.com/watch?v=X9Xyw43cC7M',
  'Simple'],
 ['Jackie Robinson for Kids | Biography Video',
  'https://www.youtube.com/watch?v=wMMwBhkxBv0',
  'Simple'],
 ['Urban, Suburban and Rural Areas

In [6]:
# total number of urls extracted from all the playlists
# (one entry is appended to `data` for every accepted video)
len(data)

193

In [7]:
# turn the collected [name, url, class] records into a dataframe with one row per video
video_df = pd.DataFrame(
    data=data,
    columns=['Video Name', 'Video URL', 'Complexity Level'],
)

In [8]:
# number of video urls in each class (Simple, Hard)
# these are the weak labels copied from the playlist each video was extracted from
video_df['Complexity Level'].value_counts()

Hard      98
Simple    95
Name: Complexity Level, dtype: int64

In [9]:
# save the weakly annotated dataset; the row index is not written to the file
video_df.to_csv(
    path_or_buf='presentation_complexity_dataset_weak_annotation.csv',
    index=False,
)

## Dataset Preparation for Manual Re-Annotation and Verification

In [16]:
# load the weakly annotated dataset saved earlier and preview its first rows
pcd_weak_df = pd.read_csv(filepath_or_buffer='presentation_complexity_dataset_weak_annotation.csv')
pcd_weak_df.head(5)

Unnamed: 0,Video Name,Video URL,Complexity Level
0,Plant Parts and Functions for Kids,https://www.youtube.com/watch?v=18amLZ9vfG8,Simple
1,Adjectives for Kids | Homeschool Pop,https://www.youtube.com/watch?v=4jxXnHSbicY,Simple
2,Roman Numerals For Kids,https://www.youtube.com/watch?v=NrnXsKYpLJM,Simple
3,Benjamin Franklin for Kids,https://www.youtube.com/watch?v=5LcMXnZfFFY,Simple
4,Paul Revere for Kids,https://www.youtube.com/watch?v=dmwcfaythY4,Simple


In [17]:
# hide the weak 'Complexity Level' labels so they cannot influence the manual re-annotation
modified_pcd_df = pcd_weak_df.drop(columns=['Complexity Level'])
modified_pcd_df.head()

Unnamed: 0,Video Name,Video URL
0,Plant Parts and Functions for Kids,https://www.youtube.com/watch?v=18amLZ9vfG8
1,Adjectives for Kids | Homeschool Pop,https://www.youtube.com/watch?v=4jxXnHSbicY
2,Roman Numerals For Kids,https://www.youtube.com/watch?v=NrnXsKYpLJM
3,Benjamin Franklin for Kids,https://www.youtube.com/watch?v=5LcMXnZfFFY
4,Paul Revere for Kids,https://www.youtube.com/watch?v=dmwcfaythY4


In [18]:
# shuffle the samples in the dataset to ensure fair labelling is performed
# frac=1 draws every row exactly once (a permutation), and the fixed random_state makes the
# shuffled order reproducible when the notebook is re-run from a fresh kernel
# the original row labels are kept by sample(), so the rows can be re-sorted after annotation
modified_pcd_df = modified_pcd_df.sample(frac=1, replace=False, random_state=42)
modified_pcd_df

Unnamed: 0,Video Name,Video URL
114,"5th Grade Math 8.2, Word Problem Solving, Use ...",https://www.youtube.com/watch?v=bi2CLSuTmcs
14,Ancient Greece for Kids | History Learning Video,https://www.youtube.com/watch?v=jloEzVh31TE
18,Ancient Mesopotamia 101 | National Geographic,https://www.youtube.com/watch?v=xVf5kZA0HtQ
121,Oral Poetry -War Poetry Lesson 5 -English Form...,https://www.youtube.com/watch?v=ghuuJNusnWE
74,Why Are There So Many Foundation Models?,https://www.youtube.com/watch?v=QPQy7jUpmyA
...,...,...
22,Lesson 3 - Finding Factors Of Numbers - (5th G...,https://www.youtube.com/watch?v=OWlvooseo_o
128,POEM - THE OLD WIFE AND THE GHOST || REFERENCE...,https://www.youtube.com/watch?v=yE4nEpP5XlU
143,“Chapter 12 – Huck and Jim Escape”|| English |...,https://www.youtube.com/watch?v=CFob4wPisgA
71,Adjectives (Part 2),https://www.youtube.com/watch?v=Q2VPDMelExs


In [19]:
# label the index 'ID'
# the ID is what later lets the shuffled, manually labelled rows be sorted back and compared with the weak labels
modified_pcd_df = modified_pcd_df.rename_axis('ID')
modified_pcd_df

Unnamed: 0_level_0,Video Name,Video URL
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
114,"5th Grade Math 8.2, Word Problem Solving, Use ...",https://www.youtube.com/watch?v=bi2CLSuTmcs
14,Ancient Greece for Kids | History Learning Video,https://www.youtube.com/watch?v=jloEzVh31TE
18,Ancient Mesopotamia 101 | National Geographic,https://www.youtube.com/watch?v=xVf5kZA0HtQ
121,Oral Poetry -War Poetry Lesson 5 -English Form...,https://www.youtube.com/watch?v=ghuuJNusnWE
74,Why Are There So Many Foundation Models?,https://www.youtube.com/watch?v=QPQy7jUpmyA
...,...,...
22,Lesson 3 - Finding Factors Of Numbers - (5th G...,https://www.youtube.com/watch?v=OWlvooseo_o
128,POEM - THE OLD WIFE AND THE GHOST || REFERENCE...,https://www.youtube.com/watch?v=yE4nEpP5XlU
143,“Chapter 12 – Huck and Jim Escape”|| English |...,https://www.youtube.com/watch?v=CFob4wPisgA
71,Adjectives (Part 2),https://www.youtube.com/watch?v=Q2VPDMelExs


In [20]:
# save the shuffled, label-free dataset for manual annotation
# the index is written on purpose so that the 'ID' of every row is stored in the file
modified_pcd_df.to_csv('modified_pcd_for_manual_annotation.csv', index=True)

## Dataset Finalization After Manual Re-Annotation and Verification

In [36]:
# read the manually annotated and verified dataset, using its 'ID' column as the index
mpcd_checked_df = pd.read_csv(
    filepath_or_buffer='modified_pcd_for_manual_annotation_checked.csv',
    index_col='ID',
)
mpcd_checked_df.head()

Unnamed: 0_level_0,Video Name,Video URL,Manual Complexity Level
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
114,"5th Grade Math 8.2, Word Problem Solving, Use ...",https://www.youtube.com/watch?v=bi2CLSuTmcs,hard
14,Ancient Greece for Kids | History Learning Video,https://www.youtube.com/watch?v=jloEzVh31TE,simple
18,Ancient Mesopotamia 101 | National Geographic,https://www.youtube.com/watch?v=xVf5kZA0HtQ,simple
121,Oral Poetry -War Poetry Lesson 5 -English Form...,https://www.youtube.com/watch?v=ghuuJNusnWE,hard
74,Why Are There So Many Foundation Models?,https://www.youtube.com/watch?v=QPQy7jUpmyA,simple


In [37]:
# read the dataset that carries the weak labels derived from the playlist complexity level estimation
pcd_weak_df = pd.read_csv(filepath_or_buffer='presentation_complexity_dataset_weak_annotation.csv')
pcd_weak_df.head(5)

Unnamed: 0,Video Name,Video URL,Complexity Level
0,Plant Parts and Functions for Kids,https://www.youtube.com/watch?v=18amLZ9vfG8,Simple
1,Adjectives for Kids | Homeschool Pop,https://www.youtube.com/watch?v=4jxXnHSbicY,Simple
2,Roman Numerals For Kids,https://www.youtube.com/watch?v=NrnXsKYpLJM,Simple
3,Benjamin Franklin for Kids,https://www.youtube.com/watch?v=5LcMXnZfFFY,Simple
4,Paul Revere for Kids,https://www.youtube.com/watch?v=dmwcfaythY4,Simple


In [38]:
# undo the shuffle that was applied before manual re-annotation:
# order the manually annotated rows by their 'ID' index, ascending
mpcd_checked_df = mpcd_checked_df.sort_index()
mpcd_checked_df.head()

Unnamed: 0_level_0,Video Name,Video URL,Manual Complexity Level
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Plant Parts and Functions for Kids,https://www.youtube.com/watch?v=18amLZ9vfG8,simple
1,Adjectives for Kids | Homeschool Pop,https://www.youtube.com/watch?v=4jxXnHSbicY,simple
2,Roman Numerals For Kids,https://www.youtube.com/watch?v=NrnXsKYpLJM,simple
3,Benjamin Franklin for Kids,https://www.youtube.com/watch?v=5LcMXnZfFFY,simple
4,Paul Revere for Kids,https://www.youtube.com/watch?v=dmwcfaythY4,simple


In [39]:
# video urls that we planned to remove during the manual re-annotation phase were labelled with n/a
# these urls are now labelled as nan (missing values) by Pandas
# (read_csv parses the string 'n/a' as a missing value by default)
mpcd_checked_df['Manual Complexity Level'].unique()

array(['simple', nan, 'hard'], dtype=object)

In [51]:
# count the missing values in each column of the manually annotated dataset
mpcd_checked_df.isna().sum()

Video Name                  0
Video URL                   0
Manual Complexity Level    22
dtype: int64

In [52]:
# flag every row of the manually annotated dataset that has at least one missing value,
# then keep the 'ID' labels of those rows
has_missing_value = mpcd_checked_df.isna().any(axis='columns')
missing_value_indices = mpcd_checked_df.loc[has_missing_value].index
missing_value_indices

Int64Index([ 46,  84, 106, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182,
            183, 184, 185, 186, 187, 188, 190, 191, 192],
           dtype='int64', name='ID')

In [60]:
# discard the rows of the manually annotated dataset whose complexity level is missing
mpcd_checked_df = mpcd_checked_df.drop(index=missing_value_indices)

In [61]:
# confirm that no missing values remain in the manually annotated dataset
mpcd_checked_df.isna().sum()

Video Name                 0
Video URL                  0
Manual Complexity Level    0
dtype: int64

In [62]:
# discard the same videos from the weakly annotated dataset
# the labels in missing_value_indices are 'ID' values taken from the manually annotated dataset
pcd_weak_df = pcd_weak_df.drop(index=missing_value_indices)

In [70]:
# lowercase the weak labels so that they use the same casing as the manual labels
pcd_weak_df = pcd_weak_df.assign(
    **{'Complexity Level': pcd_weak_df['Complexity Level'].str.lower()}
)

In [71]:
# row-by-row agreement between the manual labels and the weak labels;
# a small number of False values means the initial playlist-level estimation held up
labels_match = mpcd_checked_df['Manual Complexity Level'] == pcd_weak_df['Complexity Level']
labels_match.value_counts()

True     170
False      1
dtype: int64

In [72]:
# manually annotated dataset has a class ratio of 1.2:1 which is almost balanced
# (94 'simple' vs 77 'hard' videos in the recorded run)
mpcd_checked_df['Manual Complexity Level'].value_counts()

simple    94
hard      77
Name: Manual Complexity Level, dtype: int64

In [76]:
# give the manual label column the final name used in the published dataset
mpcd_checked_df = mpcd_checked_df.rename(
    columns={'Manual Complexity Level': 'Complexity Level'}
)
mpcd_checked_df.columns

Index(['Video Name', 'Video URL', 'Complexity Level'], dtype='object')

In [77]:
# save the final dataset; the 'ID' index is not written to the file
mpcd_checked_df.to_csv(
    path_or_buf='presentation_complexity_dataset.csv',
    index=False,
)

## Downloading YouTube Videos in the Dataset

In [7]:
def _unique_filename(save_path, filename):
    """Return (is_name_changed, filename) for a file name that is free inside save_path.

    If `filename` already exists in `save_path`, a numeric suffix (_1, _2, ...) is added
    before the extension until an unused name is found.
    """
    if not os.path.exists(os.path.join(save_path, filename)):
        return False, filename

    name, extension = os.path.splitext(filename)
    i = 1
    while os.path.exists(os.path.join(save_path, f'{name}_{i}{extension}')):
        i += 1

    return True, f'{name}_{i}{extension}'


# download YouTube video using url in 'mp4' file format with the highest resolution
def download_video(n, url, save_path):
    """Download one YouTube video as an mp4 file into save_path.

    Parameters
    ----------
    n : int
        Number of the video, only used in the progress messages.
    url : str
        URL of the YouTube video.
    save_path : str
        Directory the video file is written to.

    Returns
    -------
    tuple
        (is_name_changed, video_filename). is_name_changed is True when the default
        file name already existed in save_path and a numbered name was used instead.
        When the download fails the error is printed and (False, None) is returned,
        so callers can always unpack the result.
    """
    try:
        video_object = yt(url)
        video_stream = video_object.streams.filter(file_extension='mp4').get_highest_resolution()

        # change the name of the file to the same name with a number at the end if it has the same name as an existing file
        is_name_changed, video_filename = _unique_filename(save_path, video_stream.default_filename)

        if is_name_changed:
            video_stream.download(output_path=save_path, filename=video_filename)
        else:
            # let the library pick its default file name, exactly as before
            video_stream.download(output_path=save_path)
    except Exception as error:
        # Exception (not a bare except) keeps Ctrl+C / kernel interrupts working,
        # and the message now says what went wrong instead of hiding it
        print(f"Video {n}: Error occurred ({type(error).__name__}: {error})")
        return False, None

    print(f"Video {n}: Downloaded successfully")

    return is_name_changed, video_filename

In [8]:
# load the final presentation complexity dataset and preview its first rows
pcd_df = pd.read_csv(filepath_or_buffer='presentation_complexity_dataset.csv')
pcd_df.head(5)

Unnamed: 0,Video Name,Video URL,Complexity Level
0,Plant Parts and Functions for Kids,https://www.youtube.com/watch?v=18amLZ9vfG8,simple
1,Adjectives for Kids | Homeschool Pop,https://www.youtube.com/watch?v=4jxXnHSbicY,simple
2,Roman Numerals For Kids,https://www.youtube.com/watch?v=NrnXsKYpLJM,simple
3,Benjamin Franklin for Kids,https://www.youtube.com/watch?v=5LcMXnZfFFY,simple
4,Paul Revere for Kids,https://www.youtube.com/watch?v=dmwcfaythY4,simple


In [9]:
# collect the urls of every video labelled 'simple' in the presentation complexity dataset
is_simple_video = pcd_df['Complexity Level'] == 'simple'
simple_video_urls = pcd_df.loc[is_simple_video, 'Video URL'].tolist()
simple_video_urls

['https://www.youtube.com/watch?v=18amLZ9vfG8',
 'https://www.youtube.com/watch?v=4jxXnHSbicY',
 'https://www.youtube.com/watch?v=NrnXsKYpLJM',
 'https://www.youtube.com/watch?v=5LcMXnZfFFY',
 'https://www.youtube.com/watch?v=dmwcfaythY4',
 'https://www.youtube.com/watch?v=uL_6b-k7aVI',
 'https://www.youtube.com/watch?v=Pxt_uA8w-t4',
 'https://www.youtube.com/watch?v=v23V-cHPpz4',
 'https://www.youtube.com/watch?v=X9Xyw43cC7M',
 'https://www.youtube.com/watch?v=wMMwBhkxBv0',
 'https://www.youtube.com/watch?v=YRxNQPmj1-8',
 'https://www.youtube.com/watch?v=Jd4kD9TicbA',
 'https://www.youtube.com/watch?v=IAQAAJo1fI0',
 'https://www.youtube.com/watch?v=J2exRx5QMRU',
 'https://www.youtube.com/watch?v=jloEzVh31TE',
 'https://www.youtube.com/watch?v=lBYmOuajdC8',
 'https://www.youtube.com/watch?v=hO1tzmi1V5g',
 'https://www.youtube.com/watch?v=n7ndRwqJYDM',
 'https://www.youtube.com/watch?v=xVf5kZA0HtQ',
 'https://www.youtube.com/watch?v=KhDY4KJuvc0',
 'https://www.youtube.com/watch?v=xlK8Sm

In [12]:
# download videos in the 'simple' class using the urls extracted from the dataset
simple_save_path = 'Downloaded Videos/Simple Videos'

print("Downloading Simple Videos:")

for n, url in enumerate(simple_video_urls):
    # download_video can return None when a download fails; skip that video instead of
    # crashing on the tuple unpacking and aborting all remaining downloads
    download_result = download_video(n+1, url, simple_save_path)
    if download_result is None:
        continue
    is_name_changed, video_filename = download_result

    # change the video's name in the dataset if the video's filename is changed due to having the same name as an existing file
    if is_name_changed:
        new_video_name, extension = os.path.splitext(video_filename)
        url_mask = pcd_df['Video URL'] == url
        # read the name currently stored in the dataset before overwriting it; this reports what
        # actually changed and avoids another request to YouTube just to print the old title
        old_video_name = pcd_df.loc[url_mask, 'Video Name'].iloc[0]
        pcd_df.loc[url_mask, 'Video Name'] = new_video_name
        print(f"---Video name is changed in the presentation complexity dataset from '{old_video_name}' to '{new_video_name}'---")

Downloading Simple Videos:
Video 1: Downloaded successfully
Video 2: Downloaded successfully
Video 3: Downloaded successfully
Video 4: Downloaded successfully
Video 5: Downloaded successfully
Video 6: Downloaded successfully
Video 7: Downloaded successfully
Video 8: Downloaded successfully
Video 9: Downloaded successfully
Video 10: Downloaded successfully
Video 11: Downloaded successfully
Video 12: Downloaded successfully
Video 13: Downloaded successfully
Video 14: Downloaded successfully
Video 15: Downloaded successfully
Video 16: Downloaded successfully
Video 17: Downloaded successfully
Video 18: Downloaded successfully
Video 19: Downloaded successfully
Video 20: Downloaded successfully
Video 21: Downloaded successfully
Video 22: Downloaded successfully
Video 23: Downloaded successfully
Video 24: Downloaded successfully
Video 25: Downloaded successfully
Video 26: Downloaded successfully
Video 27: Downloaded successfully
Video 28: Downloaded successfully
Video 29: Downloaded successfu

In [14]:
# collect the urls of every video labelled 'hard' in the presentation complexity dataset
is_hard_video = pcd_df['Complexity Level'] == 'hard'
hard_video_urls = pcd_df.loc[is_hard_video, 'Video URL'].tolist()
hard_video_urls

['https://www.youtube.com/watch?v=bdBOVJlHtJE',
 'https://www.youtube.com/watch?v=eQgoDAsUW7Y',
 'https://www.youtube.com/watch?v=FkRmFMwYcTA',
 'https://www.youtube.com/watch?v=Xbi5aqiRoSU',
 'https://www.youtube.com/watch?v=y-qf_h_LmY0',
 'https://www.youtube.com/watch?v=HQJ8E5kdgbw',
 'https://www.youtube.com/watch?v=_SbKuvv-Jok',
 'https://www.youtube.com/watch?v=MOlyYI6YdH0',
 'https://www.youtube.com/watch?v=bVxUqES_-LY',
 'https://www.youtube.com/watch?v=yIx-yi23Kro',
 'https://www.youtube.com/watch?v=Jsu0HRcC40k',
 'https://www.youtube.com/watch?v=dtGI9E5_0_o',
 'https://www.youtube.com/watch?v=i5QmUTkD-cQ',
 'https://www.youtube.com/watch?v=7ecCApEIoHI',
 'https://www.youtube.com/watch?v=bcB7EKRqP9A',
 'https://www.youtube.com/watch?v=ntQDtwr12tE',
 'https://www.youtube.com/watch?v=lj6ApYHiaWg',
 'https://www.youtube.com/watch?v=BA0kznlaVFY',
 'https://www.youtube.com/watch?v=bi2CLSuTmcs',
 'https://www.youtube.com/watch?v=RdqYqgyaiwI',
 'https://www.youtube.com/watch?v=UNIQ14

In [15]:
# download videos in the 'hard' class using the urls extracted from the dataset
hard_save_path = 'Downloaded Videos/Hard Videos'

print("Downloading Hard Videos:")

for n, url in enumerate(hard_video_urls):
    # download_video can return None when a download fails; skip that video instead of
    # crashing on the tuple unpacking and aborting all remaining downloads
    download_result = download_video(n+1, url, hard_save_path)
    if download_result is None:
        continue
    is_name_changed, video_filename = download_result

    # change the video's name in the dataset if the video's filename is changed due to having the same name as an existing file
    if is_name_changed:
        new_video_name, extension = os.path.splitext(video_filename)
        url_mask = pcd_df['Video URL'] == url
        # read the name currently stored in the dataset before overwriting it; this reports what
        # actually changed and avoids another request to YouTube just to print the old title
        old_video_name = pcd_df.loc[url_mask, 'Video Name'].iloc[0]
        pcd_df.loc[url_mask, 'Video Name'] = new_video_name
        print(f"---Video name is changed in the presentation complexity dataset from '{old_video_name}' to '{new_video_name}'---")

Downloading Hard Videos:
Video 1: Downloaded successfully
Video 2: Downloaded successfully
Video 3: Downloaded successfully
Video 4: Downloaded successfully
Video 5: Downloaded successfully
Video 6: Downloaded successfully
Video 7: Downloaded successfully
Video 8: Downloaded successfully
Video 9: Downloaded successfully
Video 10: Downloaded successfully
Video 11: Downloaded successfully
Video 12: Downloaded successfully
Video 13: Downloaded successfully
Video 14: Downloaded successfully
Video 15: Downloaded successfully
Video 16: Downloaded successfully
Video 17: Downloaded successfully
Video 18: Downloaded successfully
Video 19: Downloaded successfully
Video 20: Downloaded successfully
Video 21: Downloaded successfully
Video 22: Downloaded successfully
Video 23: Downloaded successfully
Video 24: Downloaded successfully
Video 25: Downloaded successfully
Video 26: Downloaded successfully
Video 27: Downloaded successfully
Video 28: Downloaded successfully
Video 29: Downloaded successfull