# Permalink
# Find file Copy path
# Fetching contributors...
# Cannot retrieve contributors at this time
# 65 lines (57 sloc) 2.45 KB
import pandas as pd
from apiclient.discovery import build
from IPython.display import clear_output
# Read the Google API key from a local file. The context manager guarantees
# the file handle is closed, and strip() removes the trailing newline that
# editors usually append, which would otherwise become part of the key.
with open('google-api-key.txt', 'r') as api_key_file:
    API_KEY = api_key_file.read().strip()

# Shared YouTube Data API v3 client used by the download functions below.
youtube_service = build('youtube', 'v3', developerKey=API_KEY)
def download_channel_videos(channel):
    """
    Download metadata for all videos of the given channel
    from the Youtube API.

    Pages through the playlist identified by ``channel['playlist_id']``
    until the response no longer carries a ``nextPageToken``, printing a
    progress message after every page.

    `channel` is a mapping that must provide the 'playlist_id',
    'youtube_id' and 'title' keys.

    Returns a list of dicts, one per playlist item, with the keys
    'youtube_id', 'title', 'description', 'published_at' and
    'channel_youtube_id'.
    """
    videos = []
    page_token = None
    while True:
        response = youtube_service.playlistItems().list(
            playlistId=channel['playlist_id'],
            part="snippet",
            # The API serves only 5 items per page by default; asking for
            # the maximum of 50 cuts the number of requests tenfold.
            maxResults=50,
            pageToken=page_token,
        ).execute()

        for item in response['items']:
            snippet = item['snippet']
            videos.append({
                'youtube_id': snippet['resourceId']['videoId'],
                'title': snippet['title'],
                'description': snippet['description'],
                'published_at': snippet['publishedAt'],
                'channel_youtube_id': channel['youtube_id'],
            })

        page_token = response.get("nextPageToken")

        # Redraw the progress line in place instead of stacking one line
        # of output per page.
        clear_output(wait=True)
        print('Downloading videos from "{}": {}...'.format(channel['title'], len(videos)))

        if page_token is None:
            # There are no more videos to download
            clear_output()
            break

    return videos
def download_channels_videos(channels):
    """
    Download metadata for all videos of all the given channels,
    then creates a separate CSV file (named videos-<CHANNEL>.csv)
    with that information for each channel.

    `channels` is a pandas DataFrame with one row per channel. Each row
    must provide the 'slug' column used to name the output file, plus the
    columns required by `download_channel_videos()`.
    """
    for _, channel in channels.iterrows():
        # The original called the undefined name `download_videos`; the
        # function that downloads one channel is `download_channel_videos`.
        videos = download_channel_videos(channel)
        df = pd.DataFrame.from_records(videos)
        output_file = 'videos-{}.csv'.format(channel['slug'])
        df.to_csv(output_file, index=False, encoding='utf-8')
        print("Generated file: %s" % output_file)
def merge_channel_videos(channels, output_file='videos-MERGED.csv'):
    """
    Merge all videos-<CHANNEL>.csv files previously generated by
    `download_channels_videos()` into a single videos-MERGED.csv file.

    `channels` is a pandas DataFrame with one row per channel, providing
    the 'slug' column (used to locate each videos-<slug>.csv file in the
    current directory) and the 'title' column (stored in a new 'channel'
    column of the merged data).

    Missing descriptions are replaced by an empty string; rows that still
    have a missing value in any other column are dropped. The result is
    written to `output_file`.

    Raises ValueError (from `pd.concat`) when `channels` has no rows.
    """
    # Merge all videos together
    frames = []
    for _, channel in channels.iterrows():
        channel_videos = pd.read_csv('videos-%s.csv' % channel['slug'])
        channel_videos['channel'] = channel['title']
        frames.append(channel_videos)
    videos = pd.concat(frames, ignore_index=True)

    # Assign the filled column back rather than calling fillna(inplace=True)
    # on the column selection: under pandas copy-on-write the in-place form
    # only changes a temporary, and the dropna() below would then discard
    # every video that has an empty description.
    videos['description'] = videos['description'].fillna('')
    videos = videos.dropna()

    videos.to_csv(output_file, index=False, encoding='utf-8')
    print("Channel videos merged into %s" % output_file)