In [34]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from datetime import datetime
import pandas as pd
import os

In [35]:
# Name of the environment variable that holds the API key
API_KEY = 'YT_API_KEY'

# Date formats: timestamps parsed from API responses vs. dates kept in the tables
SOURCE_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
TARGET_DATE_FORMAT = '%Y-%m-%d'

# Channel search phrase and the earliest publication date of the videos to fetch
CHANNEL = 'lekkostronniczy'
START_DATE = '2023-01-01'


# Extraction fields for the channel tables (snippet attributes and statistics)
CHANNELS_SNIPPED = [
    'title',
    'publishedAt',
    'country',
    'description',
]
CHANNELS_METRICS = [
    'viewCount',
    'subscriberCount',
    'videoCount',
]

# Extraction fields for the video tables (snippet attributes and statistics)
VIDEOS_SNIPPED = [
    'title',
    'categoryId',
    'publishedAt',
    'description',
]
VIDOS_METRICS = [
    'viewCount',
    'likeCount',
    'commentCount',
]




In [36]:
# Extraction date (local time) used as `extract_date` on the metrics rows
today = datetime.now().strftime(TARGET_DATE_FORMAT)

Build Service

In [37]:
# Build the YouTube Data API v3 client. The key is read from the environment
# variable named by API_KEY, so the variable name is configured in one place only.
youtube = build('youtube', 'v3', developerKey=os.environ[API_KEY])

Get channel ID

In [38]:
# Channel search by the CHANNEL phrase; only the id part of each hit is requested
request = youtube.search().list(q=CHANNEL, type='channel', part='id')

In [39]:
# Run the channel search. The error is reported and then re-raised: swallowing it
# would leave `response` undefined (or stale from an earlier run) for the cells
# that read it next.
try:
    response = request.execute()
except HttpError as e:
    print('Error response status code : {0}, reason : {1}'.format(e.status_code, e.error_details))
    raise

In [40]:
# Take the first search hit as the channel. A search with no hits is reported
# explicitly instead of failing with an opaque IndexError.
if not response.get('items'):
    raise ValueError('No channel found for query: {0}'.format(CHANNEL))
channelId = response['items'][0]['id']['channelId']

Get channel information

In [41]:
# Request the snippet and statistics parts for the channel found above
request = youtube.channels().list(
    part="snippet,statistics",
    id=channelId
)

# The error is reported and then re-raised: swallowing it would leave the earlier
# search `response` in place, and the following cells would parse it as channel data.
try:
    response = request.execute()
except HttpError as e:
    print('Error response status code : {0}, reason : {1}'.format(e.status_code, e.error_details))
    raise

In [42]:
# Extract data for the channel info table
df_columns = ['channelId'] + CHANNELS_SNIPPED

# A snippet field that the channel has not set (e.g. 'country') may be missing from
# the response, so absent keys are stored as None instead of raising KeyError.
channel_snippet = response['items'][0]['snippet']
channel_info = {field: channel_snippet.get(field) for field in CHANNELS_SNIPPED}

# Date conversion: API timestamp -> plain date in the target format
date_obj = datetime.strptime(channel_info['publishedAt'], SOURCE_DATE_FORMAT)
channel_info['publishedAt'] = date_obj.strftime(TARGET_DATE_FORMAT)

# Description cleaning: drop trailing whitespace (a missing description becomes '')
channel_info['description'] = (channel_info['description'] or '').rstrip()

channel_info['channelId'] = channelId

In [43]:
# One-row channel info table, columns in the order given by df_columns
pd.DataFrame(data=channel_info, index=[0]).loc[:, df_columns]

Unnamed: 0,channelId,title,publishedAt,country,description
0,UC8JbbaZ_jgdsoUqrZ2bXtQQ,Lekko Stronniczy,2011-02-24,PL,Lekko Stronniczy to codzienny program rozrywko...


In [44]:

# Extract data for the channel statistics table
df_columns = ['channelId', 'extract_date'] + CHANNELS_METRICS

# A counter may be missing from the response (e.g. a hidden subscriber count),
# so absent keys are stored as None instead of raising KeyError.
channel_statistics = response['items'][0]['statistics']
channel_stats = {field: channel_statistics.get(field) for field in CHANNELS_METRICS}

# Key columns: which channel the row describes and when it was extracted
channel_stats['channelId'] = channelId
channel_stats['extract_date'] = today

In [45]:
# One-row channel statistics table, columns in the order given by df_columns
pd.DataFrame(data=channel_stats, index=[0]).loc[:, df_columns]

Unnamed: 0,channelId,extract_date,viewCount,subscriberCount,videoCount
0,UC8JbbaZ_jgdsoUqrZ2bXtQQ,2023-04-11,201849203,400000,2085


Get channel videos information

In [46]:
# Collect snippet + statistics for every channel video published after START_DATE.
# Each iteration fetches one page of video ids and then the details for those ids.
max_results = 50
next_page_token = None
videos_results = []
start_date = datetime.strptime(START_DATE, TARGET_DATE_FORMAT)
# The lower bound of the search window is the same for every page, so build it once.
published_after = start_date.isoformat() + 'Z'

next_page = True
while next_page:

    # One page of video ids for the channel, ordered by date
    search_request = youtube.search().list(
        channelId=channelId,
        part='id',
        order='date',
        type='video',
        maxResults=max_results,
        publishedAfter=published_after,
        pageToken=next_page_token
    )

    try:
        search_response = search_request.execute()
    except HttpError as e:
        print('Error response status code : {0}, reason : {1}'.format(e.status_code, e.error_details))
        # Re-raise: carrying on with an undefined or stale page would either crash
        # below or keep re-requesting the same page token forever.
        raise

    videos_ids = ",".join(item['id']['videoId'] for item in search_response['items'])

    # An empty page has no ids to look up, so the details request is skipped.
    if videos_ids:
        videos_request = youtube.videos().list(
            id=videos_ids,
            part='snippet,statistics')

        try:
            videos_response = videos_request.execute()
        except HttpError as e:
            print('Error response status code : {0}, reason : {1}'.format(e.status_code, e.error_details))
            # Re-raise: a stale `videos_response` would append the previous page again.
            raise

        videos_results.extend(videos_response['items'])

    # Continue only while the search response advertises a further page
    if 'nextPageToken' in search_response:
        next_page_token = search_response['nextPageToken']
    else:
        next_page = False


In [47]:
# Split every fetched video into two records: descriptive attributes (videos)
# and the counters observed on the extraction date (videos_metrics).
videos = []
videos_metrics = []

for video in videos_results:
    video_id = video['id']

    video_record = {field: video['snippet'][field] for field in VIDEOS_SNIPPED}

    # A counter may be missing from a video's statistics (e.g. likes hidden or
    # comments disabled), so absent keys are stored as None instead of raising
    # KeyError and aborting the whole extraction.
    video_statistics = video['statistics']
    video_metrics_record = {field: video_statistics.get(field) for field in VIDOS_METRICS}

    # Date conversion: API timestamp -> plain date in the target format
    date_obj = datetime.strptime(video_record['publishedAt'], SOURCE_DATE_FORMAT)
    video_record['publishedAt'] = date_obj.strftime(TARGET_DATE_FORMAT)

    # Key columns for both tables
    video_record['video_id'], video_record['channelId'] = video_id, channelId
    video_metrics_record['video_id'], video_metrics_record['extract_date'] = video_id, today

    videos.append(video_record)
    videos_metrics.append(video_metrics_record)

In [48]:
# Videos table: key columns first, then the extracted snippet fields
pd.DataFrame(data=videos).loc[:, ['video_id', 'channelId', *VIDEOS_SNIPPED]]

Unnamed: 0,video_id,channelId,title,categoryId,publishedAt,description
0,jYuKqJuRRl8,UC8JbbaZ_jgdsoUqrZ2bXtQQ,Sprawdzamy ile zarabia Adam Glapińskim z NBP i...,24,2023-04-10,Dziś w odcinku sprawdzamy ile zarabia Adam Gla...
1,qY3eJu0lmxg,UC8JbbaZ_jgdsoUqrZ2bXtQQ,"Kraków jak TVP: 2 miliardy tam, a w Hello Krak...",24,2023-04-07,Wiadomo jak jest TVP: 2 miliardy co roku lecą ...
2,HXoQlB2nDmc,UC8JbbaZ_jgdsoUqrZ2bXtQQ,Wakacje w kamperze? Karawaning nie jest dla ka...,24,2023-04-06,"Marzą Ci się wakacje w kamperze? Może myślisz,..."
3,AnZm8cgib8k,UC8JbbaZ_jgdsoUqrZ2bXtQQ,"Malik Montana: McDonald's drama, Malik odpowia...",24,2023-04-05,Jak wiecie jest drama: Malik Montana - McDonal...
4,WaY-WmdmLGk,UC8JbbaZ_jgdsoUqrZ2bXtQQ,Czy samotność niszczy mózg? ChatGPT robi roast...,24,2023-04-04,Samotność niszczy mózg - samotni ludzie mają m...
...,...,...,...,...,...,...
66,2XpZbJQY_Nw,UC8JbbaZ_jgdsoUqrZ2bXtQQ,"Jak IKEA to robi, że zawsze kupujemy tam więce...",24,2023-01-06,"Lekko Stronniczy to program, który ucząc bawi,..."
67,XchoV02lbuA,UC8JbbaZ_jgdsoUqrZ2bXtQQ,Nie dasz rady się nie zaśmiać - Lekko Stronnic...,24,2023-01-05,"Po prostu nie dasz rady, albowiem już na samym..."
68,MTfXN0NmyZ0,UC8JbbaZ_jgdsoUqrZ2bXtQQ,"Savoir-vivre w restauracji: co wypada, czego r...",24,2023-01-04,Jak się zachować w restauracji? Czy rozpocząć ...
69,zLy3RrU9cXU,UC8JbbaZ_jgdsoUqrZ2bXtQQ,Kilka słów o świętach bez religii w wykonaniu ...,24,2023-01-03,Ale prof. Marcin Matczak oczywiście nie zajmuj...


In [49]:
# Video metrics table: key columns first, then the extracted counters
pd.DataFrame(data=videos_metrics).loc[:, ['video_id', 'extract_date', *VIDOS_METRICS]]

Unnamed: 0,video_id,extract_date,viewCount,likeCount,commentCount
0,jYuKqJuRRl8,2023-04-11,18580,1543,66
1,qY3eJu0lmxg,2023-04-11,49812,2324,120
2,HXoQlB2nDmc,2023-04-11,53401,2246,127
3,AnZm8cgib8k,2023-04-11,59716,2692,114
4,WaY-WmdmLGk,2023-04-11,61807,2941,136
...,...,...,...,...,...
66,2XpZbJQY_Nw,2023-04-11,75179,3127,151
67,XchoV02lbuA,2023-04-11,82649,4359,234
68,MTfXN0NmyZ0,2023-04-11,79599,3378,98
69,zLy3RrU9cXU,2023-04-11,71095,2956,105
