In [1]:
import os
import re
import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from oauth2client.tools import argparser
from datetime import datetime, timedelta

In [2]:
# ISO 8601 duration restricted to the fixed-length designators (weeks, days,
# hours, minutes, seconds). Year/month designators directly after "P" are
# rejected because they have no fixed length in seconds.
_ISO_DURATION = re.compile(
    r"P(?!\d+[YM])"
    r"(?:(\d+)W)?"
    r"(?:(\d+)D)?"
    r"(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?"
)


def parse_duration(duration):
    """Parse an ISO 8601 duration string and return the number of seconds.

    Accepts the time-only form (``PT1H2M3S``) as well as durations carrying a
    week or day part (``P1DT2H``, ``P0D``). Any component may be omitted; an
    omitted component counts as zero. Matching is anchored at the start of the
    string only, so trailing text after the recognised components is ignored.

    Args:
        duration: ISO 8601 duration string, e.g. ``"PT4M13S"``.

    Returns:
        The total duration in whole seconds, as an ``int``.

    Raises:
        ValueError: if ``duration`` does not start with a supported ISO 8601
            duration (including year/month durations such as ``P1Y``).
    """
    match = _ISO_DURATION.match(duration)
    if match is None:
        raise ValueError(f"Unsupported ISO 8601 duration: {duration!r}")

    # Unmatched optional groups come back as None and count as zero.
    weeks, days, hours, minutes, seconds = (int(part) if part else 0 for part in match.groups())
    return (weeks * 7 + days) * 86400 + hours * 3600 + minutes * 60 + seconds

In [3]:
def get_time_gap_in_minutes(time_str):
    """Return the number of minutes elapsed between ``time_str`` and now.

    Both instants are compared in UTC, so the result does not depend on the
    local timezone of the machine running the notebook.

    Args:
        time_str: ISO 8601 timestamp. A trailing ``Z`` is read as UTC, an
            explicit offset (``+09:00``) is honoured, and a timestamp with no
            zone information is treated as UTC.

    Returns:
        Elapsed minutes as a float rounded to two decimals (negative when
        ``time_str`` lies in the future).

    Raises:
        ValueError: if ``time_str`` is not a valid ISO 8601 timestamp.
    """
    # Local import: the notebook's import cell only brings in datetime/timedelta.
    from datetime import timezone

    # Older Python versions of fromisoformat() reject the "Z" suffix, so spell
    # it as an explicit UTC offset instead of discarding it.
    if time_str.endswith(("Z", "z")):
        time_str = time_str[:-1] + "+00:00"

    time_obj = datetime.fromisoformat(time_str)
    if time_obj.tzinfo is None:
        time_obj = time_obj.replace(tzinfo=timezone.utc)

    now = datetime.now(timezone.utc)
    time_gap_seconds = (now - time_obj).total_seconds()
    return round(time_gap_seconds / 60, 2)

In [4]:
def _video_row(video_id, item):
    """Flatten one ``videos.list`` item into the column values stored for a video.

    Statistics and tags that the response omits are recorded as the string
    'NaN' instead of raising ``KeyError``.
    """
    snippet = item['snippet']
    statistics = item['statistics']
    return {
        'id': video_id,
        # Commas and pipes are blanked out of the title before it goes into the CSV.
        'title': snippet['title'].replace(',', ' ').replace('|', ' '),
        'category_id': snippet['categoryId'],
        'views': statistics.get('viewCount', 'NaN'),
        'likes': statistics.get('likeCount', 'NaN'),
        'favorites': statistics.get('favoriteCount', 'NaN'),
        # The statistics key is 'commentCount'; it is absent when the API omits it.
        'comments': statistics.get('commentCount', 'NaN'),
        'date': snippet['publishedAt'],
        'duration': parse_duration(item['contentDetails']['duration']),
        'tags': snippet.get('tags', 'NaN'),
    }


def get_statics(datas, channelId, playlistName):
    """Fetch metadata for every video in ``datas`` and save it as a dated CSV.

    One ``videos().list`` request is issued per video through the module-level
    ``youtube`` client, which must be built before this function is called.
    A video for which the API returns no item gets '-' in every column except
    'id'. Rows are sorted by the 'date' column and written to
    ``./DATA/videos/{channelId}_{playlistName}_{YYYYMMDD}.csv``; the output
    directory is created if it does not exist.

    Args:
        datas: DataFrame with a 'video_id' column.
        channelId: first component of the output file name.
        playlistName: second component of the output file name.

    Returns:
        None. The result is written to disk.
    """
    columns = ['id', 'title', 'category_id', 'views', 'likes',
               'favorites', 'comments', 'date', 'duration', 'tags']

    rows = []
    # Iterate the values directly so a non-default DataFrame index cannot break lookups.
    for video_id in datas['video_id']:
        response = youtube.videos().list(
            part='snippet,statistics,contentDetails',
            id=video_id,
        ).execute()

        items = response['items']
        if items:
            rows.append(_video_row(video_id, items[0]))
        else:
            # No item returned for this id: keep the row, mark all fields as missing.
            placeholder = dict.fromkeys(columns, '-')
            placeholder['id'] = video_id
            rows.append(placeholder)

    df = pd.DataFrame(rows, columns=columns)
    df = df.sort_values(by=['date']).reset_index(drop=True)

    currentDay = datetime.today().strftime("%Y%m%d")
    output_dir = './DATA/videos'
    os.makedirs(output_dir, exist_ok=True)
    df.to_csv(f'{output_dir}/{channelId}_{playlistName}_{currentDay}.csv', sep=',', na_rep='NaN')
        

In [5]:
# Directory holding one CSV per playlist. Each file is expected to be named
# "<channel>_<playlist>.csv"; it is loaded with pandas and handed to get_statics.
path = os.path.join(".", "DATA", "ids", "raw")
# Only CSV files are processed; other directory entries are skipped.
file_lst = [name for name in os.listdir(path) if name.lower().endswith(".csv")]

# The API key is the first line of key.txt. Strip the trailing newline so it
# is not sent as part of the key.
with open("key.txt", 'r') as f:
    DEVELOPER_KEY = f.readline().strip()

YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

# Module-level client used by get_statics.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

for filename in file_lst:
    datas = pd.read_csv(os.path.join(path, filename))
    print(filename)
    # Split on the first underscore only, so further underscores stay in the
    # playlist name instead of raising on unpacking. A name without any
    # underscore yields an empty playlist name.
    channelId, _, playlistName = os.path.splitext(filename)[0].partition('_')
    get_statics(datas, channelId, playlistName)


AJ_minecraft.csv
Bangtan TV_슈취타.csv
Davidsbeenhere_Kenya.csv
Endless Adventure_50 state camper van road trip.csv
GAMERIOT_CRIME BOSS ROCKAY CITY.csv
GoodTimewithScar_Limited Life.csv
Grian_Limited Life.csv
ImDontaiGaming_Resident Evil 4.csv
Jet Lag_New Zealand.csv
Kara and Nate_Japan Travle Vlog.csv
KBS Kpop_돌박이일.csv
KBS Kpop_리무진서비스.csv
KBS KPOP_아이돌 인간극장.csv
KBS Kpop_은채의 스타일기.csv
MovieHQ_Diable 4.csv
MovieHQ_Hogwarts Legacy.csv
OOTB_전과자.csv
PearlscentMoon_Limited Life.csv
Samuel and Audrey_Renovating hotel in Argentina.csv
SmallishBeans_Limited Life.csv
TencentVideo_My Little Doctor.csv
TheRadBrad_Resident Evil 4.csv
TmarTn2_MLB 23.csv
Toogii_Cafeowner Simulator.csv
Toogii_Hogwarts Legacy.csv
UDONSOBA OSAKA NARA_Chinese.csv
UDONSOBA OSAKA NARA_OSAKA.csv
WildLens_Inda.csv
WIRED_Autocomplete Interview.csv
Zahait_Masterplan Tycoon.csv
강형욱의 보듬TV_견종백과.csv
겁도 없꾸라_겁도 없꾸라.csv
공부왕찐천재 홍진경_수업시간.csv
곽튜브_찐따록.csv
글자네_단퐁회.csv
김지윤의 지식play_국제정치.csv
김한강_김한강 시리즈.csv
꼰대희_밥묵자.csv
끄적끄적_끄적끄적.csv
낄낄상회_가졳같은회사.