In [5]:
import os
import re
import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from oauth2client.tools import argparser
from datetime import datetime, timedelta

In [6]:
def parse_duration(duration):
    """Convert an ISO 8601 duration string into a whole number of seconds.

    Handles the week/day/hour/minute/second designators, e.g. ``PT1H2M3S``,
    ``PT45S``, ``P1DT2H`` or ``P0D``. Year and month designators are not
    supported because they have no fixed length in seconds.

    Args:
        duration: Duration text such as ``"PT4M13S"``.

    Returns:
        int: Total length in seconds.

    Raises:
        ValueError: If ``duration`` is not a supported ISO 8601 duration.
    """
    # Digits are captured without their designator so no slicing is needed.
    # The whole time part ("T...") is optional so date-only values like
    # "P0D" or "P1D" are accepted as well.
    match = re.fullmatch(
        r"P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?",
        duration,
    )
    if match is None:
        raise ValueError(f"Unsupported ISO 8601 duration: {duration!r}")

    weeks, days, hours, minutes, seconds = (
        int(part) if part else 0 for part in match.groups()
    )
    total_hours = (weeks * 7 + days) * 24 + hours
    return total_hours * 3600 + minutes * 60 + seconds

In [7]:
def get_time_gap_in_hours(time_str):
    """Return how many hours have passed since an ISO 8601 timestamp.

    Args:
        time_str: Timestamp such as ``"2023-01-31T12:00:00Z"``. A trailing
            ``Z`` or an explicit offset (``+09:00``) is honoured; a timestamp
            without any zone information is assumed to be UTC.

    Returns:
        float: Hours between ``time_str`` and the current time, rounded to
        two decimals.
    """
    # Local import: the notebook's import cell only brings in datetime/timedelta.
    from datetime import timezone

    # datetime.fromisoformat() rejects the 'Z' suffix before Python 3.11, so
    # rewrite it as an explicit UTC offset instead of discarding the zone.
    if time_str.endswith(('Z', 'z')):
        time_str = time_str[:-1] + '+00:00'

    published = datetime.fromisoformat(time_str)
    if published.tzinfo is None:
        published = published.replace(tzinfo=timezone.utc)

    # Compare against an aware "now" in UTC; a naive local now() would skew
    # the result by the machine's UTC offset.
    elapsed = datetime.now(timezone.utc) - published
    return round(elapsed.total_seconds() / 3600, 2)

In [9]:
def get_statics(datas,channelId,playlistName):
    """Fetch per-video metadata from the YouTube Data API and save it as CSV.

    One ``videos().list`` request is issued per entry of ``datas['video_id']``
    using the notebook-level ``youtube`` client, which must be built before
    this function is called. The collected rows are sorted by the ``date``
    column and written to
    ``./DATA/videos/{channelId}_{playlistName}_{YYYYMMDD}.csv``.

    Placeholders used in the output:
        * ``'-'``   in every field when the API returns no item for the id.
        * ``'NaN'`` for an individual statistic or the tags when the item
          does not carry that field.

    Args:
        datas: DataFrame with a ``video_id`` column.
        channelId: Channel label used as the first part of the file name.
        playlistName: Playlist label used as the second part of the file name.

    Returns:
        None. The result is only written to disk.
    """
    columns = ['id', 'title', 'category_id', 'views', 'likes', 'favorites',
               'comments', 'date', 'date_gap', 'duration', 'tags']
    rows = []

    # Iterate over the values themselves so a non-default index on `datas`
    # cannot break the lookup.
    for video_id in datas['video_id']:
        # maxResults is omitted on purpose: the API does not support it
        # together with the `id` filter.
        response = youtube.videos().list(
            part='snippet,statistics,contentDetails',
            id=video_id,
        ).execute()

        items = response.get('items') or []
        if not items:
            # No item returned for this id: keep the id so the row count
            # still matches the input, and mark every other field with '-'.
            rows.append([video_id] + ['-'] * (len(columns) - 1))
            continue

        item = items[0]
        snippet = item['snippet']
        stats = item.get('statistics', {})
        published_at = snippet['publishedAt']

        rows.append([
            video_id,
            # Commas and pipes are blanked out of the title before it is stored.
            snippet['title'].replace(',', ' ').replace('|', ' '),
            snippet['categoryId'],
            # Any statistic may be missing from the response; fall back to the
            # same 'NaN' marker for all of them.
            stats.get('viewCount', 'NaN'),
            stats.get('likeCount', 'NaN'),
            stats.get('favoriteCount', 'NaN'),
            stats.get('commentCount', 'NaN'),
            published_at,
            get_time_gap_in_hours(published_at),
            parse_duration(item['contentDetails']['duration']),
            snippet.get('tags', 'NaN'),
        ])

    # dtype=object keeps the mixed placeholder/number cells exactly as collected.
    df = pd.DataFrame(rows, columns=columns, dtype=object)
    df = df.sort_values(by=['date']).reset_index(drop=True)

    currentDay = datetime.today().strftime("%Y%m%d")

    # Create the target folder on first use instead of failing in to_csv.
    os.makedirs('./DATA/videos', exist_ok=True)
    df.to_csv(f'./DATA/videos/{channelId}_{playlistName}_{currentDay}.csv', sep=',', na_rep='NaN')
        

In [10]:
# Folder holding one CSV of video ids per playlist, named
# "<channel>_<playlist>.csv". Built with os.path.join so the path has no
# backslash escape sequences and works on every OS.
path = os.path.join(".", "DATA", "ids", "ed")
# Only CSV files are processed; sorting makes the run order deterministic.
file_lst = sorted(name for name in os.listdir(path) if name.lower().endswith(".csv"))

# The API key lives outside the notebook in key.txt. strip() removes the
# trailing newline that readline() would otherwise leave inside the key.
with open("key.txt", 'r') as f:
    DEVELOPER_KEY = f.readline().strip()

YOUTUBE_API_SERVICE_NAME='youtube'
YOUTUBE_API_VERSION='v3'

# Module-level client; get_statics() reads this name as a global.
youtube=build(YOUTUBE_API_SERVICE_NAME,YOUTUBE_API_VERSION,developerKey=DEVELOPER_KEY)

for filename in file_lst:
    datas = pd.read_csv(os.path.join(path, filename))
    print(filename)
    # Split on the first underscore only, so playlist names that contain
    # underscores themselves do not break the unpacking.
    channelId, playlistName = os.path.splitext(filename)[0].split('_', 1)
    get_statics(datas,channelId, playlistName)


BANGTANTV_Run BTS! Episode 1~40.csv
BANGTANTV_Run BTS! Episode 41~80.csv
BANGTANTV_Run BTS! Episode 81~120.csv
EBS 키즈_곰디와 친구들.csv
JTBC Voyage_비정상회담.csv
KBS Joy_[무엇이든 물어보살] 신통방통 리얼 고민 해결쇼 2022.csv
MBC 미스터리_[어둑시니pick 시즌1].csv
MBC 미스터리_[어둑시니pick 시즌2].csv
odg_ODG X Artist.csv
tvN_강식당1.csv
tvN_파트너게임.csv
감스트_손흥민 키우기.csv
감스트_열끼니.csv
감스트_위닝2019.csv
강형욱의 보듬TV_견종백과.csv
공부왕찐천재 홍진경_수업시간.csv
곽튜브_세계여행(2021).csv
글자네_단퐁회.csv
김한강_김한강 시리즈.csv
꼰대희_밥묵자.csv
녹두로_마인크래프트.csv
녹두로_몬스터헌터 선브.csv
녹두로_테라리아.csv
녹두로_포켓몬스바.csv
달라스튜디오_네고왕.csv
대암씨_더포레스트.csv
대암씨_데빌메이크라이.csv
대암씨_돈스타브.csv
대암씨_로보토미.csv
대암씨_몬스터헌터 월드.csv
대암씨_새티스팩토리.csv
대암씨_슬라임랜처.csv
대암씨_테라리아.csv
대암씨_픽스아크.csv
대암씨_할로우나이트.csv
디글_사랑의불시착.csv
디글_악의꽃.csv
디글_호구들의감빵생활 레전드.csv
디글_호텔델루나.csv
딩고스토리_mbti love.csv
딩고스토리_라이크.csv
딩고스토리_썰스데이1.csv
딩고스토리_썰스데이2.csv
딩고스토리_썰스데이3.csv
딩고스토리_썸남.csv
딩고스토리_엘턴십.csv
명예훈장_단퐁회.csv
빠니보틀_아메리카여행.csv
빠니보틀_유라시아 여행.csv
빽능_X맨.csv
빽능_불타는 청춘.csv
빽능_패밀리가 떴다.csv
사피엔스 스튜디오_벌거벗은세계사.csv
사피엔스 스튜디오_벌거벗은한국사.csv
삼성전자 반도체 뉴스룸_S로그.csv
새덕후_야생동물구조센터.csv
수제비_두음쌤2