In [None]:
!pip install google-api-python-client
!pip install isodate

Collecting isodate
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: isodate
Successfully installed isodate-0.6.1
Collecting pytube
  Downloading pytube-15.0.0-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytube
Successfully installed pytube-15.0.0


In [None]:
import datetime
import json
import os
import re

import isodate
import pandas as pd
import requests
from googleapiclient.discovery import build

In [None]:
def get_channel_stats(youtube, channel_id):
  """Return headline statistics for a single YouTube channel.

  Args:
    youtube: YouTube Data API client; only ``youtube.channels().list(...)``
      is used.
    channel_id: ID of the channel to look up.

  Returns:
    dict with the keys ``Channel_name``, ``Subscribers``, ``Views``,
    ``Total_videos`` and ``Playlist_id`` (the channel's uploads playlist).
    Counters are passed through exactly as the API returned them; a counter
    that is missing from the response is reported as ``None``.

  Raises:
    ValueError: if the response contains no channel for ``channel_id``.
  """
  response = youtube.channels().list(
      part='snippet,contentDetails,statistics',
      id=channel_id
  ).execute()

  # An unknown ID yields a response without usable items; fail with a clear
  # message instead of an opaque KeyError/IndexError on ['items'][0].
  items = response.get('items') or []
  if not items:
    raise ValueError(f'No channel found for id {channel_id!r}')

  channel = items[0]
  statistics = channel['statistics']
  return dict(
      Channel_name=channel['snippet']['title'],
      # .get(): a counter may be absent (e.g. a hidden subscriber count), and
      # that should not abort the whole lookup.
      Subscribers=statistics.get('subscriberCount'),
      Views=statistics.get('viewCount'),
      Total_videos=statistics.get('videoCount'),
      Playlist_id=channel['contentDetails']['relatedPlaylists']['uploads'],
  )

In [None]:
def get_video_ids(api_key, channel_id, start_date, end_date, youtube=None):
  """Collect the IDs of a channel's videos published inside a time window.

  Pages through ``search.list`` until no ``nextPageToken`` is returned.

  Args:
    api_key: API key used to build a client when ``youtube`` is not given.
    channel_id: channel whose videos are searched.
    start_date: ``datetime`` lower bound, sent as ``publishedAfter``.
    end_date: ``datetime`` upper bound, sent as ``publishedBefore``.
    youtube: optional, already-built YouTube Data API client. Passing one
      avoids building a new client on every call.

  Returns:
    list of video ID strings in the order the API returned them.

  Note:
    Both bounds are formatted with a literal trailing ``Z`` and no timezone
    conversion, so the datetimes are taken to be UTC already.
  """
  if youtube is None:
    youtube = build('youtube', 'v3', developerKey=api_key)

  published_after = start_date.strftime('%Y-%m-%dT%H:%M:%SZ')
  published_before = end_date.strftime('%Y-%m-%dT%H:%M:%SZ')

  video_ids = []
  next_page_token = None

  while True:
    search_response = youtube.search().list(
        part='id',
        channelId=channel_id,
        # search.list documents 0-50 as the acceptable range for maxResults;
        # larger result sets are reached through pageToken, not a bigger page.
        maxResults=50,
        order='date',
        publishedAfter=published_after,
        publishedBefore=published_before,
        type='video',
        pageToken=next_page_token
    ).execute()

    # Tolerate a page without an 'items' key (nothing matched).
    for item in search_response.get('items', []):
      video_ids.append(item['id']['videoId'])

    next_page_token = search_response.get('nextPageToken')
    if not next_page_token:
      break

  return video_ids

In [None]:
def get_video_duration(api_key, video_id, youtube=None):
    """Look up one video's duration and return it formatted by conversion_time.

    Args:
        api_key: API key used to build a client when ``youtube`` is not given.
        video_id: ID of the video to look up.
        youtube: optional, already-built YouTube Data API client. Passing one
            avoids building a new client for every single video.

    Returns:
        The value returned by ``conversion_time`` for the video's
        ``contentDetails.duration``, or one of two fixed message strings:
        'Video ID tidak valid' when the response's item list is empty, and
        'Tidak dapat memperoleh durasi video' when an expected key is missing.
    """
    if youtube is None:
        youtube = build('youtube', 'v3', developerKey=api_key)

    response_duration = youtube.videos().list(
        part='contentDetails',
        id=video_id
    ).execute()

    # Keep the try block limited to the lookup whose failures it translates.
    try:
        duration_iso = response_duration['items'][0]['contentDetails']['duration']
    except IndexError:
        # 'items' is present but empty: no video matched this ID.
        return 'Video ID tidak valid'
    except KeyError:
        # The response lacks 'items', 'contentDetails' or 'duration'.
        return 'Tidak dapat memperoleh durasi video'

    return conversion_time(duration_iso)

In [None]:
def conversion_time(duration_iso):
    """Convert an ISO 8601 duration string into an ``HH:MM:SS`` string.

    Handles the time designators H, M and S after the ``T`` separator and the
    date designators W and D before it, so day-long values such as
    ``P1DT2H3M4S`` are folded into the hour count instead of being dropped.
    Month and year designators are not interpreted.

    Args:
        duration_iso: duration string, e.g. ``'PT1H2M3S'`` or ``'P1DT2H'``.

    Returns:
        ``'HH:MM:SS'`` with each field zero-padded to at least two digits; the
        hour field grows past two digits for very long durations. Components
        that are absent count as zero.
    """
    # Everything before 'T' is the date part, everything after is the time
    # part. Splitting keeps a date-side 'M' from being read as minutes.
    date_part, _, time_part = duration_iso.partition('T')

    def _component(text, designator):
        # Integer that directly precedes `designator` in `text`, or 0 if absent.
        match = re.search(r'(\d+)' + designator, text)
        return int(match.group(1)) if match else 0

    days = 7 * _component(date_part, 'W') + _component(date_part, 'D')
    hours = 24 * days + _component(time_part, 'H')
    minutes = _component(time_part, 'M')
    seconds = _component(time_part, 'S')

    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

In [None]:
def get_video_details(youtube, video_ids):
  """Fetch title, publish time and engagement counters for a list of videos.

  IDs are requested in batches of 50 per ``videos.list`` call.

  Args:
    youtube: YouTube Data API client; only ``youtube.videos().list(...)``
      is used.
    video_ids: list of video ID strings.

  Returns:
    list of dicts with the keys ``Titles``, ``Publish``, ``Views``, ``Like``
    and ``Comment``, one per item the API returned. Counters are passed
    through exactly as the API returned them; a counter missing from the
    response (e.g. likes hidden or comments disabled) is reported as ``None``
    instead of raising KeyError.
  """
  all_videos = []
  for start in range(0, len(video_ids), 50):
    response = youtube.videos().list(
        # Comma-separated without spaces, as the API expects part names.
        part='snippet,contentDetails,statistics',
        id=','.join(video_ids[start:start + 50])
    ).execute()

    for video in response.get('items', []):
      snippet = video['snippet']
      statistics = video.get('statistics', {})
      all_videos.append(dict(
          Titles=snippet['title'],
          Publish=snippet['publishedAt'],
          Views=statistics.get('viewCount'),
          Like=statistics.get('likeCount'),
          Comment=statistics.get('commentCount'),
      ))
  return all_videos

In [None]:
# Credentials and target channel. Prefer the YOUTUBE_API_KEY and
# YOUTUBE_CHANNEL_ID environment variables so the key never has to be saved
# inside the notebook; the original placeholders remain as the fallback.
api_key = os.environ.get('YOUTUBE_API_KEY', '[YOUR API KEY]')
channel_id = os.environ.get('YOUTUBE_CHANNEL_ID', '[CHANNEL ID]')

# Shared API client passed to get_channel_stats and get_video_details below.
youtube = build('youtube', 'v3', developerKey=api_key)

In [None]:
# Fetch the channel-level summary dict via get_channel_stats, using the shared client.
channel_statistics = get_channel_stats(youtube, channel_id)
# Bare trailing expression: the notebook renders the dict as this cell's output.
channel_statistics

In [None]:
# Pull the uploads-playlist ID out of the channel summary and display it.
# Not referenced again in the cells visible here.
Playlist_id = channel_statistics['Playlist_id']
Playlist_id

In [None]:
# Collect video IDs one 24-hour window at a time between start_date and end_date.
video_ids = []
start_date = datetime.datetime(2023, 7, 1)
end_date = datetime.datetime(2023, 8, 1)

current_date = start_date
# NOTE(review): `<=` means the window that STARTS on end_date is queried too,
# so the range covers end_date itself — confirm this is intended.
while current_date <= end_date:
  end_date_day = current_date + datetime.timedelta(days=1)
  video_ids_per_day = get_video_ids(api_key, channel_id, current_date, end_date_day)
  print('Tanggal:', current_date, '|', 'Jumlah Video:', len(video_ids_per_day))
  video_ids.extend(video_ids_per_day)
  current_date += datetime.timedelta(days=1)

# Consecutive windows share a boundary timestamp, so the same ID could be
# returned by two windows. Drop repeats while keeping first-seen order, because
# a later cell matches per-ID results to video_ids by position.
video_ids = list(dict.fromkeys(video_ids))

print('')
print('Total Video Keseluruhan:', len(video_ids))

In [None]:
# Batched metadata for every collected ID.
video_details = get_video_details(youtube, video_ids)

# One duration per requested ID, in video_ids order. The loop variable is not
# called `id`, so the builtin id() stays usable in the notebook namespace.
Duration = [get_video_duration(api_key, video_id) for video_id in video_ids]

# Durations are attached purely by position, which is only valid when
# get_video_details returned exactly one entry per requested ID. Fail loudly
# instead of silently writing a duration onto the wrong video.
# NOTE(review): this also presumes the API returns items in request order — confirm.
assert len(video_details) == len(Duration), (
    f'{len(video_details)} detail rows for {len(Duration)} video ids; '
    'positional pairing of durations would be misaligned'
)

for video, duration in zip(video_details, Duration):
  video['Duration'] = duration

In [None]:
# Build the results table in one chain: the 'Publish' timestamps are parsed
# and reduced to plain calendar dates (time of day dropped).
video_data = pd.DataFrame(video_details).assign(
    Publish=lambda frame: pd.to_datetime(frame['Publish']).dt.date
)
video_data

In [None]:
# Write the table to '31.csv' (path relative to the working directory), without the index column.
video_data.to_csv('31.csv', index=False)

In [None]:
# Boolean Series marking rows that repeat an earlier row across all columns
# (pandas default keep='first': the first occurrence is not flagged).
duplikat = video_data.duplicated()

print(duplikat)

In [None]:
# keep=False flags every row whose title occurs more than once, including the first occurrence.
nama_duplikat = video_data.duplicated(subset=['Titles'], keep=False)

# Show the rows that have a duplicated title
data_nama_duplikat = pd.DataFrame(video_data[nama_duplikat])
data_nama_duplikat