In [158]:
import os
from googleapiclient.discovery import build
from google.oauth2 import service_account
import pandas as pd

In [159]:
api_key = 'AIzaSyCyETXDwxDF5tLb2A_n-oUr0lbE74JxXNA'

In [160]:
# Create a YouTube Data API client
youtube = build('youtube', 'v3', developerKey=api_key)

In [179]:
# Initialize variables for the initial request
playlist_id = 'PLpbom12S-UaJEDmUaFfWLws317OUKNceE' # Playlist ID
max_results = 50
page_token = None

# Initialize an empty list to store the data
data = []

while True:
    # Make an API request to retrieve playlist items
    request = youtube.playlistItems().list(
        part='snippet',
        maxResults=max_results,
        pageToken=page_token,
        playlistId=playlist_id
    )
    response = request.execute()

    # Process the response
    for item in response['items']:
        video_id = item['snippet']['resourceId']['videoId']
        video_response = youtube.videos().list(
            part='snippet,statistics,contentDetails',
            id=video_id
        ).execute()

        title = item['snippet']['title']

        video_items = video_response.get('items', [])  # Get the 'items' list from the response, or an empty list if it's missing
        if video_items:
            likes = video_response['items'][0]['statistics']['likeCount']
            views = video_response['items'][0]['statistics']['viewCount']
            published_date = video_response['items'][0]['snippet']['publishedAt']
            # Extract the average view duration in ISO 8601 format and convert it to seconds
            avg_view_duration_iso = video_response['items'][0]['contentDetails']['duration']
            avg_view_duration_seconds = pd.to_timedelta(avg_view_duration_iso).total_seconds()  
        else:
            likes = 0
            views = 0
            published_date = "NaN"
            avg_view_duration_seconds = 0

        data.append({
            'Title': title,
            'Likes': likes,
            'Views': views,
            'PublishedDate': published_date,
            'AvgViewDuration': avg_view_duration_seconds
        })

    # Add the current response data to the list
    data.extend(response['items'])

    # Check if there are more pages to retrieve
    page_token = response.get('nextPageToken')
    if not page_token:
        break

In [175]:
# Create a Pandas DataFrame
df = pd.DataFrame(data)

# Clean, format and convert
df["Likes"] = df["Likes"].fillna(value=0)
df['Likes'] = df['Likes'].astype(int)
df["Views"] = df["Views"].fillna(value=0)
df['Views'] = df['Views'].astype(int)
df['PublishedDate'] = pd.to_datetime(df['PublishedDate']).dt.strftime('%Y-%m-%d')

# Add Asset column from title info
df['Asset'] = df['Title'].str.split(' - ').str[0].str.split(' \(').str[0]

# Add LikesToViewsRatio column for the ratio of likes to views
df['LikesToViewsRatio'] = df['Likes'] / df['Views']

# Auxiliary df
df0 = df.loc[:, ['Asset','PublishedDate','Likes','Views','LikesToViewsRatio']]

ValueError: time data "0" doesn't match format "%Y-%m-%dT%H:%M:%S%z", at position 376. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

In [None]:

df1 = df0.loc[:,['Asset','Likes','Views']].groupby('Asset').sum().sort_values(by='Likes', ascending=False).reset_index()

In [163]:
df2 = df0.groupby('PublishedDate').sum().reset_index()

In [None]:
# Save the DataFrame to a CSV file
df.to_csv('playlist_data.csv', index=False)

In [177]:
df

Unnamed: 0,Title,Likes,Views,PublishedDate,AvgViewDuration,kind,etag,id,snippet
0,KFOF11 - Resumo Mensal do Kinea FOF FII ref. J...,151,1079,2023-07-03T14:50:41Z,310.0,,,,
1,KNRI11 - Resumo Mensal do Kinea Renda ref. Jun...,365,26534,2023-07-03T12:32:37Z,518.0,,,,
2,Fundos Multimercados Kinea - Resumo Mensal ref...,182,2823,2023-06-29T21:00:24Z,316.0,,,,
3,KNRI11 - Resumo Mensal do Kinea Renda ref. Mai...,419,28498,2023-06-01T19:11:08Z,500.0,,,,
4,KFOF11 - Resumo Mensal do Kinea FOF FII ref. M...,59,393,2023-06-01T14:58:03Z,290.0,,,,
...,...,...,...,...,...,...,...,...,...
771,,0,0,,,youtube#playlistItem,IIvxbuOndWt03gUC-a5HZoJA1Qk,UExwYm9tMTJTLVVhSkVEbVVhRmZXTHdzMzE3T1VLTmNlRS...,"{'publishedAt': '2020-02-19T13:40:48Z', 'chann..."
772,,0,0,,,youtube#playlistItem,G-dwQ-ZlDS5G5FYGkDlc6XBfd2c,UExwYm9tMTJTLVVhSkVEbVVhRmZXTHdzMzE3T1VLTmNlRS...,"{'publishedAt': '2020-02-04T19:18:08Z', 'chann..."
773,,0,0,,,youtube#playlistItem,4oN6GLg5G_nehot2zI1ksi_kAFo,UExwYm9tMTJTLVVhSkVEbVVhRmZXTHdzMzE3T1VLTmNlRS...,"{'publishedAt': '2020-02-19T13:34:26Z', 'chann..."
774,,0,0,,,youtube#playlistItem,fzwRD_uHlh5_Who9pjTigk2P2a8,UExwYm9tMTJTLVVhSkVEbVVhRmZXTHdzMzE3T1VLTmNlRS...,"{'publishedAt': '2020-02-04T12:16:36Z', 'chann..."
