# populate list of youtube videos from political candidates

### Setting up the Pandas DataFrame
You can start by importing the necessary libraries and initializing an empty DataFrame with the required columns.

In [9]:
import pandas as pd

# Initialize an empty DataFrame
df = pd.DataFrame(columns=[
    "political_candidate", 
    "youtube_channel", 
    "video_url", 
    "video_title", 
    "video_description",
    "video_duration",
    "video_publishing_date",
    "status",
])

### Fetching video details for each candidate
You can use the functions mentioned in the previous answer to fetch video details for a given channel URL. You would loop through each candidate and their respective channel URLs to populate the DataFrame.

In [19]:
import pandas as pd
import requests

# YouTube API functions
def get_channel_id(api_key, channel_url):
    # Extract potential identifiers from the URL, remove '@' character
    identifier = channel_url.split('/')[-1].replace('@', '')
    
    # Try fetching channel ID using "forUsername" parameter
    url = f'https://www.googleapis.com/youtube/v3/channels?part=id&forUsername={identifier}&key={api_key}'
    response = requests.get(url).json()
    
    if 'items' in response and response['items']:
        return response['items'][0]['id']
    
    # If "forUsername" failed, try using "id" parameter (assuming identifier is a channel ID)
    url = f'https://www.googleapis.com/youtube/v3/channels?part=id&id={identifier}&key={api_key}'
    response = requests.get(url).json()
    
    if 'items' in response and response['items']:
        return response['items'][0]['id']
    
    # Try with custom URL format (assuming identifier is a custom URL username)
    url = f'https://www.googleapis.com/youtube/v3/channels?part=id&forUsername=c/{identifier}&key={api_key}'
    response = requests.get(url).json()
    
    if 'items' in response and response['items']:
        return response['items'][0]['id']
    
    # If all methods failed, print error and return None
    print(f"Error fetching channel ID for {identifier}. API Response: {response}")
    return None



def get_video_ids(api_key, channel_id, max_results=50):
    url = f'https://www.googleapis.com/youtube/v3/search?key={api_key}&channelId={channel_id}&part=id&order=date&maxResults={max_results}'
    response = requests.get(url).json()
    video_ids = [item['id']['videoId'] for item in response['items'] if 'videoId' in item['id']]
    return video_ids

def get_video_details(api_key, video_ids):
    video_details_list = []
    video_ids_str = ','.join(video_ids)
    url = f'https://www.googleapis.com/youtube/v3/videos?key={api_key}&id={video_ids_str}&part=snippet,contentDetails'
    response = requests.get(url).json()
    for item in response['items']:
        video_url = f'https://www.youtube.com/watch?v={item["id"]}'
        title = item['snippet']['title']
        description = item['snippet']['description']
        published_at = item['snippet']['publishedAt']
        duration = item['contentDetails']['duration']
        video_details_list.append({
            'video_url': video_url,
            'video_title': title,
            'video_description': description,
            'video_duration': duration,
            'video_publishing_date': published_at
        })
    return video_details_list

# Main function to get video details for a list of channels for a candidate
def get_candidate_videos(api_key, candidate, channels):
    rows = []
    for channel in channels:
        channel_id = get_channel_id(api_key, channel)
        if not channel_id:
            print(f"Skipping channel {channel} for {candidate} due to error.")
            continue  # Skip to the next channel if there's an error
        videos = get_video_details(api_key, get_video_ids(api_key, channel_id))
        for video in videos:
            video['political_candidate'] = candidate
            video['youtube_channel'] = channel
            video['video_impressions'] = None  # You can fill this data later
            video['status'] = None  # You can fill this data later
            rows.append(video)
    return rows


In [20]:
# Your input
api_key = 'youtube_data_api'  # Replace with your API key
candidates_channels = {
    'javier_milei': [
        'https://www.youtube.com/@MILEIPRESIDENTE', 'https://www.youtube.com/@ElPelucaMilei'],  # Replace with Javier's YouTube channels
    'patricia_bullrich': [
        'https://www.youtube.com/@lanacion',
        'https://www.youtube.com/todonoticias',
        'https://www.youtube.com/@Infobae'],  # Replace with Patricia's YouTube channels
    'sergio_massa': [
        'https://www.youtube.com/@diariok']  # Replace with Sergio's YouTube channels
}

In [21]:
# Fetching the data and creating the DataFrame
rows = []
for candidate, channels in candidates_channels.items():
    rows.extend(get_candidate_videos(api_key, candidate, channels))
df = pd.DataFrame(rows)

Error fetching channel ID for MILEIPRESIDENTE. API Response: {'kind': 'youtube#channelListResponse', 'etag': 'RuuXzTIr0OoDqI4S0RU6n4FqKEM', 'pageInfo': {'totalResults': 0, 'resultsPerPage': 5}}
Skipping channel https://www.youtube.com/@MILEIPRESIDENTE for javier_milei due to error.
Error fetching channel ID for ElPelucaMilei. API Response: {'kind': 'youtube#channelListResponse', 'etag': 'RuuXzTIr0OoDqI4S0RU6n4FqKEM', 'pageInfo': {'totalResults': 0, 'resultsPerPage': 5}}
Skipping channel https://www.youtube.com/@ElPelucaMilei for javier_milei due to error.
Error fetching channel ID for lanacion. API Response: {'kind': 'youtube#channelListResponse', 'etag': 'RuuXzTIr0OoDqI4S0RU6n4FqKEM', 'pageInfo': {'totalResults': 0, 'resultsPerPage': 5}}
Skipping channel https://www.youtube.com/@lanacion for patricia_bullrich due to error.
Error fetching channel ID for diariok. API Response: {'kind': 'youtube#channelListResponse', 'etag': 'RuuXzTIr0OoDqI4S0RU6n4FqKEM', 'pageInfo': {'totalResults': 0, 

In [None]:
# Export to CSV
df.to_csv('political_candidates_videos.csv', index=False)