In [1]:
from googleapiclient.discovery import build
import api_config as conf

In [2]:
# # Create a YouTube API client
# youtube = build("youtube", "v3", developerKey=conf.YOUTUBE_API_KEY)

# def search_song(song_name):
#     """Search for a song on YouTube and get the first result."""
#     request = youtube.search().list(
#         q=song_name,
#         part="snippet",
#         maxResults=1,  # Get only the first result
#         type="video"  # Only search for videos (not channels or playlists)
#     )
#     response = request.execute()
    
#     if "items" not in response or not response["items"]:
#         print("No results found.")
#         return None

#     first_result = response["items"][0]
#     video_id = first_result["id"]["videoId"]
#     video_title = first_result["snippet"]["title"]
#     channel_title = first_result["snippet"]["channelTitle"]
#     publish_date = first_result["snippet"]["publishedAt"]

#     return {
#         "video_id": video_id,
#         "title": video_title,
#         "artist": channel_title,
#         "release_date": publish_date
#     }

# def get_video_details(video_id):
#     """Get detailed statistics for a video by ID."""
#     request = youtube.videos().list(
#         part="statistics",
#         id=video_id
#     )
#     response = request.execute()
    
#     if "items" not in response or not response["items"]:
#         print("No video details found.")
#         return None
    
#     stats = response["items"][0]["statistics"]
#     return {
#         "views": stats.get("viewCount", "N/A"),
#         "likes": stats.get("likeCount", "N/A"),
#         "comments": stats.get("commentCount", "N/A")
#     }

# # Example usage:
# song_query = "Starboy"  # Change this to the song you want
# song_info = search_song(song_query)

# if song_info:
#     print("🎵 Song Information:")
#     print(song_info)
    
#     video_stats = get_video_details(song_info["video_id"])
#     if video_stats:
#         song_info.update(video_stats)

#     print("\n📊 Video Statistics:")
#     print(song_info)

In [3]:
import pandas as pd
import time
import json
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import api_config as conf

# YouTube Data API v3 client, authenticated with the key from api_config.
youtube = build("youtube", "v3", developerKey=conf.YOUTUBE_API_KEY)

# Load the Spotify track list; the query loop reads its 'track_name' column.
# The path is a raw string: in a normal literal the Windows backslashes form
# invalid escape sequences ("\g", "\d", "\c", "\s"), which trigger a
# SyntaxWarning on Python 3.12+ and are slated to become errors.
df = pd.read_csv(r"D:\git_repository\dataEn_final_project\collect_data\datasets\spotify.csv")
# df = pd.DataFrame({"song_name": ["Shape of You Ed Sheeran", "Blinding Lights The Weeknd", "Dance Monkey Tones and I"]})

# Storage for results (one dict per processed song)
results = []

# Function to search for a song and get video details
def get_song_details(song_name):
    """Look up a song on YouTube and gather metadata plus statistics for the top hit.

    Args:
        song_name: Free-text query passed to the YouTube search endpoint.

    Returns:
        A dict that always contains "song_name" and "error". On success it also
        holds "video_id", "title", "artist" (the channel title), "release_date"
        and the "views" / "likes" / "comments" counters ("N/A" when the API
        omits one), with "error" set to None. On a failed lookup only
        "song_name" and an "error" message are present.
        None when the API answers with HTTP 403, which this notebook treats as
        an exhausted quota; progress is saved before returning.
    """
    try:
        # Step 1: ask the search endpoint for the single best-matching video.
        found = youtube.search().list(
            q=song_name,
            part="snippet",
            maxResults=1,
            type="video",
        ).execute()
        if not ("items" in found and found["items"]):
            return {"song_name": song_name, "error": "No results found"}

        top_hit = found["items"][0]
        video_id = top_hit["id"]["videoId"]
        snippet = top_hit["snippet"]
        record = {
            "song_name": song_name,
            "video_id": video_id,
            "title": snippet["title"],
            "artist": snippet["channelTitle"],
            "release_date": snippet["publishedAt"],
        }

        # Step 2: fetch the counters for that video.
        details = youtube.videos().list(
            part="statistics",
            id=video_id,
        ).execute()
        if not ("items" in details and details["items"]):
            return {"song_name": song_name, "error": "No video details found"}

        counters = details["items"][0]["statistics"]
        for field, api_field in (
            ("views", "viewCount"),
            ("likes", "likeCount"),
            ("comments", "commentCount"),
        ):
            # A counter can be absent from the response; fall back to "N/A".
            record[field] = counters.get(api_field, "N/A")
        record["error"] = None
        return record

    except HttpError as e:
        if e.resp.status != 403:
            return {"song_name": song_name, "error": f"API error: {e!s}"}
        # HTTP 403 is taken to mean the quota ran out: checkpoint and signal
        # the caller to stop by returning None.
        print("🚨 API quota exceeded. Saving progress...")
        save_progress()
        return None

    except Exception as e:
        return {"song_name": song_name, "error": f"Unexpected error: {e!s}"}

# Function to save progress
def save_progress():
    """Dump everything collected so far in `results` to 'youtube_song_data.csv'.

    The file is overwritten on every call and written without the index column.
    """
    pd.DataFrame(results).to_csv("youtube_song_data.csv", index=False)
    print("✅ Progress saved to 'youtube_song_data.csv'")

# Load previous progress if available
try:
    previous_results = pd.read_csv("youtube_song_data.csv")
except (FileNotFoundError, pd.errors.EmptyDataError):
    # Either no checkpoint exists yet, or an earlier run stopped before it had
    # collected anything and save_progress() wrote a CSV from an empty list.
    # read_csv rejects such a file with EmptyDataError, which previously
    # crashed the resume step. In both cases start from scratch.
    completed_songs = set()
else:
    # Songs already present in the checkpoint are skipped by the query loop.
    completed_songs = set(previous_results["song_name"])
    results = previous_results.to_dict("records")  # Restore previous results
    print(f"🔄 Loaded previous results: {len(completed_songs)} songs")
# Query all songs
# Iterate the column directly; the rest of each row is never used.
for song_name in df["track_name"]:
    if song_name in completed_songs:
        print(f"⏩ Skipping '{song_name}' (already processed)")
        continue  # Skip already processed songs

    print(f"🔍 Searching for '{song_name}'...")
    song_data = get_song_details(song_name)

    if song_data is None:
        # get_song_details returns None only after an HTTP 403 (quota) response.
        print("🚨 API quota exceeded. Stopping execution.")
        break  # Stop querying when quota is exceeded

    results.append(song_data)
    # Mark the title as done immediately, so a track name that appears again
    # later in the CSV is skipped instead of spending quota on a second lookup
    # and adding a duplicate row.
    completed_songs.add(song_name)
    time.sleep(0.5)  # Prevent hitting API rate limits

# Save progress at the end
save_progress()

# Convert results to DataFrame
youtube_df = pd.DataFrame(results)



🔄 Loaded previous results: 2574 songs
⏩ Skipping 'When I'm With You' (already processed)
⏩ Skipping 'Moonlit Floor (Kiss Me)' (already processed)
⏩ Skipping 'Who' (already processed)
⏩ Skipping 'New Woman (feat. ROSALÍA)' (already processed)
⏩ Skipping 'Die With A Smile' (already processed)
⏩ Skipping 'BIRDS OF A FEATHER' (already processed)
⏩ Skipping 'Rockstar' (already processed)
⏩ Skipping '3D (feat. Jack Harlow)' (already processed)
⏩ Skipping 'Standing Next to You' (already processed)
⏩ Skipping 'Seven (feat. Latto) (Clean Ver.)' (already processed)
⏩ Skipping 'One Of The Girls (with JENNIE, Lily Rose Depp)' (already processed)
⏩ Skipping 'Seven (feat. Latto)' (already processed)
⏩ Skipping 'Seven (feat. Latto) (Explicit Ver.)' (already processed)
⏩ Skipping 'That's What I Like' (already processed)
⏩ Skipping 'Cruel Summer' (already processed)
⏩ Skipping 'Snooze' (already processed)
⏩ Skipping 'we can't be friends (wait for your love)' (already processed)
⏩ Skipping 'Messy' (alre