In [4]:
import os
import re
from datetime import datetime, timezone

import pandas as pd
import requests


# Read the API key from the YOUTUBE_API_KEY environment variable so the real
# credential does not have to be written into the source; the placeholder is
# kept as the fallback so the previous default is unchanged.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "your api id")

# CSV export that must contain a "Youtube Urls" column.
file_path = r"C:\Users\hp\Downloads\youtube_channels_sample - Youtube.csv"
df = pd.read_csv(file_path)

# Empty cells are dropped so only rows that actually hold a URL are processed.
youtube_urls = df["Youtube Urls"].dropna().tolist()


def get_channel_id(username):
    """Resolve a YouTube handle or channel name to a channel ID.

    An exact handle lookup (``channels.list`` with ``forHandle``) is tried
    first because a search only returns the best fuzzy match. If the handle
    lookup yields nothing, the function falls back to a channel search and
    returns the top hit, as before.

    Args:
        username: Handle (with or without a leading ``@``) or free-text
            channel name.

    Returns:
        The channel ID string, or ``None`` when neither lookup returns any
        result (including API error payloads, which carry no ``items``).
    """
    # Passing ``params`` lets requests URL-encode the query instead of
    # interpolating raw text into the URL.
    handle_response = requests.get(
        "https://www.googleapis.com/youtube/v3/channels",
        params={"part": "id", "forHandle": username, "key": API_KEY},
        timeout=30,
    )
    handle_items = handle_response.json().get("items")
    if handle_items:
        return handle_items[0]["id"]

    search_response = requests.get(
        "https://www.googleapis.com/youtube/v3/search",
        params={
            "part": "snippet",
            "type": "channel",
            "q": username,
            "key": API_KEY,
        },
        timeout=30,
    )
    # ``items`` can be present but empty when nothing matches, so test for
    # truthiness rather than key membership before indexing.
    search_items = search_response.json().get("items")
    if search_items:
        return search_items[0]["id"]["channelId"]
    return None


# Handles may contain letters, digits, underscores, hyphens, periods and the
# Latin middle dot (U+00B7); the character class covers all of them so a
# handle such as "@john.doe" is not truncated at the first period.
_HANDLE_PATTERN = re.compile(r"youtube\.com/@([\w.\u00b7-]+)")

# One entry per URL, in order; URLs that carry no "@handle" map to None.
# Each URL is matched once and the match object is reused.
channel_ids = [
    get_channel_id(found.group(1)) if (found := _HANDLE_PATTERN.search(url)) else None
    for url in youtube_urls
]


def get_channel_stats(channel_id):
    """Fetch subscriber, view and video counts for one channel.

    Args:
        channel_id: YouTube channel ID to look up.

    Returns:
        A dict with the keys ``"Channel ID"``, ``"Subscribers"``,
        ``"Total Views"`` and ``"Total Videos"`` (counts as ints, ``0`` when
        the API omits a field), or ``None`` when the response contains no
        channel (unknown ID or an API error payload).
    """
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/channels",
        params={"part": "statistics", "id": channel_id, "key": API_KEY},
        timeout=30,
    )
    # ``items`` may be missing or an empty list; both mean "no such channel".
    items = response.json().get("items")
    if not items:
        return None

    stats = items[0]["statistics"]
    return {
        "Channel ID": channel_id,
        "Subscribers": int(stats.get("subscriberCount", 0)),
        "Total Views": int(stats.get("viewCount", 0)),
        "Total Videos": int(stats.get("videoCount", 0)),
    }


def get_video_metrics(channel_id):
    """Compute engagement metrics from a channel's most recent videos.

    Up to 50 of the latest uploads are fetched via the search endpoint and
    their like/comment counts are read with a single batched ``videos.list``
    request.

    Args:
        channel_id: YouTube channel ID to analyse.

    Returns:
        A dict with the keys:

        * ``"Channel ID"``: the ID passed in.
        * ``"Avg. Likes"``: total likes floor-divided by the number of
          videos fetched (``0`` when there are none).
        * ``"Video Titles"``: titles of the five most recent videos.
        * ``"Engagement Rate"``: likes plus comments of the fetched videos
          as a percentage of the channel's total views, rounded to two
          decimals; ``0.0`` when channel statistics are unavailable.
        * ``"Upload Frequency"``: fetched videos per 30-day period since the
          oldest fetched upload, rounded to two decimals; ``0.0`` when the
          channel has no videos.
    """
    search_response = requests.get(
        "https://www.googleapis.com/youtube/v3/search",
        params={
            "part": "snippet",
            "channelId": channel_id,
            "order": "date",
            "type": "video",
            "maxResults": 50,
            "key": API_KEY,
        },
        timeout=30,
    )
    videos = search_response.json().get("items") or []

    video_ids = [video["id"]["videoId"] for video in videos]
    video_titles = [video["snippet"]["title"] for video in videos]
    # The API reports timestamps in UTC ("...Z"); keep them timezone-aware so
    # they are compared against an aware "now" rather than local wall time.
    upload_dates = [
        datetime.strptime(
            video["snippet"]["publishedAt"], "%Y-%m-%dT%H:%M:%SZ"
        ).replace(tzinfo=timezone.utc)
        for video in videos
    ]

    total_likes, total_comments = 0, 0
    if video_ids:
        # videos.list accepts a comma-separated ID list, so one request
        # replaces the previous request-per-video loop.
        stats_response = requests.get(
            "https://www.googleapis.com/youtube/v3/videos",
            params={
                "part": "statistics",
                "id": ",".join(video_ids),
                "key": API_KEY,
            },
            timeout=30,
        )
        for item in stats_response.json().get("items") or []:
            stats = item["statistics"]
            total_likes += int(stats.get("likeCount", 0))
            total_comments += int(stats.get("commentCount", 0))

    # Channel statistics can be missing (None); report a zero rate instead of
    # failing on the subscript.
    channel_stats = get_channel_stats(channel_id)
    if channel_stats:
        total_views = max(1, channel_stats["Total Views"])
        engagement_rate = ((total_likes + total_comments) / total_views) * 100
    else:
        engagement_rate = 0.0

    # min() on an empty list raises, so channels without videos get 0.0.
    if upload_dates:
        days_active = (datetime.now(timezone.utc) - min(upload_dates)).days
        months_active = max(1, days_active // 30)
        upload_frequency = len(upload_dates) / months_active
    else:
        upload_frequency = 0.0

    return {
        "Channel ID": channel_id,
        "Avg. Likes": total_likes // max(1, len(video_titles)),
        "Video Titles": video_titles[:5],  # the five most recent titles
        "Engagement Rate": round(engagement_rate, 2),
        "Upload Frequency": round(upload_frequency, 2),
    }


# Combine channel-level statistics with recent-video metrics for every
# resolved channel. Channels whose statistics cannot be fetched are skipped
# rather than aborting the whole run with ``None | dict``.
channel_data = []
for channel_id in channel_ids:
    if not channel_id:
        continue
    channel_stats = get_channel_stats(channel_id)
    if channel_stats is None:
        continue
    channel_data.append(channel_stats | get_video_metrics(channel_id))


df_stats = pd.DataFrame(channel_data)


# An empty frame has no "Engagement Rate" column to sort by, so it is passed
# through unchanged.
if df_stats.empty:
    df_stats_sorted = df_stats
else:
    df_stats_sorted = df_stats.sort_values(by="Engagement Rate", ascending=False)

# Display the final ranked channels
print(df_stats_sorted)


                  Channel ID  Subscribers  Total Views  Total Videos  \
11  UCPgfM-dk3XAb4T3DtT6Nwsw      6590000   1044098971          1076   
4   UCFAr3FQxRhSzVNOD3vq1gMQ      1320000     59555654           443   
0   UCZDCqcWZVNyvTSsoc-chKJg      4480000    335273139           335   
16  UCdvOCtR3a9ICLAw0DD3DpXg      1870000    549808822           903   
3   UCsQ6_w5iUOUOMAUPclg2kcg       970000     69701225           306   
2   UCT2xzagsBVtStUCA2ulvKmQ      7010000   1001836732          1728   
6   UC2zYoCWdj3qLtAspeirqWUQ       777000      8067586           341   
19  UC9dyrsYEmD4iGJ8Oz1G5dpw     20300000   2026789478          1093   
1   UCr6hHjMNvgQxErQ1ZJV4drw      1130000    128336093          1413   
9   UCfj7SPM25A3RQSYE1YFPyVQ      1230000    107493838          1380   
7   UClmVVcYuH0ZPm3qsPm1TQIg      1670000     12694504           683   
18  UCWHCXSKASuSzao_pplQ7SPw      3010000    709571773          2951   
10  UCBM6AwaDl8uPVAN_BUIqPUw      5240000   1973200456          