In [4]:
import os
import pandas as pd
from tqdm import tqdm
from googleapiclient.discovery import build
from google.cloud import bigquery

# Point the Google client libraries at the service-account key file.
# NOTE(review): this path is machine-specific; anyone else running the cell
# has to change it.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "C:/Users/tanju/Desktop/upheld-momentum-463013-v7-910c47d3ace5.json"

# BigQuery locations: the source table is read from DATASET_ID and the
# enriched table is written to TARGET_DATASET (currently the same dataset).
PROJECT_ID = "upheld-momentum-463013-v7"
DATASET_ID = "dbt_tdereli"
TARGET_DATASET = "dbt_tdereli"
SOURCE_TABLE = "stg_youtube_trending"
DEST_TABLE = "channel_info_enriched"


# The YouTube API key comes from the environment. Fail fast with a clear
# message when it is absent instead of handing None to the client builder.
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
if not YOUTUBE_API_KEY:
    raise RuntimeError("YOUTUBE_API_KEY environment variable is not set.")
youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)

client = bigquery.Client(project=PROJECT_ID)


# Collect every distinct video ID present in the source table.
query = f"""
    SELECT DISTINCT video_id
    FROM `{PROJECT_ID}.{DATASET_ID}.{SOURCE_TABLE}`
"""
video_ids = [row["video_id"] for row in client.query(query)]
print(f"Found {len(video_ids)} unique video IDs.")


def fetch_channel_info(batch_ids):
    """Look up channel details for a batch of YouTube video IDs.

    Sends one ``youtube.videos().list`` request with ``part="snippet"`` for
    all IDs in the batch and returns one row per requested ID, in the same
    order as ``batch_ids``.

    Args:
        batch_ids: Sequence of video ID strings; they are joined with commas
            into a single ``id`` request parameter.

    Returns:
        A list of dicts with the keys ``video_id``, ``channel_id`` and
        ``channel_title``. IDs that the API did not return, or whose snippet
        lacks a field, get ``None`` for the channel fields. An empty list is
        returned for an empty batch or when the API request itself fails
        (the error is printed, not raised).
    """
    # Nothing to look up: avoid sending a request with an empty id list.
    if not batch_ids:
        return []

    # Only the network call is best-effort; keeping the try body this small
    # stops unrelated bugs from being reported as a failed fetch.
    try:
        response = youtube.videos().list(
            part="snippet",
            id=",".join(batch_ids),
        ).execute()
    except Exception as e:
        print(f"[ERROR] Failed fetching batch: {e}")
        return []

    # Index the returned items by video ID for constant-time lookup.
    item_lookup = {
        item["id"]: item
        for item in response.get("items", [])
        if "id" in item
    }

    missing_ids = set(batch_ids).difference(item_lookup)
    if missing_ids:
        print(f"[WARN] Missing video IDs from API: {missing_ids}")

    result = []
    for vid in batch_ids:
        # A missing item or missing snippet field degrades to None so every
        # requested ID still produces exactly one row.
        snippet = item_lookup.get(vid, {}).get("snippet", {})
        result.append({
            "video_id": vid,
            "channel_id": snippet.get("channelId"),
            "channel_title": snippet.get("channelTitle"),
        })

    return result




# Fetch channel info for all video IDs, BATCH_SIZE IDs per API request.
channel_info = []
BATCH_SIZE = 50
for i in tqdm(range(0, len(video_ids), BATCH_SIZE), desc="Fetching channel info"):
    batch = video_ids[i:i + BATCH_SIZE]
    channel_info.extend(fetch_channel_info(batch))

print(f"Successfully enriched {len(channel_info)} videos.")


# Explicit columns keep the frame's layout stable even when no rows came back.
df = pd.DataFrame(channel_info, columns=["video_id", "channel_id", "channel_title"])


# WRITE_TRUNCATE replaces the destination table's contents with this load.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=[
        bigquery.SchemaField("video_id", "STRING"),
        bigquery.SchemaField("channel_id", "STRING"),
        bigquery.SchemaField("channel_title", "STRING"),
    ]
)

table_ref = f"{PROJECT_ID}.{TARGET_DATASET}.{DEST_TABLE}"

if df.empty:
    # Every batch failed (or there were no video IDs): do not replace the
    # existing table with an empty one.
    print(f"[WARN] No channel info fetched; skipping upload to `{DEST_TABLE}`.")
else:
    load_job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
    load_job.result()  # Block until the load job finishes (raises on failure).

    print(f"Uploaded enriched data to BigQuery table name `{DEST_TABLE}`.")

Found 8831 unique video IDs.


Fetching channel info:   4%|▍         | 7/177 [00:03<00:43,  3.87it/s]

[WARN] Missing video IDs from API: {'1XBMMwEpCeg'}


Fetching channel info:   6%|▌         | 11/177 [00:03<00:31,  5.35it/s]

[WARN] Missing video IDs from API: {'UmZYLCoPahE'}
[WARN] Missing video IDs from API: {'D7QxvWShyuY'}


Fetching channel info:   7%|▋         | 12/177 [00:04<00:32,  5.04it/s]

[WARN] Missing video IDs from API: {'qJR6-BsldSo'}


Fetching channel info:   9%|▉         | 16/177 [00:04<00:33,  4.84it/s]

[WARN] Missing video IDs from API: {'RiLyU_oXSUE'}


Fetching channel info:  14%|█▎        | 24/177 [00:06<00:30,  5.06it/s]

[WARN] Missing video IDs from API: {'KJqdNp9oMFQ'}


Fetching channel info:  14%|█▍        | 25/177 [00:06<00:30,  4.91it/s]

[WARN] Missing video IDs from API: {'iSvwbSI41XU'}


Fetching channel info:  15%|█▌        | 27/177 [00:07<00:29,  5.09it/s]

[WARN] Missing video IDs from API: {'0UTowVjDwgI'}


Fetching channel info:  20%|██        | 36/177 [00:08<00:22,  6.18it/s]

[WARN] Missing video IDs from API: {'XH8XSKQM7pI', 'P4xVWOF6FEE'}


Fetching channel info:  24%|██▎       | 42/177 [00:09<00:27,  4.90it/s]

[WARN] Missing video IDs from API: {'YdP1iNnxkQA'}


Fetching channel info:  25%|██▍       | 44/177 [00:10<00:26,  5.02it/s]

[WARN] Missing video IDs from API: {'LajIKPikBVs'}


Fetching channel info:  29%|██▉       | 51/177 [00:11<00:22,  5.49it/s]

[WARN] Missing video IDs from API: {'kQ_s4SgeYoU'}


Fetching channel info:  30%|██▉       | 53/177 [00:11<00:19,  6.30it/s]

[WARN] Missing video IDs from API: {'K9No0wRfS7E'}


Fetching channel info:  34%|███▍      | 60/177 [00:13<00:22,  5.29it/s]

[WARN] Missing video IDs from API: {'0gquxXZaTDw', 'BqLoczjNStQ'}


Fetching channel info:  37%|███▋      | 65/177 [00:14<00:20,  5.43it/s]

[WARN] Missing video IDs from API: {'osbpBeeXqD4'}


Fetching channel info:  46%|████▌     | 81/177 [00:17<00:19,  4.88it/s]

[WARN] Missing video IDs from API: {'Ba2_3bnnCyY'}


Fetching channel info:  47%|████▋     | 84/177 [00:17<00:18,  5.09it/s]

[WARN] Missing video IDs from API: {'SqadGUN6qa4'}


Fetching channel info:  49%|████▊     | 86/177 [00:18<00:17,  5.25it/s]

[WARN] Missing video IDs from API: {'VQ7YLPVDnbE'}


Fetching channel info:  53%|█████▎    | 93/177 [00:19<00:15,  5.43it/s]

[WARN] Missing video IDs from API: {'R0v-v3rd3VY'}


Fetching channel info:  55%|█████▍    | 97/177 [00:20<00:12,  6.51it/s]

[WARN] Missing video IDs from API: {'0eQhmLde4LI'}
[WARN] Missing video IDs from API: {'QCcacnK51cg'}


Fetching channel info:  59%|█████▉    | 104/177 [00:22<00:26,  2.79it/s]

[WARN] Missing video IDs from API: {'SmNWm4v1yWA'}


Fetching channel info:  63%|██████▎   | 112/177 [00:24<00:14,  4.50it/s]

[WARN] Missing video IDs from API: {'45PFcl61zx0'}


Fetching channel info:  69%|██████▉   | 123/177 [00:26<00:09,  5.55it/s]

[WARN] Missing video IDs from API: {'bRyFVGc-hPg'}


Fetching channel info:  71%|███████   | 125/177 [00:27<00:08,  6.19it/s]

[WARN] Missing video IDs from API: {'w1OQB0TaARw'}


Fetching channel info:  73%|███████▎  | 129/177 [00:27<00:09,  4.96it/s]

[WARN] Missing video IDs from API: {'Se5nggefysA'}


Fetching channel info:  74%|███████▍  | 131/177 [00:28<00:09,  4.94it/s]

[WARN] Missing video IDs from API: {'dRQowMTZvHA'}
[WARN] Missing video IDs from API: {'C7WoHk15jg4'}


Fetching channel info:  80%|████████  | 142/177 [00:30<00:06,  5.59it/s]

[WARN] Missing video IDs from API: {'ZSn_TJedRbg'}


Fetching channel info:  81%|████████▏ | 144/177 [00:30<00:05,  5.90it/s]

[WARN] Missing video IDs from API: {'W4yKzGxtseE'}


Fetching channel info:  83%|████████▎ | 147/177 [00:31<00:04,  6.08it/s]

[WARN] Missing video IDs from API: {'X6Qd3CXVBk4'}


Fetching channel info:  84%|████████▎ | 148/177 [00:31<00:05,  4.88it/s]

[WARN] Missing video IDs from API: {'VJ5fUaazxQk'}


Fetching channel info:  85%|████████▍ | 150/177 [00:31<00:05,  4.93it/s]

[WARN] Missing video IDs from API: {'4oB2uiRKC_4'}


Fetching channel info:  86%|████████▋ | 153/177 [00:32<00:04,  5.44it/s]

[WARN] Missing video IDs from API: {'Pmprf9uXJa4'}


Fetching channel info:  90%|█████████ | 160/177 [00:33<00:03,  5.01it/s]

[WARN] Missing video IDs from API: {'9qdqkpNO_I8'}


Fetching channel info:  92%|█████████▏| 162/177 [00:34<00:02,  5.18it/s]

[WARN] Missing video IDs from API: {'ybtHW-5hIPw'}
[WARN] Missing video IDs from API: {'E7IQmjWuJnE'}


Fetching channel info:  93%|█████████▎| 165/177 [00:34<00:02,  5.49it/s]

[WARN] Missing video IDs from API: {'WPQIexo41lo'}
[WARN] Missing video IDs from API: {'qJHpux2dolU'}


Fetching channel info:  99%|█████████▉| 175/177 [00:36<00:00,  4.92it/s]

[WARN] Missing video IDs from API: {'Ww7Y_uNTFPQ'}


Fetching channel info: 100%|██████████| 177/177 [00:37<00:00,  4.77it/s]


Successfully enriched 8831 videos.
Uploaded enriched data to BigQuery table name `channel_info_enriched`.
