In [None]:
import os
import pandas as pd
from tqdm import tqdm
from googleapiclient.discovery import build
from google.cloud import bigquery

# --- Credentials ------------------------------------------------------------
# Point the Google client libraries at a local credentials JSON file.
# NOTE(review): this is a machine-specific absolute path; consider supplying
# it from the environment instead of hard-coding it here.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "C:/Users/tanju/Desktop/upheld-momentum-463013-v7-60b336c3c385.json"

# --- BigQuery locations -----------------------------------------------------
PROJECT_ID = "upheld-momentum-463013-v7"
DATASET_ID = "dbt_tdereli"          # dataset the source table is read from
TARGET_DATASET = "dbt_tdereli"      # dataset the enriched table is written to
SOURCE_TABLE = "stg_youtube_trending"
DEST_TABLE = "channel_info_enriched"

# --- API clients ------------------------------------------------------------
# The YouTube key comes from the environment; it is None when the variable
# is unset.
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)

client = bigquery.Client(project=PROJECT_ID)

# --- Collect the distinct, non-null video IDs from the source table ---------
query = f"""
    SELECT DISTINCT video_id
    FROM `{PROJECT_ID}.{DATASET_ID}.{SOURCE_TABLE}`
    WHERE video_id IS NOT NULL
"""
query_job = client.query(query)
video_ids = [record["video_id"] for record in query_job]
print(f"Found {len(video_ids)} unique video IDs.")


def fetch_channel_info(batch_ids):
    """Look up the owning channel for a batch of YouTube video IDs.

    Issues a single ``videos.list`` request (``part="snippet"``) through the
    module-level ``youtube`` client, passing the IDs comma-joined as ``id``.

    Args:
        batch_ids: Sequence of video ID strings to look up in one request.

    Returns:
        A list of dicts with the keys ``video_id``, ``channel_id`` and
        ``channel_title``, one per usable item in the response. The list is
        empty when ``batch_ids`` is empty or when the request fails; errors
        are printed rather than raised so the caller can continue with the
        next batch.
    """
    if not batch_ids:
        # Nothing to look up; do not spend a request on an empty id list.
        return []

    # Keep the try body limited to the request itself so a parsing problem
    # is not misreported as a fetch failure.
    try:
        response = youtube.videos().list(
            part="snippet",
            id=",".join(batch_ids),
        ).execute()
    except Exception as e:
        # Deliberately best-effort: a failed request drops only this batch.
        print(f"Error fetching batch: {e}")
        return []

    result = []
    for item in response.get("items", []):
        # Handle each item on its own so one malformed entry does not
        # discard the rest of the batch.
        try:
            snippet = item["snippet"]
            result.append({
                "video_id": item["id"],
                "channel_id": snippet["channelId"],
                "channel_title": snippet["channelTitle"],
            })
        except (KeyError, TypeError) as e:
            print(f"Skipping malformed item in batch: {e!r}")
    return result


# Walk the video IDs in fixed-size slices, one fetch_channel_info call per
# slice, and accumulate whatever each call returns.
BATCH_SIZE = 50
channel_info = []
batch_starts = range(0, len(video_ids), BATCH_SIZE)
for start in tqdm(batch_starts, desc="Fetching channel info"):
    channel_info += fetch_channel_info(video_ids[start:start + BATCH_SIZE])

print(f"Successfully enriched {len(channel_info)} videos.")


# Build the upload frame with explicit columns so it always lines up with the
# load schema below, even when no rows were collected.
df = pd.DataFrame(
    channel_info,
    columns=["video_id", "channel_id", "channel_title"],
)

# The load below uses WRITE_TRUNCATE, which replaces the destination table.
# Stop with a clear error instead of attempting that with nothing to upload
# (for example when every fetch batch failed).
if df.empty:
    raise RuntimeError(
        f"No channel info was fetched; refusing to overwrite `{DEST_TABLE}` "
        "with an empty result."
    )

job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    schema=[
        bigquery.SchemaField("video_id", "STRING"),
        bigquery.SchemaField("channel_id", "STRING"),
        bigquery.SchemaField("channel_title", "STRING"),
    ],
)

table_ref = f"{PROJECT_ID}.{TARGET_DATASET}.{DEST_TABLE}"
load_job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
load_job.result()  # wait for the load job to complete before reporting

print(f"Uploaded enriched data to BigQuery table `{DEST_TABLE}`.")

Found 6579 unique video IDs.


Fetching channel info: 100%|██████████| 132/132 [00:19<00:00,  6.84it/s]


Successfully enriched 6549 videos.
Uploaded enriched data to BigQuery table `channel_info_enriched`.
