In [39]:
from googleapiclient.discovery import build
import pandas as pd
import seaborn as sns
import os
from apiclient.discovery import build

In [40]:
# Retrieve the API key from the environment.
# Fail fast with an explicit message when the variable is unset or empty,
# instead of handing developerKey=None to the client and failing later
# with an error that does not mention the missing key.
api_key = os.environ.get('YOUTUBE_API')
if not api_key:
    raise RuntimeError(
        "Environment variable 'YOUTUBE_API' is not set; "
        "export a YouTube Data API key before running this notebook."
    )

# Build the YouTube Data API v3 client used by the cells below.
youtube = build('youtube', 'v3', developerKey=api_key)

In [54]:
## Function to get channel data

def get_data(youtube, channel_ids):
    """Fetch summary information for a set of YouTube channels.

    Parameters
    ----------
    youtube
        API client object. It must expose ``channels().list(part=..., id=...)``
        returning a request whose ``execute()`` yields the response dict.
    channel_ids : list of str (a single ID string is also accepted)
        Channel IDs to look up.

    Returns
    -------
    list of dict
        One dict per channel returned by the API, with the keys
        ``Channel_name``, ``Country``, ``Subscriber``, ``View``, ``TotalVDO``,
        ``Published`` and ``Description``. ``Country`` and the three
        statistics are ``None`` when the API response omits them.
        An empty input yields an empty list without calling the API.
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(channel_ids, str):
        channel_ids = [channel_ids]
    channel_ids = list(channel_ids)

    # NOTE(review): channels.list is understood to accept at most 50 IDs per
    # request, so larger lists are sent in batches -- confirm against the API docs.
    batch_size = 50

    channel_data = []

    for start in range(0, len(channel_ids), batch_size):
        batch = channel_ids[start:start + batch_size]

        request = youtube.channels().list(
            part="snippet,contentDetails,statistics",
            id=','.join(batch)  # convert list of IDs into string
        )
        response = request.execute()

        # 'items' can be absent when none of the requested IDs match.
        for item in response.get('items', []):
            snippet = item['snippet']
            statistics = item.get('statistics', {})

            channel_data.append(dict(
                Channel_name=snippet['title'],
                Country=snippet.get('country'),  # None if missing
                # Statistics may be withheld (e.g. hidden subscriber counts),
                # so read them without raising KeyError.
                Subscriber=statistics.get('subscriberCount'),
                View=statistics.get('viewCount'),
                TotalVDO=statistics.get('videoCount'),
                Published=snippet['publishedAt'],
                Description=snippet['description']
            ))

    return channel_data

In [56]:
## Global media channels

channel_ids = [
    'UC16niRr50-MSBwiO3YDb3RA',  # BBC
    'UCupvZG-5ko_eiXAupbDfxWw',  # CNN
    'UCIALMKvObZNtJ6AmdCLP7Lg',  # Bloomberg
    'UChqUTb7kYRX8-EiaN3XFrSQ',  # Reuters
    'UCK7tptUDHh-RYDsdxO1-5QQ',  # WSJ
    'UCNye-wNBqNL5ZzHSJj3l8Bg',  # aljazeeraenglish
    'UCZFMm1mMw0F81Z37aaEzTUA',  # NDTV
    'UCIvaYmXn910QMdemBG3v1pQ',  # Zee News
    'UCSPEjw8F2nQDtmUKPFNF7_A',  # NHKWORLDJAPAN
    'UCo_QfGG3FlWVh3Z0AoxuwBA',  # thejapantimes
    'UCgrNz-aDmcr2uuto8_DL2jg',  # CGTN
    'UCahujLjSL34EPNxtwKRi_vg',  # chinadaily6722
    'UC5BMQOsAB8hKUyHu9KI6yig',  # kbsworldtv
    'UCzznO4xSV8BKnUBPyswtCUw',  # ArirangCoKrArirangNEWS
    'UC83jt4dlz1Gjl58fzQrrKZg',  # ChannelNewsAsia
    'UC4p_I9eiRewn2KoU-nawrDg',  # straitstimesonline
    'UCpWvshQVx1d7BqCsPnVuNIw',  # theStarOnline
    'UCk1v3FzlMu3r34LYgoHpH2w',  # TheStandardNews
    'UCrFDdD-EE05N7gjwZho2wqw',  # thairathonline
]

# Fetch the channel statistics. This call is placed after the channel_ids
# definition so the notebook runs top to bottom on a fresh kernel; calling
# get_data before the list exists raises NameError.
yt_stat = get_data(youtube, channel_ids)

## Save data to JSON

In [70]:
def save_data(title, data):
    """Write ``data`` to the file ``title`` as indented UTF-8 JSON.

    Parameters
    ----------
    title : str or path-like
        Destination file path; an existing file is overwritten.
    data
        Any JSON-serializable object.

    Raises
    ------
    TypeError
        If ``data`` contains a value the ``json`` module cannot serialize.
        In that case the destination file is left untouched.
    """
    # Serialize before opening the file: opening in 'w' mode truncates it
    # immediately, so a serialization error raised afterwards would destroy
    # the previous contents and leave a partial file behind.
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    with open(title, 'w', encoding='utf-8') as f:
        f.write(payload)

In [69]:
import json

def load_data(title):
    """Read the UTF-8 encoded JSON file ``title`` and return the decoded object."""
    with open(title, mode='r', encoding='utf-8') as source:
        raw_text = source.read()
    return json.loads(raw_text)

In [71]:
# Persist the fetched channel records to disk as JSON.
save_data(title='media_data.json', data=yt_stat)


## Load data from JSON

In [72]:
# Reload the channel records from the JSON file written earlier.
yt_stat = load_data(title='media_data.json')

In [77]:
from datetime import datetime

## Create DataFrame

In [57]:
# One row per channel record, one column per record key.
yt_data = pd.DataFrame(data=yt_stat)

In [58]:
# Preview the first five rows.
yt_data.head(n=5)

Unnamed: 0,Channel_name,Country,Subscriber,View,TotalVDO,Published,Description
0,NHK WORLD-JAPAN,JP,2500000,665586803,2287,2007-03-11T09:01:54Z,NHK WORLD -JAPAN is the international broadcas...
1,Arirang News,KR,642000,291953075,145014,2009-06-22T06:12:43Z,Arirang NEWS delivers the latest news on Natio...
2,Wall Street Journal,US,4600000,1643492294,27087,2007-06-18T18:06:47Z,The new WSJ Video takes you inside carefully s...
3,KBS WORLD TV,KR,19100000,15333328534,65536,2007-01-06T03:50:16Z,KBS WORLD TV is a television channel for inter...
4,Thairath Online,TH,16600000,14976372442,249760,2010-09-27T03:41:29Z,ไทยรัฐออนไลน์ & ไทยรัฐทีวี ช่อง 32 นำเสนอข่าวส...


In [64]:
# Show the dtype of every column before any type conversion.
print(yt_data.dtypes)

Channel_name    object
Country         object
Subscriber      object
View            object
TotalVDO        object
Published       object
Description     object
dtype: object


## Data Cleaning
- Convert Channel_name and Description to string (Country is left unchanged, since it can be missing)
- Convert Subscriber, View, TotalVDO to int
- Convert Published to datetime format

In [66]:
# Counts arrive from the API as strings; turn them into numbers.
for count_column in ('Subscriber', 'View', 'TotalVDO'):
    yt_data[count_column] = pd.to_numeric(yt_data[count_column])

# Parse the ISO-8601 publication timestamps.
yt_data['Published'] = pd.to_datetime(yt_data['Published'])

# Make sure the free-text columns hold Python strings.
for text_column in ('Channel_name', 'Description'):
    yt_data[text_column] = yt_data[text_column].astype(str)

In [67]:
# Show the column dtypes again to verify the conversions performed above.
print(yt_data.dtypes)

Channel_name                 object
Country                      object
Subscriber                    int64
View                          int64
TotalVDO                      int64
Published       datetime64[ns, UTC]
Description                  object
dtype: object
