## Youtube Crawler

In [1]:
!pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib
!pip install python-dotenv


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os
import google_auth_oauthlib.flow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from dotenv import load_dotenv
import pprint
pp = pprint.PrettyPrinter(indent=2, width=80)

In [3]:
def load_api_key():
    load_dotenv(verbose=True)
    return os.environ.get("YOUTUBE_API_KEY")

def get_video_stats(video_id, api_key):
    """
    Fetches statistics for a YouTube video including view count.
    
    Parameters:
        video_id (str): The YouTube video ID (found in the URL after 'v=')
        api_key (str): Your YouTube Data API key
    
    Returns:
        dict: Video statistics including viewCount
    """
    # Create a YouTube API service instance
    youtube = build('youtube', 'v3', developerKey=api_key)
    
    try:
        # Make an API request to get video statistics
        request = youtube.videos().list(
            part='statistics,snippet,status',
            id=video_id  # The video ID we want to analyze
        )
        
        # Execute the request
        response = request.execute()
        
        # Extract and return the statistics
        if response:  # Check if we got any results
            video_data = response['items']
            #return return_detailed_stats(video_data)
            return video_data
        else:
            return None
            
    except HttpError as e:
        print(f'An HTTP error {e.resp.status} occurred: {e.content}')
        return None

def return_detailed_stats(video_data):
     
     detailed_stats = {
                # Basic statistics
                'viewCount': int(video_data['statistics'].get('viewCount', 0)),
                'likeCount': int(video_data['statistics'].get('likeCount', 0)),
                'commentCount': int(video_data['statistics'].get('commentCount', 0)),
                'favoriteCount': int(video_data['statistics'].get('favoriteCount', 0)),
                
                # Video metadata from snippet
                'title': video_data['snippet'].get('title', 'N/A'),
                'publishedAt': video_data['snippet'].get('publishedAt', 'N/A'),
                'channelTitle': video_data['snippet'].get('channelTitle', 'N/A'),
                'tags': video_data['snippet'].get('tags', []),
                'categoryId': video_data['snippet'].get('categoryId', 'N/A'),
                
                # Content details
                'duration': video_data['contentDetails'].get('duration', 'N/A'),
                'definition': video_data['contentDetails'].get('definition', 'N/A'),  # HD or SD
                'caption': video_data['contentDetails'].get('caption', 'N/A')  # Has captions?
            }
     return detailed_stats

def print_detailed_stats(stats):     
    print("\n=== Video Statistics ===")
    print(f"Views: {stats['viewCount']:,}")
    print(f"Likes: {stats['likeCount']:,}")
    print(f"Comments: {stats['commentCount']:,}")
    print(f"Favorites: {stats['favoriteCount']:,}")
    
    print("\n=== Video Details ===")
    print(f"Title: {stats['title']}")
    print(f"Channel: {stats['channelTitle']}")
    print(f"Published: {stats['publishedAt']}")
    print(f"Duration: {stats['duration']}")
    print(f"Definition: {stats['definition'].upper()}")
    print(f"Has Captions: {'Yes' if stats['caption'] == 'true' else 'No'}")
    
    if stats['tags']:
        print("\n=== Video Tags ===")
        for tag in stats['tags']:
            print(f"- {tag}")


In [4]:
if __name__ == "__main__":
    # Replace these with your actual values
    API_KEY = load_api_key()
    VIDEO_ID = '3KRoczsPIFU'  # 三生有幸 testing
    
    stats = get_video_stats(VIDEO_ID, API_KEY)
    pp.pprint(stats)

    #if stats:
        #print_detailed_stats(stats)

[ { 'etag': 'VDpjfwdnY7esbsfxosQvCR9QFAk',
    'id': '3KRoczsPIFU',
    'kind': 'youtube#video',
    'snippet': { 'categoryId': '22',
                 'channelId': 'UCoqJShrM2GJ9oe-rSkpktQg',
                 'channelTitle': 'YANTING official',
                 'defaultAudioLanguage': 'zh-HK',
                 'description': '2024年頭炮由Yan '
                                'Ting周殷廷親自作曲、填詞、監製兼任MV導演的新歌《三生有幸》。\n'
                                '\n'
                                '經歷了《遲了悔改》和《意外現場》後，到第三部曲《三生有幸》，\n'
                                '曾經一段刻骨銘心的愛情，遺留了許多回憶，\n'
                                '當中包含悔意、遺憾與內疚，\n'
                                '待你結婚當天，我會出席、會祝福你嗎？\n'
                                '\n'
                                '立即到各大數碼平台收聽《三生有幸》：\n'
                                'https://umhk.lnk.to/3lifeofluckID\n'
                                '\n'
                                '曲：周殷廷\n'
                                '詞：周殷廷 / 林寶\n'
                                '編

## Spotify Crawler

In [18]:
import requests
import base64
from datetime import datetime

In [19]:
def load_spotify_keys():
    """Load Spotify API credentials from environment variables"""
    load_dotenv(verbose=True)
    return os.environ.get("SPOTIFY_API_KEY"), os.environ.get("SPOTIFY_API_VAL")

def get_spotify_token(client_id, client_secret):
    """Get access token from Spotify API"""
    # Encode client credentials
    credentials = f"{client_id}:{client_secret}"
    credentials_b64 = base64.b64encode(credentials.encode()).decode()
    
    # Set up token request
    token_url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization": f"Basic {credentials_b64}",
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {"grant_type": "client_credentials"}
    
    try:
        response = requests.post(token_url, headers=headers, data=data)
        if response.status_code == 200:
            return response.json()['access_token']
        else:
            print(f"Token request failed: {response.status_code}")
            return None
    except Exception as e:
        print(f"Error getting token: {str(e)}")
        return None

def get_track_stats(track_id, access_token):
    """
    Fetches statistics for a Spotify track including popularity and markets.
    
    Parameters:
        track_id (str): The Spotify track ID
        access_token (str): Valid Spotify access token
    
    Returns:
        dict: Track statistics including popularity, markets, etc.
    """
    base_url = "https://api.spotify.com/v1"
    headers = {"Authorization": f"Bearer {access_token}"}
    
    try:
        # Get track details
        track_response = requests.get(
            f"{base_url}/tracks/{track_id}",
            headers=headers
        )
        
        if track_response.status_code == 200:
            track_data = track_response.json()
            
            # Get audio features for additional stats
            features_response = requests.get(
                f"{base_url}/audio-features/{track_id}",
                headers=headers
            )
            
            features_data = features_response.json() if features_response.status_code == 200 else {}
            
            # Combine relevant statistics
            stats = {
                'track_name': track_data.get('name'),
                'artist_name': track_data['artists'][0]['name'] if track_data.get('artists') else 'Unknown',
                'popularity': track_data.get('popularity'),  # 0-100 popularity score
                'markets_count': len(track_data.get('available_markets', [])),
                'total_markets': len(track_data.get('available_markets', [])),
                'release_date': track_data.get('album', {}).get('release_date'),
                'explicit': track_data.get('explicit'),
                'preview_available': bool(track_data.get('preview_url')),
                'duration_ms': track_data.get('duration_ms'),
                'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
            
            return stats
        else:
            print(f"Error fetching track stats: {track_response.status_code}")
            return None
            
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None

def print_track_stats(stats):
    """Pretty print the track statistics"""
    if not stats:
        print("No statistics available")
        return
        
    print("\n=== Track Statistics ===")
    print(f"Track: {stats['track_name']}")
    print(f"Artist: {stats['artist_name']}")
    print(f"Popularity Score: {stats['popularity']}/100")
    print(f"Available in {stats['markets_count']} markets")
    print(f"Release Date: {stats['release_date']}")

    print(f"Duration: {stats['duration_ms']/1000:.1f} seconds")
    
    print(f"\nData retrieved at: {stats['timestamp']}")

In [20]:
if __name__ == "__main__":
    # Get credentials and token
    client_id, client_secret = load_spotify_keys()
    access_token = get_spotify_token(client_id, client_secret)
    
    if access_token:
        # Example track ID (you can replace this with any Spotify track ID)
        TRACK_ID = "5cLcSZ4ZKWrIx4GJgVA4Uv"  # Example track
        
        # Get and display stats
        stats = get_track_stats(TRACK_ID, access_token)
        print_track_stats(stats)


=== Track Statistics ===
Track: Lonely Christmas
Artist: Eason Chan
Popularity Score: 45/100
Available in 185 markets
Release Date: 2013-05-21
Duration: 280.8 seconds

Data retrieved at: 2025-01-11 00:00:08
