# In [None]:  (notebook cell marker left over from the Colab export)
import json
import os
import pickle
import random
import re
import subprocess
import sys
import time
from urllib.parse import parse_qs, unquote, urlparse

import requests
from google.auth.transport.requests import Request
from google.colab import auth
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaFileUpload
from oauth2client.file import Storage

# Twitch API setup (replace with your credentials)
# Sent to Twitch's token endpoint by get_twitch_access_token(); the client ID
# is also sent as the Client-ID header in get_vod_metadata().
TWITCH_CLIENT_ID = 'your_twitch_client_id'
TWITCH_CLIENT_SECRET = 'your_twitch_client_secret'

# OAuth scopes needed for YouTube uploads
YOUTUBE_SCOPES = ['https://www.googleapis.com/auth/youtube.upload',
                 'https://www.googleapis.com/auth/youtube',
                 'https://www.googleapis.com/auth/youtube.force-ssl']

# Location where get_youtube_service() looks for the OAuth client file.
CLIENT_SECRETS_FILE = "/content/client_secrets.json"
# Service name and version handed to googleapiclient's build().
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# Maximum number of retries upload_to_youtube() makes before giving up.
MAX_RETRIES = 10
# Presumably the values YouTube accepts for status.privacyStatus -- confirm
# against the YouTube Data API reference.
VALID_PRIVACY_STATUSES = ("public", "private", "unlisted")
# Must match the redirect URI registered for the OAuth client
# (create_client_secrets_instructions() tells the user to add this value).
REDIRECT_URI = "http://localhost/"  # Added explicit redirect URI
# Longest chunk calculate_splits() will produce.
# NOTE(review): looks chosen to stay just under a 12-hour upload limit -- confirm.
MAX_DURATION = 43190  # 11hr 59min 50sec in seconds

# Install required tools in Colab
def install_dependencies():
    """Install streamlink, the Google auth helpers and ffmpeg via the shell.

    The commands are run one after another; their exit statuses are not
    inspected.
    """
    setup_commands = (
        "pip install -q streamlink google-auth-oauthlib oauth2client",
        "apt-get -qq update",
        "apt-get -qq install -y ffmpeg",
    )

    print("Installing required dependencies...")
    for shell_command in setup_commands:
        os.system(shell_command)
    print("Dependencies installed.")

# Get Twitch API access token
def get_twitch_access_token():
    """Request an app access token from Twitch (client-credentials grant).

    Returns:
        str: The ``access_token`` field of Twitch's JSON response.

    Raises:
        Exception: If Twitch answers with a status other than 200.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    url = 'https://id.twitch.tv/oauth2/token'
    payload = {
        'client_id': TWITCH_CLIENT_ID,
        'client_secret': TWITCH_CLIENT_SECRET,
        'grant_type': 'client_credentials'
    }
    # requests has no default timeout; without one a stalled connection
    # would block the whole notebook indefinitely.
    response = requests.post(url, data=payload, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Failed to get Twitch access token: {response.text}")
    return response.json()['access_token']

# Function to authenticate with YouTube in Colab using manual token approach
def _extract_auth_code(auth_response):
    """Return the OAuth authorization code contained in the user's input.

    Accepts the full redirect URL, a bare query string, or the raw code.
    Codes taken from a URL are percent-decoded (e.g. ``4%2F0A...`` becomes
    ``4/0A...``), because the token endpoint expects the decoded value.
    """
    auth_response = auth_response.strip()

    try:
        query = parse_qs(urlparse(auth_response).query)
    except ValueError:
        # urlparse rejects some malformed inputs; fall through to the split.
        query = {}
    if query.get('code'):
        return query['code'][0]

    if 'code=' in auth_response:
        # Not a well-formed URL (e.g. only the query string was pasted).
        return unquote(auth_response.split('code=')[1].split('&')[0])

    # Assume the user pasted the bare code.
    return auth_response


def _save_youtube_credentials(creds, token_file):
    """Pickle *creds* to *token_file* so later runs can skip the consent flow."""
    print("Saving credentials for future use...")
    with open(token_file, 'wb') as token:
        pickle.dump(creds, token)
    print("Credentials saved to", token_file)


def get_youtube_service():
    """Authenticate with YouTube and return an API client.

    Credentials are loaded from ``youtube_token.pickle`` when present and
    refreshed if expired. Otherwise a manual OAuth flow is run: the user
    opens the printed URL, authorizes, and pastes the redirect URL back.

    Returns:
        The client object produced by ``googleapiclient.discovery.build``.

    Raises:
        Exception: If the client secrets file is missing, or if exchanging
            the authorization code for a token fails.
    """
    print("Authenticating with YouTube...")

    # First, check if we have a client secrets file
    if not os.path.exists(CLIENT_SECRETS_FILE):
        print(f"WARNING: {CLIENT_SECRETS_FILE} not found.")
        print("You need to create a project in Google Cloud Console, enable YouTube API,")
        print("and download the OAuth credentials as client_secrets.json.")
        print("Visit: https://console.cloud.google.com/apis/credentials")

        # Create instructions for user to follow
        create_client_secrets_instructions()

        # Check again after instructions
        if not os.path.exists(CLIENT_SECRETS_FILE):
            raise Exception(f"YouTube API credentials file {CLIENT_SECRETS_FILE} not found.")

    creds = None
    token_file = 'youtube_token.pickle'

    # Try to load existing credentials.
    # SECURITY NOTE: pickle.load executes arbitrary code from the file. This
    # is acceptable only because the token file is written by this script;
    # never point it at a file from an untrusted source.
    if os.path.exists(token_file):
        print("Loading saved credentials...")
        with open(token_file, 'rb') as token:
            try:
                creds = pickle.load(token)
            except Exception as e:
                print(f"Error loading credentials: {e}")
                creds = None

    if creds and not creds.valid:
        if creds.expired and creds.refresh_token:
            print("Refreshing expired credentials...")
            try:
                creds.refresh(Request())
            except Exception as e:
                print(f"Failed to refresh credentials: {e}")
                creds = None
            else:
                # Persist the refreshed token; failing to save is not fatal.
                try:
                    _save_youtube_credentials(creds, token_file)
                except OSError as e:
                    print(f"Could not save refreshed credentials: {e}")
        else:
            # Invalid and not refreshable: discard so a new login is forced
            # instead of building a client with unusable credentials.
            creds = None

    if not creds:
        print("Getting new credentials using manual flow...")
        flow = InstalledAppFlow.from_client_secrets_file(
            CLIENT_SECRETS_FILE, YOUTUBE_SCOPES,
            redirect_uri=REDIRECT_URI)

        auth_url, _ = flow.authorization_url(
            prompt='consent',
            access_type='offline'
        )

        print("\n" + "=" * 70)
        print("MANUAL AUTHENTICATION REQUIRED")
        print("=" * 70)
        print("\n1. Copy the following URL and open it in your browser:")
        print("\n" + auth_url + "\n")
        print("2. Sign in with your Google account that has YouTube access")
        print("3. Allow the permissions requested")
        print("4. After authorizing, you'll be redirected to a page that might show an error")
        print("5. Copy the FULL URL from the address bar (including the 'code=' parameter)")
        print("6. Paste the FULL URL below\n")

        auth_response = input("Enter the full redirect URL: ")
        code = _extract_auth_code(auth_response)

        # Exchange the authorization code for credentials
        try:
            flow.fetch_token(code=code)
            creds = flow.credentials

            # Save the credentials for the next run
            _save_youtube_credentials(creds, token_file)
        except Exception as e:
            print(f"Error fetching token: {e}")
            raise

    print("Authentication successful!")
    return build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, credentials=creds)

def create_client_secrets_instructions():
    """Print setup steps for client_secrets.json and write a template file.

    The template is written to 'client_secrets_template.json' in the current
    working directory with placeholder values for the user to fill in.
    """
    instruction_lines = (
        "\n======= HOW TO CREATE CLIENT_SECRETS.JSON ========",
        "1. Go to https://console.cloud.google.com/",
        "2. Create a new project or select an existing one",
        "3. Enable the YouTube Data API v3",
        "4. Go to 'Credentials' and create an OAuth client ID",
        "5. Select 'Desktop app' as the application type",
        "6. Add 'http://localhost/' as an authorized redirect URI",
        "7. Download the JSON file and rename it to 'client_secrets.json'",
        "8. Upload it to this Colab notebook's working directory",
        "====================================================\n",
    )
    for line in instruction_lines:
        print(line)

    # Placeholder structure mirroring an "installed app" OAuth client file.
    installed_section = {
        "client_id": "YOUR_CLIENT_ID.apps.googleusercontent.com",
        "project_id": "YOUR_PROJECT_ID",
        "auth_uri": "https://accounts.google.com/o/oauth2/auth",
        "token_uri": "https://oauth2.googleapis.com/token",
        "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
        "client_secret": "YOUR_CLIENT_SECRET",
        "redirect_uris": ["http://localhost/"],
    }
    template_text = json.dumps({"installed": installed_section}, indent=4)

    with open("client_secrets_template.json", "w") as template_file:
        template_file.write(template_text)

    print("I've created a template file 'client_secrets_template.json'")
    print("Replace the placeholders with your actual credentials and rename to 'client_secrets.json'")

# Function to get VOD metadata from Twitch API
def get_vod_metadata(vod_id):
    """Fetch metadata for a Twitch VOD from the Helix ``videos`` endpoint.

    Args:
        vod_id: Identifier of the VOD to look up.

    Returns:
        dict: Keys ``title``, ``duration`` (seconds, parsed with
        parse_twitch_duration), ``url``, ``thumbnail_url``, ``created_at``,
        ``view_count`` and ``user_name``.

    Raises:
        Exception: If the API answers with a non-200 status or returns no
            entry for *vod_id*.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    access_token = get_twitch_access_token()
    headers = {
        'Client-ID': TWITCH_CLIENT_ID,
        'Authorization': f'Bearer {access_token}'
    }
    # Pass the id via `params` so it is URL-encoded instead of being spliced
    # raw into the query string; the timeout keeps a stalled connection from
    # hanging the notebook.
    response = requests.get(
        'https://api.twitch.tv/helix/videos',
        headers=headers,
        params={'id': vod_id},
        timeout=30,
    )

    if response.status_code != 200:
        raise Exception(f"Twitch API error: {response.status_code} - {response.text}")

    data = response.json().get('data', [])
    if not data:
        raise Exception(f"No VOD found with ID: {vod_id}")

    vod_data = data[0]
    return {
        'title': vod_data['title'],
        'duration': parse_twitch_duration(vod_data['duration']),
        'url': f'https://www.twitch.tv/videos/{vod_id}',
        'thumbnail_url': vod_data.get('thumbnail_url', ''),
        'created_at': vod_data.get('created_at', ''),
        'view_count': vod_data.get('view_count', 0),
        'user_name': vod_data.get('user_name', '')
    }

# Convert Twitch duration format to seconds
def parse_twitch_duration(duration_str):
    """Convert a duration such as '3h8m33s' into a number of seconds.

    Each of the 'h', 'm' and 's' components is optional; a missing component
    counts as zero.
    """
    total_seconds = 0
    remaining = duration_str
    # Consume the units from largest to smallest, keeping what follows each
    # marker for the next unit.
    for marker, weight in (('h', 3600), ('m', 60), ('s', 1)):
        if marker not in remaining:
            continue
        pieces = remaining.split(marker)
        total_seconds += int(pieces[0]) * weight
        remaining = pieces[1]
    return total_seconds

# Function to split duration and calculate parts
def calculate_splits(duration, max_duration=None):
    """Split a duration into chunk lengths no longer than the limit.

    The returned lengths always add up to exactly *duration*, so no part of
    the VOD is dropped.

    Args:
        duration: Total length in seconds.
        max_duration: Longest allowed chunk in seconds. Defaults to the
            module-level MAX_DURATION.

    Returns:
        list[int]: Chunk lengths in order. If the final chunk would be
        shorter than 5% of the limit, the duration is spread evenly over all
        chunks instead of leaving a tiny trailing part.
    """
    limit = MAX_DURATION if max_duration is None else max_duration

    if duration <= limit:
        return [duration]

    parts, remainder = divmod(duration, limit)
    if remainder == 0:
        # Exact multiple of the limit: no extra chunk is needed.
        return [limit] * parts

    if remainder < limit * 0.05:
        # Balance across parts + 1 chunks; the first `extra` chunks take one
        # additional second so the total is preserved exactly.
        count = parts + 1
        base, extra = divmod(duration, count)
        return [base + 1] * extra + [base] * (count - extra)

    return [limit] * parts + [remainder]

# Clean title for file system compatibility
def clean_title_for_file(title):
    """Turn a VOD title into a file-system friendly name.

    Non-ASCII characters are dropped, anything outside word characters,
    whitespace and ``- . , ( ) [ ] { }`` becomes an underscore, runs of
    underscores are collapsed, and spaces are replaced by underscores. The
    result is capped at 200 characters; an empty result becomes "TwitchVOD".
    """
    ascii_only = re.sub(r'[^\x00-\x7F]+', '', title)
    sanitized = re.sub(r'[^\w\s\-\.,\(\)\[\]\{\}]', '_', ascii_only)
    sanitized = re.sub(r'_+', '_', sanitized).strip('_').strip()

    # Nothing usable left after cleaning: fall back to a default name.
    if not sanitized.strip():
        sanitized = "TwitchVOD"

    return sanitized.replace(' ', '_')[:200]

# Function to download a specific chunk of a VOD
def _probe_video_stream(video_path, entries, output_format):
    """Query ffprobe about the first video stream; return '' if unavailable."""
    command = [
        'ffprobe', '-v', 'error', '-select_streams', 'v:0',
        '-show_entries', entries, '-of', output_format, video_path,
    ]
    try:
        completed = subprocess.run(
            command, stdout=subprocess.PIPE, universal_newlines=True)
    except OSError:
        # ffprobe is not installed or could not be started.
        return ""
    return completed.stdout.strip()


def download_vod_chunk(vod_url, title, start_time, duration):
    """
    Download a specific time chunk of a Twitch VOD

    Qualities are tried from best to worst until one download succeeds. A
    log of the attempts is written to "<name>_download_log.txt".

    Args:
        vod_url: URL of the Twitch VOD
        title: Title to use for the file
        start_time: Start time in seconds
        duration: Duration to download in seconds

    Returns:
        tuple: (filename, quality, resolution) where filename is the output
        path WITHOUT the ".mp4" extension and resolution is "WIDTHxHEIGHT"
        or "unknown" when ffprobe cannot determine it.

    Raises:
        Exception: If no quality option produced a non-empty file.
    """
    clean_title = clean_title_for_file(title)

    # Format start time for streamlink
    hours, remainder = divmod(start_time, 3600)
    minutes, seconds = divmod(remainder, 60)
    start_offset = f"{hours:02d}:{minutes:02d}:{seconds:02d}"

    # Add chunk info to filename
    file_name = f"{clean_title}_chunk_{start_time}"
    video_path = f"{file_name}.mp4"
    log_file_path = f"{file_name}_download_log.txt"

    print(f"Original title: {title}")
    print(f"Cleaned title for file: {file_name}")
    print(f"Downloading chunk starting at {start_offset} for {duration} seconds")

    # Try different quality options if one fails
    qualities = ["best", "1080p60", "1080p", "720p60", "720p", "480p", "360p", "worst"]

    # utf-8 so titles with emoji/non-ASCII text cannot break the log write.
    with open(log_file_path, "w", encoding="utf-8") as log_file:
        log_file.write(f"Download log for: {title} (chunk at {start_offset})\n")
        log_file.write(f"VOD URL: {vod_url}\n")
        log_file.write(f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        for quality in qualities:
            log_file.write(f"Attempting quality: {quality}\n")

            # Argument list (no shell) so the URL and file name are never
            # interpreted by a shell.
            command = [
                "streamlink", vod_url, quality,
                "--hls-start-offset", start_offset,
                "--hls-duration", f"{duration}s",
                "-o", video_path,
            ]
            print(f"Attempting to download with quality '{quality}'...")
            print(f"Executing: {' '.join(command)}")

            try:
                # A partial file left by a failed attempt would make
                # streamlink refuse to write without --force.
                if os.path.exists(video_path):
                    os.remove(video_path)
                result = subprocess.run(command).returncode
            except OSError as e:
                error_msg = f"Error downloading with quality '{quality}': {str(e)}"
                log_file.write(f"{error_msg}\n")
                print(error_msg)
                continue

            downloaded = (
                result == 0
                and os.path.exists(video_path)
                and os.path.getsize(video_path) > 0
            )
            if not downloaded:
                log_file.write(f"FAILED: Could not download with quality '{quality}'\n")
                print(f"Failed to download with quality '{quality}', trying next option...")
                continue

            file_size = os.path.getsize(video_path) / (1024*1024)  # Size in MB
            log_file.write(f"SUCCESS: Downloaded with quality '{quality}'\n")
            log_file.write(f"File size: {file_size:.2f} MB\n")
            print(f"Successfully downloaded VOD chunk with quality '{quality}'")

            resolution = _probe_video_stream(
                video_path, 'stream=width,height', 'csv=s=x:p=0')
            if resolution:
                log_file.write(f"Video resolution: {resolution}\n")
                print(f"Video resolution: {resolution}")
            else:
                log_file.write("Could not determine video resolution\n")
                resolution = "unknown"

            # ffprobe may report "N/A" for the bitrate; that must not
            # discard the resolution that was already determined.
            bitrate = _probe_video_stream(
                video_path, 'stream=bit_rate',
                'default=noprint_wrappers=1:nokey=1')
            if bitrate.isdigit():
                bitrate_mb = int(bitrate) / 1000000  # Convert to Mbps
                log_file.write(f"Video bitrate: {bitrate_mb:.2f} Mbps\n")
                print(f"Video bitrate: {bitrate_mb:.2f} Mbps")

            return file_name, quality, resolution

        log_file.write("ALL QUALITY OPTIONS FAILED\n")

    raise Exception("Failed to download VOD chunk with any quality setting")

# Function to upload to YouTube with quality info
def _strip_angle_brackets(text):
    """Remove '<' and '>', which YouTube rejects in titles and descriptions."""
    return text.replace('<', '').replace('>', '')


def _truncate_utf8(text, max_bytes):
    """Shorten *text* so that its UTF-8 encoding fits in *max_bytes* bytes."""
    encoded = text.encode('utf-8')
    if len(encoded) <= max_bytes:
        return text
    # errors='ignore' drops a multi-byte character cut in half by the slice.
    return encoded[:max_bytes].decode('utf-8', errors='ignore')


def upload_to_youtube(file_path, title, description=None, tags=None, privacy="private", youtube_service=None, video_info=None):
    """Upload "<file_path>.mp4" to YouTube with a resumable upload.

    Args:
        file_path: Path of the video WITHOUT the ".mp4" extension.
        title: Video title; '<' and '>' are removed and it is cut to 100
            characters.
        description: Video description (a default is used when None).
        tags: List of tags (defaults to ['Twitch', 'VOD']).
        privacy: One of VALID_PRIVACY_STATUSES.
        youtube_service: Existing API client; a new one is created via
            get_youtube_service() when None.
        video_info: Optional dict; the keys "resolution", "file_size_mb" and
            "quality" are appended to the description, and every entry is
            written to the upload log.

    Returns:
        tuple: (video_id, upload_log_path). upload_log_path is None if the
        log file could not be written.

    Raises:
        ValueError: If *privacy* is not a valid privacy status.
        Exception: If the file is missing, the API returns an unexpected
            response, or retries are exhausted.
        HttpError: For non-retriable HTTP errors.
    """
    if description is None:
        description = 'Uploaded from Twitch VOD'
    if tags is None:
        tags = ['Twitch', 'VOD']
    if video_info is None:
        video_info = {}
    if privacy not in VALID_PRIVACY_STATUSES:
        raise ValueError(
            f"Invalid privacy status {privacy!r}; expected one of {VALID_PRIVACY_STATUSES}")

    video_path = f"{file_path}.mp4"

    # YouTube titles: at most 100 characters, no '<' or '>', not empty.
    clean_title = _strip_angle_brackets(title)[:100].strip() or "Twitch VOD"

    # Fail on a missing file before starting any authentication flow.
    if not os.path.exists(video_path):
        raise Exception(f"File not found: {video_path}")

    youtube = youtube_service
    if youtube is None:
        youtube = get_youtube_service()

    print(f"Preparing to upload: {file_path}")
    print(f"Title: {clean_title}")

    # Get file size for progress reporting
    file_size = os.path.getsize(video_path)
    print(f"File size: {file_size / (1024*1024):.2f} MB")

    # Update description with video info if available
    if video_info:
        tech_info = "\n\nVideo Technical Information:\n"
        if "resolution" in video_info:
            tech_info += f"Resolution: {video_info['resolution']}\n"
        if "file_size_mb" in video_info:
            tech_info += f"File size: {video_info['file_size_mb']:.2f} MB\n"
        if "quality" in video_info:
            tech_info += f"Twitch quality: {video_info['quality']}\n"
        description += tech_info

    # YouTube descriptions: at most 5000 bytes, no '<' or '>'.
    description = _truncate_utf8(_strip_angle_brackets(description), 5000)

    body = {
        'snippet': {
            'title': clean_title,
            'description': description,
            'tags': tags,
            'categoryId': '22'  # People & Blogs category
        },
        'status': {
            'privacyStatus': privacy,
            'selfDeclaredMadeForKids': False
        }
    }

    media = MediaFileUpload(
        video_path,
        chunksize=1024*1024*8,  # 8MB chunks
        resumable=True,
        mimetype='video/mp4'
    )

    insert_request = youtube.videos().insert(
        part=','.join(body.keys()),
        body=body,
        media_body=media
    )

    # Resumable upload with exponential backoff. Only the network call sits
    # inside the try, so errors from later steps are never mistaken for
    # retriable upload errors.
    print("Starting upload...")
    retriable_statuses = (500, 502, 503, 504)
    response = None
    retry = 0

    while response is None:
        error = None
        try:
            status, response = insert_request.next_chunk()
        except HttpError as e:
            if e.resp.status not in retriable_statuses:
                raise
            error = f"An HTTP error {e.resp.status} occurred:\n{e.content}"
        except (IOError, TimeoutError) as e:
            error = f"A retriable error occurred: {e}"
        else:
            if status:
                print(f"Uploaded {int(status.progress() * 100)}%")

        if error is not None:
            print(error)
            retry += 1
            if retry > MAX_RETRIES:
                raise Exception("No longer attempting to retry.")

            max_sleep = 2 ** retry
            sleep_seconds = random.random() * max_sleep
            print(f"Sleeping {sleep_seconds:.1f} seconds and then retrying...")
            time.sleep(sleep_seconds)

    if 'id' not in response:
        raise Exception(f"The upload failed with an unexpected response: {response}")

    video_id = response['id']
    print(f"Upload complete! Video ID: {video_id}")
    print(f"Video URL: https://youtu.be/{video_id}")

    # Log upload details. Best effort: the upload already succeeded, so a
    # failure to write the log must not fail the call.
    upload_log_path = f"upload_log_{video_id}.txt"
    try:
        with open(upload_log_path, "w", encoding="utf-8") as log_file:
            log_file.write(f"Upload log for: {clean_title}\n")
            log_file.write(f"Video ID: {video_id}\n")
            log_file.write(f"Video URL: https://youtu.be/{video_id}\n")
            log_file.write(f"Upload time: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
            log_file.write(f"File size: {file_size / (1024*1024):.2f} MB\n")
            for key, value in video_info.items():
                log_file.write(f"{key}: {value}\n")
    except OSError as e:
        print(f"Could not write upload log: {e}")
        upload_log_path = None

    return video_id, upload_log_path

# Format duration for display
def format_duration(seconds):
    """Render a number of seconds as '<H>h <M>m <S>s'."""
    hours, leftover = divmod(seconds, 3600)
    minutes, secs = divmod(leftover, 60)
    return f"{hours}h {minutes}m {secs}s"

# Clean up files after processing
def cleanup_files(file_paths):
    """
    Delete the given files, reporting each outcome on stdout.

    Empty/None entries and paths that do not exist are skipped; a failure to
    remove one file is printed and does not stop the others.

    Args:
        file_paths: List of file paths to clean up
    """
    print("\nCleaning up temporary files...")
    for target in file_paths:
        if not target or not os.path.exists(target):
            continue
        try:
            os.remove(target)
        except Exception as e:
            print(f"Failed to remove {target}: {str(e)}")
        else:
            print(f"Removed: {target}")

# Process VOD in chunks
def process_vod_in_chunks(vod_id, youtube_service=None):
    """Download a Twitch VOD chunk by chunk and upload each chunk to YouTube.

    Each chunk is downloaded, uploaded and deleted before the next one is
    started, so only one chunk is on disk at a time. A failing chunk is
    reported and skipped; the remaining chunks are still processed.

    Args:
        vod_id: Twitch VOD identifier.
        youtube_service: Existing YouTube API client. When None, one is
            created once (after the user confirms) and reused for all chunks.

    Returns:
        bool: True if at least one chunk was uploaded, False otherwise
        (including when the user cancels or an error occurs).
    """
    youtube_title_limit = 100  # upload_to_youtube() cuts titles at this length

    try:
        # Get metadata for the VOD
        print(f"Fetching metadata for VOD ID: {vod_id}")
        metadata = get_vod_metadata(vod_id)
        title = metadata['title']
        duration = metadata['duration']
        vod_url = metadata['url']

        print(f"\nVOD Information:")
        print(f"Title: {title}")
        print(f"Channel: {metadata['user_name']}")
        print(f"Duration: {format_duration(duration)}")
        print(f"Views: {metadata['view_count']}")
        print(f"Created at: {metadata['created_at']}")

        # Calculate splits if needed
        splits = calculate_splits(duration)
        total_parts = len(splits)
        if total_parts > 1:
            print(f"\nVOD will be split into {total_parts} parts due to length")
            for i, split_duration in enumerate(splits):
                print(f"  Part {i+1}: {format_duration(split_duration)}")

        # Confirm with user
        confirmation = input("\nProceed with download and upload? (y/n): ")
        if confirmation.strip().lower() != 'y':
            print("Operation cancelled by user.")
            return False

        # Authenticate once up front so a missing service neither prompts
        # for every chunk nor fails only after a long download.
        if youtube_service is None:
            youtube_service = get_youtube_service()

        # Generate base description with VOD information
        description_base = "\n".join([
            f"Twitch VOD: {title}",
            f"Channel: {metadata['user_name']}",
            f"Original broadcast date: {metadata['created_at']}",
            f"Original URL: {vod_url}",
            "",
            "This video was automatically uploaded from Twitch.",
        ])

        # Tags: fixed ones, the channel name, and any hashtags in the title.
        tags = ['Twitch', 'VOD', metadata['user_name']]
        tags.extend(re.findall(r'#(\w+)', title))

        # Process each chunk sequentially
        uploaded = []  # (part_num, video_id) for every successful upload
        start_time = 0

        for part_num, split_duration in enumerate(splits, start=1):
            if total_parts > 1:
                # Shorten the base title so the part suffix survives the
                # 100-character title truncation done at upload time.
                suffix = f" (Part {part_num}/{total_parts})"
                part_full_title = title[:youtube_title_limit - len(suffix)].rstrip() + suffix
                part_description = f"{description_base}\n\nPart {part_num} of {total_parts}"
            else:
                part_full_title = title
                part_description = description_base

            print(f"\n{'='*50}")
            print(f"Processing part {part_num} of {total_parts}")
            print(f"Download chunk starting at {format_duration(start_time)} for {format_duration(split_duration)}")

            chunk_files_to_cleanup = []  # Track files for this chunk

            try:
                # Download this specific chunk
                chunk_file, quality, resolution = download_vod_chunk(
                    vod_url, f"{title}_part_{part_num}", start_time, split_duration)
                chunk_files_to_cleanup.append(f"{chunk_file}.mp4")
                chunk_files_to_cleanup.append(f"{chunk_file}_download_log.txt")

                chunk_video_info = {
                    "quality": quality,
                    "resolution": resolution,
                    "file_size_mb": os.path.getsize(f"{chunk_file}.mp4") / (1024*1024),
                    "start_time": format_duration(start_time),
                    "duration": format_duration(split_duration)
                }

                # upload_to_youtube() appends quality/resolution/size from
                # video_info itself, so only the segment range is added here
                # (otherwise that information appears twice).
                segment_end = start_time + split_duration
                full_description = (
                    f"{part_description}\n\n"
                    f"Segment: {format_duration(start_time)} to {format_duration(segment_end)}"
                )

                # Upload this chunk
                print(f"\nUploading part {part_num} to YouTube...")
                video_id, upload_log_path = upload_to_youtube(
                    chunk_file,
                    part_full_title,
                    full_description,
                    tags=tags,
                    youtube_service=youtube_service,
                    video_info=chunk_video_info
                )
                uploaded.append((part_num, video_id))

                if upload_log_path:
                    chunk_files_to_cleanup.append(upload_log_path)

            except Exception as e:
                # Continue with next part if one fails
                print(f"Error processing part {part_num}: {str(e)}")
            finally:
                # Free disk space right away, whether or not the part
                # succeeded, then move on to the next chunk.
                if chunk_files_to_cleanup:
                    print(f"Cleaning up files for part {part_num}...")
                    cleanup_files(chunk_files_to_cleanup)
                start_time += split_duration

        if uploaded:
            print(f"\nUploaded {len(uploaded)} parts successfully!")
            for part_num, video_id in uploaded:
                print(f"Part {part_num}: https://youtu.be/{video_id}")
            if len(uploaded) < total_parts:
                print(f"{total_parts - len(uploaded)} part(s) failed; see errors above.")
            return True

        print("No parts were successfully uploaded.")
        return False

    except Exception as e:
        print(f"\nError processing VOD {vod_id}: {str(e)}")
        return False

# Main program for Colab
def main():
    """Interactive entry point: install tools, authenticate, process VODs.

    Repeatedly asks for a Twitch VOD ID or URL and processes it with
    process_vod_in_chunks() until the user enters 'q'.
    """
    print("==== Twitch VOD Downloader and YouTube Uploader for Colab ====")
    print("This program will download Twitch VODs in chunks and upload them to YouTube.")
    print("Optimized for Colab's limited storage: Each chunk is downloaded, uploaded, and deleted before the next.")

    # Install dependencies first
    install_dependencies()

    # Authenticate with YouTube once (reuse the service)
    try:
        youtube_service = get_youtube_service()
    except Exception as e:
        print(f"Error during initial authentication: {str(e)}")
        print("You can still try to process VODs, authentication will be attempted again.")
        youtube_service = None

    while True:
        print("\n" + "-" * 50)
        vod_input = input("Enter Twitch VOD ID or URL (or 'q' to quit): ").strip()

        if vod_input.lower() == 'q':
            print("Exiting program. Goodbye!")
            break

        if not vod_input:
            # Nothing entered: ask again rather than calling the API.
            continue

        # Extract the numeric VOD ID if a full URL was provided; this also
        # drops a trailing slash, extra path segments and query strings.
        url_match = re.search(r'twitch\.tv/videos/(\d+)', vod_input)
        vod_id = url_match.group(1) if url_match else vod_input

        # Process the VOD in chunks
        process_vod_in_chunks(vod_id, youtube_service=youtube_service)

# Start the interactive loop only when this file is run as a script, so that
# importing it does not trigger the installs or prompts in main().
if __name__ == "__main__":
    main()