In [1]:
import yt_dlp
from pydub import AudioSegment
import os
from chat_downloader import ChatDownloader
from datetime import datetime
import json

def download_twitch_vod(vod_url, output_dir="/mnt/d/Projects/twitch_videos"):
    """
    Download a Twitch VOD in 1080p (if available), extract 16kHz audio as WAV, and save chat logs.
    
    Args:
        vod_url (str): URL of the Twitch VOD
        output_dir (str): Directory to save the files
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Extract video ID from URL for naming
    video_id = vod_url.split('/')[-1]
    base_filename = f"twitch_{video_id}"
    
    # Configure yt-dlp options for 1080p
    ydl_opts = {
        'format': 'bestvideo[height<=1080]+bestaudio/best[height<=1080]',
        'outtmpl': os.path.join(output_dir, f'{base_filename}.%(ext)s'),
        'quiet': False,
        # Format selection to prefer 1080p
        'format_sort': ['res:1080'],
    }
    
    try:
        # Download video
        print("Downloading video...")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(vod_url, download=True)
            video_file = ydl.prepare_filename(info)
        
        # Convert video to WAV with 16kHz
        print("Extracting audio and converting to 16kHz...")
        video = AudioSegment.from_file(video_file)
        # Convert to mono and set sample rate to 16kHz
        audio = video.set_channels(1).set_frame_rate(16000)
        wav_path = os.path.join(output_dir, f"{base_filename}.wav")
        audio.export(wav_path, format="wav", parameters=["-ar", "16000"])
        
        # Download chat
        print("Downloading chat...")
        chat_path = os.path.join(output_dir, f"{base_filename}_chat.txt")
        chat_downloader = ChatDownloader()
        chat = chat_downloader.get_chat(vod_url)
        
        with open(chat_path, 'w', encoding='utf-8') as f:
            for message in chat:
                timestamp = datetime.fromtimestamp(message['timestamp'] / 1000).strftime('%H:%M:%S')
                author = message.get('author', {}).get('name', 'Anonymous')
                content = message.get('message', '')
                f.write(f"[{timestamp}] {author}: {content}\n")
        
        print(f"Successfully processed VOD {video_id}")
        print(f"Files saved in {output_dir}:")
        print(f"- Video: {os.path.basename(video_file)} (1080p or best available)")
        print(f"- Audio: {base_filename}.wav (16kHz mono)")
        print(f"- Chat: {base_filename}_chat.txt")
        
    except Exception as e:
        print(f"An error occurred: {str(e)}")

# Example usage:
# download_twitch_vod("https://www.twitch.tv/videos/YOUR_VOD_ID")

In [2]:
vod_url = "https://www.twitch.tv/videos/2465574148"  # Replace with actual VOD URL
download_twitch_vod(vod_url)

Downloading video...
[twitch:vod] Extracting URL: https://www.twitch.tv/videos/2465574148
[twitch:vod] 2465574148: Downloading stream metadata GraphQL
[twitch:vod] 2465574148: Downloading video access token GraphQL
[twitch:vod] 2465574148: Downloading m3u8 information
[twitch:vod] 2465574148: Downloading storyboard metadata JSON
[info] v2465574148: Downloading 1 format(s): 1080p60
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 1539
[download] Destination: /mnt/d/Projects/twitch_videos/twitch_2465574148.mp4
[download] 100% of   14.58GiB in 00:23:58 at 10.38MiB/s                      
[FixupM3u8] Fixing MPEG-TS in MP4 container of "/mnt/d/Projects/twitch_videos/twitch_2465574148.mp4"
Extracting audio and converting to 16kHz...
Downloading chat...
An error occurred: year 57359 is out of range


In [1]:
import yt_dlp
from pydub import AudioSegment
import os
from chat_downloader import ChatDownloader
from datetime import datetime
import json
import csv
vod_url = "https://www.twitch.tv/videos/2465574148"
chat_path = "/mnt/d/Projects/twitch_videos/chat.txt"
chat_downloader = ChatDownloader()
chat = chat_downloader.get_chat(vod_url)



In [2]:
chat

<chat_downloader.sites.common.Chat at 0x7fd5980e1e40>

In [2]:
# Download chat
chat_path = "/mnt/d/Projects/twitch_videos/chat.txt"
chat_downloader = ChatDownloader()
chat = chat_downloader.get_chat(vod_url)

# Write messages to CSV
print("Writing chat messages to CSV...")
with open(chat_path, 'w', newline='', encoding='utf-8') as f:
    csv_writer = csv.writer(f)
    # Write header
    csv_writer.writerow(['Time', 'Author', 'Message', 'Badges', 'Subscriber', 'Moderator'])
    
    # Write messages
    for message in chat:
        try:
            # Get time in seconds directly from the message
            time_in_seconds = message.get('time_in_seconds', 0)
            
            # Convert to HH:MM:SS format
            hours = int(time_in_seconds // 3600)
            minutes = int((time_in_seconds % 3600) // 60)
            seconds = int(time_in_seconds % 60)
            timestamp = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
            
            # Get message details
            author = message.get('author', {}).get('name', 'Anonymous')
            content = message.get('message', '')
            badges = [badge.get('title', '') for badge in message.get('author', {}).get('badges', [])]
            badges_str = ','.join(badges) if badges else ''
            
            # Check if user is subscriber or moderator
            is_subscriber = any('Subscriber' in badge for badge in badges)
            is_moderator = any('Moderator' in badge for badge in badges)
            
            # Write to CSV
            csv_writer.writerow([
                timestamp,    # HH:MM:SS format
                author,
                content,
                badges_str,
                is_subscriber,
                is_moderator
            ])
        except Exception as e:
            print(f"Error processing message: {str(e)}")
            continue


Writing chat messages to CSV...


In [7]:
import csv
# Download chat
print("Downloading chat...")
chat_path = "/mnt/d/Projects/twitch_videos/chat.txt"
chat_downloader = ChatDownloader()
chat = chat_downloader.get_chat(vod_url)

# Get VOD start time from first message
first_message = None
chat_messages = []

# Collect all messages and find the first timestamp
print("Collecting chat messages...")
for message in chat:
    chat_messages.append(message)
    

Downloading chat...
Collecting chat messages...


In [9]:
chat_messages[-10:]

[{'message_id': '54d868f5-8b78-4616-955d-461168941cb8',
  'author': {'id': '1226254334',
   'name': 'greatwhite31',
   'display_name': 'greatwhite31',
   'colour': '#FF4500'},
  'time_in_seconds': 15340,
  'timestamp': 1747947658746000,
  'message': 'Yo',
  'time_text': '4:15:40',
  'message_type': 'text_message'},
 {'message_id': '4fe93e1c-28a0-4371-8cb9-9d9329b9e311',
  'author': {'id': '71424707',
   'name': 'theonlykondor',
   'display_name': 'TheOnlyKondor',
   'colour': '#FF69B4'},
  'time_in_seconds': 15340,
  'timestamp': 1747947659110000,
  'message': 'have a great night Tim and chat!',
  'time_text': '4:15:40',
  'message_type': 'text_message'},
 {'message_id': '56cc5d87-9d32-48a8-9d6e-821a873c53f0',
  'author': {'id': '89540127',
   'name': 'kingmalikai',
   'display_name': 'KingMalikai',
   'badges': [{'name': 'moderator',
     'version': 1,
     'title': 'Moderator',
     'clickAction': None,
     'clickURL': None,
     'icons': [{'url': 'https://static-cdn.jtvnw.net/badge

In [4]:
import csv
# Download chat
print("Downloading chat...")
chat_path = "/mnt/d/Projects/twitch_videos/chat.txt"
chat_downloader = ChatDownloader()
chat = chat_downloader.get_chat(vod_url)

# Get VOD start time from first message
first_message = None
chat_messages = []

# Collect all messages and find the first timestamp
print("Collecting chat messages...")
for message in chat:
    chat_messages.append(message)
    if first_message is None and 'timestamp' in message:
        first_message = message
        print("First message debug info:")
        print(f"Timestamp: {message.get('timestamp')}")
        print(f"Content offset seconds: {message.get('content_offset_seconds')}")
        print(f"Relative timestamp: {message.get('relative_timestamp')}")
        print(f"All available fields: {message.keys()}")
        
if first_message and 'timestamp' in first_message:
    vod_start_ms = first_message['timestamp']
    
    # Write messages to CSV
    print("Writing chat messages to CSV...")
    with open(chat_path, 'w', newline='', encoding='utf-8') as f:
        csv_writer = csv.writer(f)
        # Write header
        csv_writer.writerow(['Timestamp', 'Author', 'Message', 'Badges', 'Subscriber', 'Moderator'])
        
        # Write messages
        for message in chat_messages:
            try:
                # Get the relative time from the message's 'relative_timestamp' field if available
                # or calculate it from the 'content_offset_seconds' field
                relative_time_sec = message.get('content_offset_seconds', 0)
                if relative_time_sec == 0:
                    # Fallback to calculating from timestamp if content_offset_seconds is not available
                    relative_time_ms = message.get('timestamp', 0) - vod_start_ms
                    relative_time_sec = max(0, relative_time_ms / 1000)
                
                # Convert to HH:MM:SS format
                hours = int(relative_time_sec // 3600)
                minutes = int((relative_time_sec % 3600) // 60)
                seconds = int(relative_time_sec % 60)
                timestamp = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
                
                # Get message details
                author = message.get('author', {}).get('name', 'Anonymous')
                content = message.get('message', '')
                badges = [badge.get('title', '') for badge in message.get('author', {}).get('badges', [])]
                badges_str = ','.join(badges) if badges else ''
                
                # Check if user is subscriber or moderator
                is_subscriber = any('Subscriber' in badge for badge in badges)
                is_moderator = any('Moderator' in badge for badge in badges)
                
                # Write to CSV
                csv_writer.writerow([
                    timestamp,    # HH:MM:SS format
                    author,
                    content,
                    badges_str,
                    is_subscriber,
                    is_moderator
                ])
            except Exception as e:
                print(f"Error processing message: {str(e)}")
                continue

Downloading chat...
Collecting chat messages...
First message debug info:
Timestamp: 1747932343994000
Content offset seconds: None
Relative timestamp: None
All available fields: dict_keys(['message_id', 'author', 'time_in_seconds', 'timestamp', 'message', 'time_text', 'message_type'])
Writing chat messages to CSV...
