In [1]:
import os
import json
import logging
from pathlib import Path

# Set up logging for notebook output
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')

In [2]:
# Function to extract YouTube videoID
import re

def extract_youtube_video_id(url: str) -> str:
    """
    Extracts the videoID from a YouTube URL.
    Supports standard and shortened YoutTube URL
    """

    # Standard YouTube URL
    match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
    if match:
        return match.group(1)
    raise ValueError("Invalid YouTube URL format")

# test:
# youtube_url = "https://www.youtube.com/watch?v=KgVu9G-VcQk&pp=ygUPMzAgbWlucyBwb2RjYXN0" # joe rogan
# video_id = extract_youtube_video_id(youtube_url)
# video_id

In [3]:
# Function to download audio from YouTube
import subprocess

def download_youtube_audio(video_id: str, output_dir: str = "audio") -> Path:
    """
    Downloads the audio from a YouTube video using yt-dlp.
    Returns the path to the downloaded audio file.
    """
    os.makedirs(output_dir, exist_ok=True)

    output_path = Path(output_dir) / f"{video_id}.m4a"
    youtube_url = f"https://www.youtube.com/watch?v={video_id}"
    command = [
        "yt-dlp",
        "-f", "bestaudio[ext=m4a]",
        "-o", str(output_path),
        youtube_url
    ]

    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"yt-dlp failed: {result.stderr}")
    return output_path

# test
# audio_path = download_youtube_audio(video_id)
# print(f"Audio downloaded to: {audio_path}")

In [4]:
import assemblyai as aai
import time
import os
from dotenv import load_dotenv

# Transcribe and optionally return the full transcript object
def transcribe_with_assemblyai(audio_file_path: str, api_key: str = None, return_transcript: bool = False):
    """
    Transcribes `audio_file_path` with AssemblyAI.
    If `return_transcript` is True, returns the full transcript object;
    otherwise returns transcript.words.
    """
    if api_key is None:
        load_dotenv()
        api_key = os.getenv("ASSEMBLYAI_API_KEY")
    aai.settings.api_key = api_key

    config = aai.TranscriptionConfig(
        speech_model=aai.SpeechModel.best,
        speaker_labels=True,
        speakers_expected=2,
    )
    transcript = aai.Transcriber(config=config).transcribe(audio_file_path)

    if transcript.status == "error":
        raise RuntimeError(f"Transcription failed: {transcript.error}")
    return transcript if return_transcript else transcript.words

# test:
# transcript = transcribe_with_assemblyai(f"audio/{video_id}.m4a", return_transcript=True)
# transcript

In [5]:
# Save word-level transcript to JSON
import json

def save_word_level_transcript(obj, output_path: str):
    """
    Accepts either transcript.words or the full transcript object.
    Saves word-level data to JSON.
    """
    # If a full transcript is passed, use its .words attribute
    words = obj.words if hasattr(obj, "words") else obj

    word_data = []
    for idx, word in enumerate(words):
        word_data.append({
            "id": idx + 1,
            "text": word.text,
            "start": word.start,
            "end": word.end,
            "confidence": word.confidence,
            "speaker": word.speaker
        })
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(word_data, f, indent=2)
    print(f"Saved word-level transcript to {output_path}")

# test:
# save_word_level_transcript(transcript.words, "transcript.json")

In [6]:
# Grouping words into thought segments
from typing import List, Dict, Optional
from datetime import timedelta
import re

def group_words_into_thoughts(word_data, pause_threshold_ms=3000):
    """
    Groups words (AssemblyAI Word objects) into thoughts based on speaker changes and pauses.
    Adds word_start and word_end (1-based indices in the original word_data).
    """
    thoughts = []
    current_thought = []
    word_indices = []  # Track the original indices of words in each thought

    for i, word in enumerate(word_data):
        if current_thought and (
            current_thought[-1].speaker != word.speaker or
            word.start - current_thought[-1].end > pause_threshold_ms
        ):
            thoughts.append((current_thought, word_indices))
            current_thought = []
            word_indices = []
        current_thought.append(word)
        word_indices.append(i)
    if current_thought:
        thoughts.append((current_thought, word_indices))

    # Build thought dicts with word_start and word_end
    thought_dicts = []
    for idx, (words, indices) in enumerate(thoughts):
        text = ' '.join(w.text for w in words)
        start = words[0].start
        end = words[-1].end
        duration = end - start
        avg_conf = sum(w.confidence * (w.end - w.start) for w in words) / max(1, sum(w.end - w.start for w in words))
        word_start = indices[0] + 1  # 1-based index
        word_end = indices[-1] + 1   # 1-based index
        thought_dicts.append({
            'id': f"t_{idx+1:04d}",
            'speaker': words[0].speaker,
            'text': text,
            'start': start,
            'end': end,
            'duration': duration,
            'confidence': round(avg_conf, 6),
            'word_count': len(words),
            'word_start': word_start,
            'word_end': word_end
        })

    # Metadata
    speakers = list(set(t['speaker'] for t in thought_dicts))
    total_duration = word_data[-1].end if word_data else 0
    metadata = {
        'total_thoughts': len(thought_dicts),
        'total_words': len(word_data),
        'speakers': speakers,
        'duration_ms': total_duration,
        'duration_readable': str(timedelta(milliseconds=total_duration)),
        'avg_confidence': round(sum(t['confidence'] for t in thought_dicts) / len(thought_dicts), 6) if thought_dicts else 0,
    }

    return {'thoughts': thought_dicts, 'metadata': metadata}

# test
# result = group_words_into_thoughts(transcript.words)
# print(result)


In [7]:
# Save thought segments and metadata to JSON

def save_thought_segments(thoughts_result, output_path):
    """
    Saves the thoughts and metadata to a JSON file.
    """
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(thoughts_result, f, indent=2)
    print(f"Saved thought segments and metadata to {output_path}")

# test
# save_thought_segments(result, "processed_thoughts.json")

In [None]:
# A word index map from word-level transcript
def word_index_lookup(word_data):
    """
    Builds a map from 1-based word index (as string) to word metadata.
    """
    return {
        str(word['id']): {
            'start': word['start'],
            'end': word['end'],
            'text': word['text'],
            'speaker': word['speaker']
        }
        for word in word_data
    }

# Test:
# with open("transcript.json") as f:
#     word_data = json.load(f)
# word_index_map = word_index_lookup(word_data)
# print(word_index_map['2'])

In [9]:
# Export VTT subtitles
def export_vtt_subtitles(transcript, output_path: str, chars_per_caption: int = 30):
    """
    Writes VTT subtitles to `output_path` using AssemblyAI's built-in exporter.
    """
    vtt_text = transcript.export_subtitles_vtt(chars_per_caption=chars_per_caption)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(vtt_text)
    print(f"Saved VTT subtitles to {output_path}")

# Test
# export_vtt_subtitles(transcript, f"subtitle_{video_id}.vtt", chars_per_caption=25)

In [10]:
# Pipeline execution.
youtube_url  = "https://www.youtube.com/watch?v=3qHkcs3kG44"
video_id     = extract_youtube_video_id(youtube_url)
audio_path   = download_youtube_audio(video_id)
transcript   = transcribe_with_assemblyai(str(audio_path), return_transcript=True)

save_word_level_transcript(transcript.words, f"transcript_{video_id}_word_level.json")
thoughts_res = group_words_into_thoughts(transcript.words)
save_thought_segments(thoughts_res, f"processed_thoughts_{video_id}.json")
export_vtt_subtitles(transcript, f"subtitle_{video_id}.vtt")

print("Pipeline finished ✔")

2025-07-16 09:50:26,215 INFO: HTTP Request: POST https://api.assemblyai.com/v2/upload "HTTP/1.1 200 OK"
2025-07-16 09:50:26,545 INFO: HTTP Request: POST https://api.assemblyai.com/v2/transcript "HTTP/1.1 200 OK"
2025-07-16 09:50:26,865 INFO: HTTP Request: GET https://api.assemblyai.com/v2/transcript/1302f387-8216-4696-8368-70f0ec746ba7 "HTTP/1.1 200 OK"
2025-07-16 09:50:30,115 INFO: HTTP Request: GET https://api.assemblyai.com/v2/transcript/1302f387-8216-4696-8368-70f0ec746ba7 "HTTP/1.1 200 OK"
2025-07-16 09:50:33,364 INFO: HTTP Request: GET https://api.assemblyai.com/v2/transcript/1302f387-8216-4696-8368-70f0ec746ba7 "HTTP/1.1 200 OK"
2025-07-16 09:50:36,695 INFO: HTTP Request: GET https://api.assemblyai.com/v2/transcript/1302f387-8216-4696-8368-70f0ec746ba7 "HTTP/1.1 200 OK"
2025-07-16 09:50:39,943 INFO: HTTP Request: GET https://api.assemblyai.com/v2/transcript/1302f387-8216-4696-8368-70f0ec746ba7 "HTTP/1.1 200 OK"
2025-07-16 09:50:43,200 INFO: HTTP Request: GET https://api.assembly

Saved word-level transcript to transcript_3qHkcs3kG44_word_level.json
Saved thought segments and metadata to processed_thoughts_3qHkcs3kG44.json


2025-07-16 09:52:22,487 INFO: HTTP Request: GET https://api.assemblyai.com/v2/transcript/1302f387-8216-4696-8368-70f0ec746ba7/vtt?chars_per_caption=30 "HTTP/1.1 200 OK"


Saved VTT subtitles to subtitle_3qHkcs3kG44.vtt
Pipeline finished ✔


In [None]:
"""
notes to myself:
You've completed a major part of the pipeline. That's amazing. be curious.

next steps:
pass it to the LLM. (mention the encoding you've done, ans give you answer in the encoding thoughtID & wordID)
when you get the respone. you need to decode the thoughtID and the wordID. You need the wordID to exactly cut the video from.
smart move: don't actually build a video/audio cutting engine right now to cut the clips. but use a  music player that takes you to that specific moment.
"""

In [17]:
with open(f"transcript_{video_id}_word_level.json") as f:
    word_data = json.load(f)
word_index_map = word_index_lookup(word_data)
print(word_index_map['2430'])

{'start': 717030, 'end': 717310, 'text': 'You', 'speaker': 'A'}


In [None]:
# 16 July 2025
# LLM Integration

In [34]:
import os
import json
import time
import asyncio
from dotenv import load_dotenv
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor

# Load environment variables
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Configuration
CHUNKS_FOLDER = "chunks_3qHkcs3kG44"  # Folder with chunk JSON files
OUTPUT_FOLDER = "results_3qHkcs3kG44"  # Where to save outputs
MAX_WORKERS = 5  # Number of parallel threads (tune based on your rate limit)
MAX_RETRIES = 5
RETRY_BASE_WAIT = 2  # seconds base for exponential backoff

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Prompt template
PROMPT_TEMPLATE = """
You are an expert at identifying motivational, philosophical, and inspirational moments from speech.

Below is a list of thought segments from a YouTube interview, each with an ID, word start, and word end. The word start is the count number of the first word of that particular thought and the word end is the count number of the last word of that thought. Analyze them deeply and return a list of IDs that have the highest inspirational, philosophical, or viral potential.

Ignore filler or generic phrases. Focus on unique insights, emotional resonance, or motivational power.

Thoughts:
{formatted_thoughts}

Only return a JSON like this:
{{ "top_thought_ids": [["t_0004", 6, 20], ["t_0035", 303, 330], ["t_0700", 1235, 1535]] }}

Reference:
For thought ID: t_0004 the best moment is the sentence spoken from word 6 to 20. You need to give me the exact word start and word end of the best moment.
"""

def load_chunk(path):
    with open(path, "r") as f:
        data = json.load(f)
    cleaned = [
        {
            "id": t["id"],
            "word_start": t["word_start"],
            "word_end": t["word_end"],
            "text": t["text"]
        }
        for t in data["thoughts"]
    ]
    formatted = "\n\n".join([
        f"ID: {t['id']}\nStart: {t['word_start']}\nEnd: {t['word_end']}\nText: {t['text']}"
        for t in cleaned
    ])
    return formatted

def call_openai_stream(prompt):
    retries = 0
    while retries <= MAX_RETRIES:
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                stream=True,
                messages=[
                    {"role": "system", "content": "You identify high-impact inspirational moments from transcripts."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.7,
            )
            full_output = ""
            for chunk in response:
                content = getattr(chunk.choices[0].delta, "content", "") or ""
                full_output += content
            return full_output
        except Exception as e:
            msg = str(e).lower()
            if "429" in msg or "rate limit" in msg:
                wait_time = RETRY_BASE_WAIT * (2 ** retries)
                print(f"⚠️ Rate limit hit. Retry {retries + 1} in {wait_time}s...")
                time.sleep(wait_time)
                retries += 1
            else:
                print(f"❌ OpenAI call failed: {e}")
                break
    return None

def process_chunk(chunk_path):
    print(f"🧠 Processing {chunk_path} ...")
    formatted_thoughts = load_chunk(chunk_path)
    prompt = PROMPT_TEMPLATE.format(formatted_thoughts=formatted_thoughts)

    result = call_openai_stream(prompt)
    if result is None:
        print(f"⚠️ Failed to get response for {chunk_path}")
        return

    # Try to parse JSON, else save raw
    try:
        parsed = json.loads(result)
        out_path = os.path.join(OUTPUT_FOLDER, os.path.basename(chunk_path).replace(".json", "_result.json"))
        with open(out_path, "w") as f:
            json.dump(parsed, f, indent=2)
        print(f"✅ Saved result for {chunk_path} -> {out_path}")
    except Exception:
        raw_out_path = os.path.join(OUTPUT_FOLDER, os.path.basename(chunk_path).replace(".json", "_result_raw.txt"))
        with open(raw_out_path, "w") as f:
            f.write(result)
        print(f"⚠️ Failed to parse JSON for {chunk_path}, saved raw output -> {raw_out_path}")

def main():
    chunk_files = sorted([
        os.path.join(CHUNKS_FOLDER, f)
        for f in os.listdir(CHUNKS_FOLDER)
        if f.endswith(".json")
    ])

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        executor.map(process_chunk, chunk_files)

if __name__ == "__main__":
    main()


🧠 Processing chunks_3qHkcs3kG44/chunk_0.json ...🧠 Processing chunks_3qHkcs3kG44/chunk_1.json ...

🧠 Processing chunks_3qHkcs3kG44/chunk_10.json ...
🧠 Processing chunks_3qHkcs3kG44/chunk_11.json ...
🧠 Processing chunks_3qHkcs3kG44/chunk_12.json ...


2025-07-16 10:43:41,269 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:43:41,304 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:43:41,719 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:43:42,664 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:43:42,670 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_10.json, saved raw output -> results_3qHkcs3kG44/chunk_10_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_13.json ...
⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_11.json, saved raw output -> results_3qHkcs3kG44/chunk_11_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_14.json ...


2025-07-16 10:43:44,503 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:43:44,935 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_12.json, saved raw output -> results_3qHkcs3kG44/chunk_12_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_15.json ...
⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_13.json, saved raw output -> results_3qHkcs3kG44/chunk_13_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_16.json ...


2025-07-16 10:43:46,184 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:43:46,533 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_14.json, saved raw output -> results_3qHkcs3kG44/chunk_14_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_17.json ...


2025-07-16 10:43:47,034 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_0.json, saved raw output -> results_3qHkcs3kG44/chunk_0_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_18.json ...
⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_15.json, saved raw output -> results_3qHkcs3kG44/chunk_15_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_19.json ...
⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_16.json, saved raw output -> results_3qHkcs3kG44/chunk_16_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_2.json ...


2025-07-16 10:43:47,813 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:43:47,931 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:43:47,960 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_17.json, saved raw output -> results_3qHkcs3kG44/chunk_17_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_3.json ...


2025-07-16 10:43:48,061 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:43:48,062 INFO: Retrying request to /chat/completions in 8.192000 seconds


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_1.json, saved raw output -> results_3qHkcs3kG44/chunk_1_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_4.json ...


2025-07-16 10:43:48,315 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:43:48,316 INFO: Retrying request to /chat/completions in 3.802000 seconds


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_19.json, saved raw output -> results_3qHkcs3kG44/chunk_19_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_5.json ...


2025-07-16 10:43:49,121 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:43:49,125 INFO: Retrying request to /chat/completions in 4.822000 seconds


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_18.json, saved raw output -> results_3qHkcs3kG44/chunk_18_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_6.json ...


2025-07-16 10:43:49,361 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:43:49,363 INFO: Retrying request to /chat/completions in 2.254000 seconds


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_2.json, saved raw output -> results_3qHkcs3kG44/chunk_2_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_7.json ...


2025-07-16 10:43:50,608 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:43:50,610 INFO: Retrying request to /chat/completions in 7.595000 seconds
2025-07-16 10:43:51,885 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:43:51,887 INFO: Retrying request to /chat/completions in 1.214000 seconds
2025-07-16 10:43:52,383 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:43:52,385 INFO: Retrying request to /chat/completions in 2.262000 seconds
2025-07-16 10:43:54,649 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:43:54,651 INFO: Retrying request to /chat/completions in 3.296000 seconds
2025-07-16 10:43:54,906 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"


⚠️ Rate limit hit. Retry 1 in 2s...


2025-07-16 10:43:55,165 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:43:56,509 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:43:56,510 INFO: Retrying request to /chat/completions in 4.782000 seconds
2025-07-16 10:43:57,657 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_6.json, saved raw output -> results_3qHkcs3kG44/chunk_6_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_8.json ...


2025-07-16 10:43:58,183 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:43:58,186 INFO: Retrying request to /chat/completions in 1.694000 seconds
2025-07-16 10:43:58,521 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:43:58,523 INFO: Retrying request to /chat/completions in 6.540000 seconds
2025-07-16 10:43:58,556 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"


⚠️ Rate limit hit. Retry 1 in 2s...


2025-07-16 10:44:00,134 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:44:00,136 INFO: Retrying request to /chat/completions in 0.282000 seconds
2025-07-16 10:44:00,858 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:44:00,860 INFO: Retrying request to /chat/completions in 1.422000 seconds
2025-07-16 10:44:01,035 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:44:01,603 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"


⚠️ Rate limit hit. Retry 1 in 2s...
⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_8.json, saved raw output -> results_3qHkcs3kG44/chunk_8_result_raw.txt
🧠 Processing chunks_3qHkcs3kG44/chunk_9.json ...


2025-07-16 10:44:01,994 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:44:01,997 INFO: Retrying request to /chat/completions in 2.294000 seconds
2025-07-16 10:44:02,866 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:44:02,869 INFO: Retrying request to /chat/completions in 2.620000 seconds


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_4.json, saved raw output -> results_3qHkcs3kG44/chunk_4_result_raw.txt


2025-07-16 10:44:04,322 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:44:04,323 INFO: Retrying request to /chat/completions in 5.130000 seconds
2025-07-16 10:44:04,577 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:44:04,580 INFO: Retrying request to /chat/completions in 0.276000 seconds
2025-07-16 10:44:05,353 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"


⚠️ Rate limit hit. Retry 1 in 2s...


2025-07-16 10:44:05,718 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:44:05,731 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"


⚠️ Rate limit hit. Retry 2 in 4s...
⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_9.json, saved raw output -> results_3qHkcs3kG44/chunk_9_result_raw.txt


2025-07-16 10:44:07,944 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:44:07,947 INFO: Retrying request to /chat/completions in 4.670000 seconds
2025-07-16 10:44:09,700 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:44:09,702 INFO: Retrying request to /chat/completions in 3.496000 seconds
2025-07-16 10:44:10,311 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_5.json, saved raw output -> results_3qHkcs3kG44/chunk_5_result_raw.txt


2025-07-16 10:44:12,881 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:44:12,883 INFO: Retrying request to /chat/completions in 4.526000 seconds
2025-07-16 10:44:13,484 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"


⚠️ Rate limit hit. Retry 2 in 4s...


2025-07-16 10:44:17,891 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:44:17,893 INFO: Retrying request to /chat/completions in 0.212000 seconds
2025-07-16 10:44:17,908 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 10:44:18,393 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
2025-07-16 10:44:18,395 INFO: Retrying request to /chat/completions in 7.210000 seconds


✅ Saved result for chunks_3qHkcs3kG44/chunk_7.json -> results_3qHkcs3kG44/chunk_7_result.json


2025-07-16 10:44:26,597 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


⚠️ Failed to parse JSON for chunks_3qHkcs3kG44/chunk_3.json, saved raw output -> results_3qHkcs3kG44/chunk_3_result_raw.txt
