@@ -780,6 +780,10 @@ async def _warm_pool(self):
780780 # Start H2 connection (runs alongside H1 pool)
781781 if self ._h2 :
782782 self ._spawn (self ._h2_connect_and_warm ())
783+ # H1 container keepalive — spawned unconditionally so the Apps Script
784+ # container never goes cold even when H2 is unavailable. When H2 IS
785+ # available the H1 loop skips its own ping, so the two do not double-fire.
786+ self ._spawn (self ._h1_container_keepalive ())
783787
784788 def _spawn (self , coro ) -> asyncio .Task :
785789 """Create a task and keep a strong reference for clean cancellation."""
@@ -913,6 +917,34 @@ async def _keepalive_loop(self):
913917 except Exception as e :
914918 log .debug ("Keepalive failed: %s" , e )
915919
920+ async def _h1_container_keepalive (self ):
921+ """Keep the Apps Script container warm via H1 when H2 keepalive is absent.
922+
923+ H2's _keepalive_loop handles pings when H2 is connected. When H2 is
924+ unavailable (library not installed, connection dropped) this coroutine
925+ takes over so the container never goes cold and causes slow cold-starts
926+ on the first video / streaming request after an idle period.
927+ """
928+ while True : # runs until the task is cancelled (the only exit is the break below)
929+ try :
930+ await asyncio .sleep (240 ) # sleep first: earliest ping is 240 s after start; cadence meant to match the H2 keepalive
931+ if self ._h2_available ():
932+ continue # H2 is usable, so its keepalive does the pinging; skip this cycle
933+ payload = self ._build_payload ( # minimal HEAD request to a placeholder URL
934+ "HEAD" , "http://example.com/" , {}, b"" # presumably (method, url, headers, body) - confirm against _build_payload
935+ )
936+ t0 = time .perf_counter () # start of the round-trip timing used in the debug log
937+ # Per the original author, _relay_payload_h1 applies its own
938+ # per-attempt timeout internally, so no outer wait_for is used (a
939+ # shorter outer timeout would cancel valid in-progress relays early).
940+ await self ._relay_payload_h1 (payload ) # response is discarded; only the side effect matters
941+ dt = (time .perf_counter () - t0 ) * 1000 # elapsed time in milliseconds
942+ log .debug ("H1 container keepalive: %.0fms" , dt )
943+ except asyncio .CancelledError :
944+ break # cancellation ends the loop; NOTE(review): the CancelledError is swallowed, not re-raised
945+ except Exception as exc : # best-effort: any other failure is logged and the loop retries next cycle
946+ log .debug ("H1 container keepalive failed: %s" , exc )
947+
916948 async def _do_warm (self ):
917949 """Open WARM_POOL_COUNT connections in parallel — failures are fine."""
918950 count = WARM_POOL_COUNT
@@ -1666,13 +1698,20 @@ async def _relay_with_retry(self, payload: dict) -> bytes:
16661698 log .debug ("H2 relay failed (%s), reconnecting" , e )
16671699 try :
16681700 await self ._h2 .reconnect ()
1669- self ._record_h2_success ()
1701+ # Do NOT record success here — only a successful relay
1702+ # response proves the connection works. Recording
1703+ # success after reconnect was resetting the failure
1704+ # streak and causing an infinite reconnect storm.
16701705 except Exception as reconnect_exc :
16711706 self ._record_h2_failure (reconnect_exc )
16721707 log .warning ("H2 reconnect failed, falling back to H1" )
16731708 break
16741709 else :
1675- raise
1710+ # Last H2 attempt failed — fall through to H1 rather
1711+ # than raising here, which would bypass H1 entirely.
1712+ log .debug ("H2 relay failed on final attempt (%s), "
1713+ "falling back to H1" , e )
1714+ break
16761715
16771716 # HTTP/1.1 fallback (pool-based)
16781717 async with self ._semaphore :
0 commit comments