11import { createError , defineEventHandler , getQuery , setHeader } from 'h3'
2- import { useRuntimeConfig } from 'nitropack/runtime'
2+ import { defineCachedFunction , useRuntimeConfig } from 'nitropack/runtime'
3+ import { $fetch } from 'ofetch'
34import { ELEMENT_NODE , parse , renderSync , TEXT_NODE , walkSync } from 'ultrahtml'
45import { createCachedJsonFetch } from './utils/cached-upstream'
5- import { proxyAssetUrl , rewriteUrl , rewriteUrlsInText , RSRC_RE , scopeCss } from './utils/instagram-embed'
6+ import { isEmbedShell , proxyAssetUrl , rewriteUrl , rewriteUrlsInText , RSRC_RE , scopeCss } from './utils/instagram-embed'
67import { withSigning } from './utils/withSigning'
78
89export { proxyAssetUrl , proxyImageUrl , rewriteUrl , rewriteUrlsInText , scopeCss } from './utils/instagram-embed'
@@ -12,10 +13,35 @@ const SRCSET_SPLIT_RE = /\s+/
1213
1314// Instagram embed HTML is semi-fresh (likes and captions may update); the 10min maxAge
1415// matches the outbound Cache-Control header, and entries are deduped per embed URL (post + captions variant) and request headers.
15- const cachedEmbedFetch = createCachedJsonFetch < string > (
16- 'nuxt-scripts-instagram-embed' ,
17- 600 ,
18- url => url ,
16+ // Throws on shell responses so nitro doesn't cache them.
17+ const cachedEmbedFetch = defineCachedFunction (
18+ async ( url : string , headers : Record < string , string > ) : Promise < string > => {
19+ const html = await $fetch < string > ( url , { timeout : 10000 , headers } )
20+ if ( isEmbedShell ( html ) ) {
21+ throw createError ( {
22+ statusCode : 502 ,
23+ statusMessage : 'Instagram returned an empty embed shell (post unavailable or upstream rate-limiting)' ,
24+ } )
25+ }
26+ return html
27+ } ,
28+ {
29+ // v2 — bump to evict any v1 entries that cached the empty JS shell
30+ // before the shell-detection / UA fix landed.
31+ name : 'nuxt-scripts-instagram-embed-v2' ,
32+ maxAge : 600 ,
33+ swr : true ,
34+ staleMaxAge : 600 ,
35+ // Vary on headers too — Instagram's response is UA-dependent, so
36+ // different callers (e.g. unit tests, future UA changes) must not
37+ // collide on the same key.
38+ getKey : ( url : string , headers : Record < string , string > ) => {
39+ const parts = [ url ]
40+ for ( const [ k , v ] of Object . entries ( headers ) . sort ( ( [ a ] , [ b ] ) => a . localeCompare ( b ) ) )
41+ parts . push ( `${ k } =${ v } ` )
42+ return parts . join ( '|' )
43+ } ,
44+ } ,
1945)
2046
2147// Static CSS from Instagram's CDN is versioned; 24h cache is safe because the
@@ -79,10 +105,12 @@ export default withSigning(defineEventHandler(async (event) => {
79105 const embedUrl = `${ cleanUrl } embed/${ captions ? 'captioned/' : '' } `
80106
81107 const html = await cachedEmbedFetch ( embedUrl , {
82- headers : {
83- 'Accept' : 'text/html' ,
84- 'User-Agent' : 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' ,
85- } ,
108+ 'Accept' : 'text/html' ,
109+ // Meta's own crawler UA. Googlebot's UA is also accepted by Instagram
110+ // but is IP-verified, so it fails from hosts outside Google's ranges
111+ // (e.g. Cloudflare/Vercel) and Instagram serves the JS shell instead
112+ // of the SSR'd post.
113+ 'User-Agent' : 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' ,
86114 } ) . catch ( ( error : any ) => {
87115 throw createError ( {
88116 statusCode : error . statusCode || 500 ,
0 commit comments