diff --git a/prerender-java/README.md b/prerender-java/README.md
new file mode 100644
index 0000000..7aca8c7
--- /dev/null
+++ b/prerender-java/README.md
@@ -0,0 +1,111 @@
+# prerender-java
+
+Jakarta Servlet Filter for [Prerender.io](https://prerender.io). Intercepts requests from bots and crawlers and serves prerendered HTML, so your JavaScript-rendered app is fully indexable by search engines and social media scrapers.
+
+Compatible with any **Jakarta EE** application server — Tomcat 10+, Jetty 11+, Spring Boot 3+, Quarkus, Micronaut.
+
+Requires **Java 17+**.
+
+## Installation
+
+### Maven
+
+```xml
+<dependency>
+  <groupId>io.prerender</groupId>
+  <artifactId>prerender-java</artifactId>
+  <version>1.0.0</version>
+</dependency>
+```
+
+### Gradle
+
+```groovy
+implementation 'io.prerender:prerender-java:1.0.0'
+```
+
+## Setup
+
+### Option 1: Environment variables (recommended)
+
+```bash
+export PRERENDER_TOKEN=your-token
+```
+
+Register the filter in `web.xml`:
+
+```xml
+<filter>
+  <filter-name>PrerenderFilter</filter-name>
+  <filter-class>io.prerender.PrerenderFilter</filter-class>
+</filter>
+<filter-mapping>
+  <filter-name>PrerenderFilter</filter-name>
+  <url-pattern>/*</url-pattern>
+</filter-mapping>
+```
+
+### Option 2: web.xml init-params
+
+```xml
+<filter>
+  <filter-name>PrerenderFilter</filter-name>
+  <filter-class>io.prerender.PrerenderFilter</filter-class>
+  <init-param>
+    <param-name>prerenderToken</param-name>
+    <param-value>your-token</param-value>
+  </init-param>
+</filter>
+<filter-mapping>
+  <filter-name>PrerenderFilter</filter-name>
+  <url-pattern>/*</url-pattern>
+</filter-mapping>
+```
+
+### Spring Boot
+
+```java
+@Bean
+public FilterRegistrationBean<PrerenderFilter> prerenderFilter() {
+    FilterRegistrationBean<PrerenderFilter> registration = new FilterRegistrationBean<>();
+    registration.setFilter(new PrerenderFilter());
+    registration.addUrlPatterns("/*");
+    registration.setOrder(Ordered.HIGHEST_PRECEDENCE);
+    return registration;
+}
+```
+
+Set `PRERENDER_TOKEN` as an environment variable before starting the app.
+
+## Settings
+
+| Setting | Init-param | Env var | Default |
+|---------|------------|---------|---------|
+| Token | `prerenderToken` | `PRERENDER_TOKEN` | none |
+| Service URL | `prerenderServiceUrl` | `PRERENDER_SERVICE_URL` | `https://service.prerender.io/` |
+
+Init-params take precedence over environment variables.
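+
+## Self-hosted Prerender
+
+```bash
+export PRERENDER_SERVICE_URL=http://your-prerender-server:3000
+```
+
+## How it works
+
+Requests are prerendered when **all** of the following are true:
+
+- The HTTP method is `GET`
+- The `User-Agent` matches a known bot/crawler (Googlebot, Bingbot, Twitterbot, GPTBot, ClaudeBot, etc.)
+  — OR the URL contains `_escaped_fragment_`
+  — OR the `X-Bufferbot` header is present
+- The URL does not end with a static asset extension (`.js`, `.css`, `.png`, etc.)
+
+Everything else passes through to your normal servlet chain.
+
+If the Prerender service is unreachable, the filter falls back gracefully and serves the normal response.
+
+## License
+
+MIT
diff --git a/prerender-java/pom.xml b/prerender-java/pom.xml
new file mode 100644
index 0000000..2368bfc
--- /dev/null
+++ b/prerender-java/pom.xml
@@ -0,0 +1,78 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>io.prerender</groupId>
+    <artifactId>prerender-java</artifactId>
+    <version>1.0.0</version>
+    <packaging>jar</packaging>
+
+    <name>prerender-java</name>
+    <description>Jakarta Servlet Filter for prerendering JavaScript-rendered pages via Prerender.io</description>
+    <url>https://github.com/prerender/integrations</url>
+
+    <licenses>
+        <license>
+            <name>MIT License</name>
+            <url>https://opensource.org/licenses/MIT</url>
+        </license>
+    </licenses>
+
+    <scm>
+        <connection>scm:git:git@github.com:prerender/integrations.git</connection>
+        <developerConnection>scm:git:git@github.com:prerender/integrations.git</developerConnection>
+        <url>https://github.com/prerender/integrations</url>
+    </scm>
+
+    <properties>
+        <maven.compiler.source>17</maven.compiler.source>
+        <maven.compiler.target>17</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>jakarta.servlet</groupId>
+            <artifactId>jakarta.servlet-api</artifactId>
+            <version>6.0.0</version>
+            <scope>provided</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter</artifactId>
+            <version>5.10.2</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-core</artifactId>
+            <version>5.11.0</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-junit-jupiter</artifactId>
+            <version>5.11.0</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.wiremock</groupId>
+            <artifactId>wiremock</artifactId>
+            <version>3.5.4</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>3.2.5</version>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/prerender-java/src/main/java/io/prerender/PrerenderConfig.java b/prerender-java/src/main/java/io/prerender/PrerenderConfig.java
new file mode 100644
index 0000000..5a85c97
--- /dev/null
+++ b/prerender-java/src/main/java/io/prerender/PrerenderConfig.java
@@ -0,0 +1,59 @@
+package io.prerender;
+
+import java.util.List;
+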
+class PrerenderConfig { + + static final List CRAWLER_USER_AGENTS = List.of( + "googlebot", "yahoo", "bingbot", "baiduspider", + "facebookexternalhit", "twitterbot", "rogerbot", "linkedinbot", + "embedly", "quora link preview", "showyoubot", "outbrain", + "pinterest", "slackbot", "w3c_validator", "perplexity", + "oai-searchbot", "chatgpt-user", "gptbot", "claudebot", "amazonbot" + ); + + static final List EXTENSIONS_TO_IGNORE = List.of( + ".js", ".css", ".xml", ".less", ".png", ".jpg", ".jpeg", ".gif", + ".pdf", ".doc", ".txt", ".ico", ".rss", ".zip", ".mp3", ".rar", + ".exe", ".wmv", ".avi", ".ppt", ".mpg", ".mpeg", ".tif", ".wav", + ".mov", ".psd", ".ai", ".xls", ".mp4", ".m4a", ".swf", ".dat", + ".dmg", ".iso", ".flv", ".m4v", ".torrent", ".ttf", ".woff", ".svg" + ); + + private static final String DEFAULT_SERVICE_URL = "https://service.prerender.io/"; + + private final String token; + private final String serviceUrl; + + PrerenderConfig(String token, String serviceUrl) { + this.token = token; + this.serviceUrl = (serviceUrl != null && !serviceUrl.isBlank()) + ? serviceUrl + : DEFAULT_SERVICE_URL; + } + + static PrerenderConfig fromInitParams(String initToken, String initServiceUrl) { + return new PrerenderConfig( + resolve(initToken, "PRERENDER_TOKEN"), + resolve(initServiceUrl, "PRERENDER_SERVICE_URL") + ); + } + + private static String resolve(String initParam, String envVar) { + return (initParam != null && !initParam.isBlank()) ? 
initParam : System.getenv(envVar); + } + + String getToken() { return token; } + + String getServiceUrl() { return serviceUrl; } + + static boolean isBot(String userAgent) { + String ua = userAgent.toLowerCase(); + return CRAWLER_USER_AGENTS.stream().anyMatch(ua::contains); + } + + static boolean isStaticAsset(String path) { + String lower = path.toLowerCase(); + return EXTENSIONS_TO_IGNORE.stream().anyMatch(lower::endsWith); + } +} diff --git a/prerender-java/src/main/java/io/prerender/PrerenderFilter.java b/prerender-java/src/main/java/io/prerender/PrerenderFilter.java new file mode 100644 index 0000000..ccafe76 --- /dev/null +++ b/prerender-java/src/main/java/io/prerender/PrerenderFilter.java @@ -0,0 +1,105 @@ +package io.prerender; + +import jakarta.servlet.Filter; +import jakarta.servlet.FilterChain; +import jakarta.servlet.FilterConfig; +import jakarta.servlet.ServletException; +import jakarta.servlet.ServletRequest; +import jakarta.servlet.ServletResponse; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; + +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class PrerenderFilter implements Filter { + + private static final Logger logger = Logger.getLogger(PrerenderFilter.class.getName()); + + private HttpClient httpClient; + private PrerenderConfig config; + + public PrerenderFilter() {} + + PrerenderFilter(HttpClient httpClient, PrerenderConfig config) { + this.httpClient = httpClient; + this.config = config; + } + + @Override + public void init(FilterConfig filterConfig) { + this.httpClient = HttpClient.newHttpClient(); + this.config = PrerenderConfig.fromInitParams( + filterConfig.getInitParameter("prerenderToken"), + filterConfig.getInitParameter("prerenderServiceUrl") + ); + } + + @Override + public void 
doFilter(ServletRequest request, ServletResponse response, FilterChain chain) + throws IOException, ServletException { + HttpServletRequest httpReq = (HttpServletRequest) request; + HttpServletResponse httpRes = (HttpServletResponse) response; + + if (!shouldPrerender(httpReq)) { + chain.doFilter(request, response); + return; + } + + try { + sendPrerendered(httpReq, httpRes); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + chain.doFilter(request, response); + } catch (IOException e) { + logger.log(Level.WARNING, "Prerender service unreachable, falling back", e); + chain.doFilter(request, response); + } + } + + @Override + public void destroy() {} + + private boolean shouldPrerender(HttpServletRequest request) { + if (!"GET".equalsIgnoreCase(request.getMethod())) return false; + if (PrerenderConfig.isStaticAsset(request.getRequestURI())) return false; + if (request.getParameter("_escaped_fragment_") != null) return true; + if (request.getHeader("X-Bufferbot") != null) return true; + String ua = request.getHeader("User-Agent"); + return ua != null && !ua.isBlank() && PrerenderConfig.isBot(ua); + } + + private void sendPrerendered(HttpServletRequest request, HttpServletResponse response) + throws IOException, InterruptedException { + HttpResponse prerenderResponse = httpClient.send( + buildPrerenderRequest(buildApiUrl(request), request.getHeader("User-Agent")), + HttpResponse.BodyHandlers.ofString() + ); + response.setStatus(prerenderResponse.statusCode()); + response.getWriter().write(prerenderResponse.body()); + } + + private String buildApiUrl(HttpServletRequest request) { + String serviceUrl = config.getServiceUrl(); + if (!serviceUrl.endsWith("/")) serviceUrl += "/"; + String url = request.getRequestURL().toString(); + String qs = request.getQueryString(); + return serviceUrl + (qs != null && !qs.isBlank() ? url + "?" 
+ qs : url); + } + + private HttpRequest buildPrerenderRequest(String apiUrl, String userAgent) { + HttpRequest.Builder builder = HttpRequest.newBuilder() + .uri(URI.create(apiUrl)) + .header("User-Agent", userAgent != null ? userAgent : "") + .GET(); + if (config.getToken() != null && !config.getToken().isBlank()) { + builder.header("X-Prerender-Token", config.getToken()); + } + return builder.build(); + } +} diff --git a/prerender-java/src/test/java/io/prerender/PrerenderFilterTest.java b/prerender-java/src/test/java/io/prerender/PrerenderFilterTest.java new file mode 100644 index 0000000..fa00801 --- /dev/null +++ b/prerender-java/src/test/java/io/prerender/PrerenderFilterTest.java @@ -0,0 +1,163 @@ +package io.prerender; + +import com.github.tomakehurst.wiremock.client.WireMock; +import com.github.tomakehurst.wiremock.core.WireMockConfiguration; +import com.github.tomakehurst.wiremock.junit5.WireMockExtension; +import jakarta.servlet.FilterChain; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.net.http.HttpClient; + +import static com.github.tomakehurst.wiremock.client.WireMock.*; +import static com.github.tomakehurst.wiremock.core.WireMockConfiguration.wireMockConfig; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +class PrerenderFilterTest { + + private static final String BOT_UA = "Mozilla/5.0 (compatible; Googlebot/2.1)"; + private static final String BROWSER_UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"; + 
private static final String PRERENDERED_HTML = "prerendered"; + + @RegisterExtension + static WireMockExtension wireMock = WireMockExtension.newInstance() + .options(wireMockConfig().dynamicPort()) + .build(); + + @Mock private HttpServletRequest request; + @Mock private HttpServletResponse response; + @Mock private FilterChain chain; + + private StringWriter responseWriter; + private PrerenderFilter filter; + + @BeforeEach + void setUp() throws Exception { + wireMock.resetAll(); + responseWriter = new StringWriter(); + when(response.getWriter()).thenReturn(new PrintWriter(responseWriter)); + PrerenderConfig config = new PrerenderConfig(null, "http://localhost:" + wireMock.getPort()); + filter = new PrerenderFilter(HttpClient.newHttpClient(), config); + } + + @Test + void browserRequest_passesThrough() throws Exception { + when(request.getMethod()).thenReturn("GET"); + when(request.getRequestURI()).thenReturn("/"); + when(request.getParameter("_escaped_fragment_")).thenReturn(null); + when(request.getHeader("X-Bufferbot")).thenReturn(null); + when(request.getHeader("User-Agent")).thenReturn(BROWSER_UA); + + filter.doFilter(request, response, chain); + + verify(chain).doFilter(request, response); + verify(response, never()).setStatus(anyInt()); + } + + @Test + void botRequest_receivesPrerenderedResponse() throws Exception { + wireMock.stubFor(get(anyUrl()) + .willReturn(aResponse().withStatus(200).withBody(PRERENDERED_HTML))); + + when(request.getMethod()).thenReturn("GET"); + when(request.getRequestURI()).thenReturn("/"); + when(request.getParameter("_escaped_fragment_")).thenReturn(null); + when(request.getHeader("X-Bufferbot")).thenReturn(null); + when(request.getHeader("User-Agent")).thenReturn(BOT_UA); + when(request.getRequestURL()).thenReturn(new StringBuffer("http://example.com/")); + when(request.getQueryString()).thenReturn(null); + + filter.doFilter(request, response, chain); + + verify(response).setStatus(200); + verify(chain, never()).doFilter(any(), 
any()); + assertEquals(PRERENDERED_HTML, responseWriter.toString()); + } + + @Test + void botRequest_staticAsset_passesThrough() throws Exception { + when(request.getMethod()).thenReturn("GET"); + when(request.getRequestURI()).thenReturn("/styles.css"); + + filter.doFilter(request, response, chain); + + verify(chain).doFilter(request, response); + verify(response, never()).setStatus(anyInt()); + } + + @Test + void escapedFragment_triggersPrerender() throws Exception { + wireMock.stubFor(get(anyUrl()) + .willReturn(aResponse().withStatus(200).withBody(PRERENDERED_HTML))); + + when(request.getMethod()).thenReturn("GET"); + when(request.getRequestURI()).thenReturn("/"); + when(request.getParameter("_escaped_fragment_")).thenReturn(""); + when(request.getHeader("User-Agent")).thenReturn(BROWSER_UA); + when(request.getRequestURL()).thenReturn(new StringBuffer("http://example.com/")); + when(request.getQueryString()).thenReturn("_escaped_fragment_="); + + filter.doFilter(request, response, chain); + + verify(response).setStatus(200); + verify(chain, never()).doFilter(any(), any()); + } + + @Test + void xBufferbot_triggersPrerender() throws Exception { + wireMock.stubFor(get(anyUrl()) + .willReturn(aResponse().withStatus(200).withBody(PRERENDERED_HTML))); + + when(request.getMethod()).thenReturn("GET"); + when(request.getRequestURI()).thenReturn("/"); + when(request.getParameter("_escaped_fragment_")).thenReturn(null); + when(request.getHeader("X-Bufferbot")).thenReturn("true"); + when(request.getHeader("User-Agent")).thenReturn(BROWSER_UA); + when(request.getRequestURL()).thenReturn(new StringBuffer("http://example.com/")); + when(request.getQueryString()).thenReturn(null); + + filter.doFilter(request, response, chain); + + verify(response).setStatus(200); + verify(chain, never()).doFilter(any(), any()); + } + + @Test + void postRequest_passesThrough() throws Exception { + when(request.getMethod()).thenReturn("POST"); + + filter.doFilter(request, response, chain); + + 
verify(chain).doFilter(request, response); + verify(response, never()).setStatus(anyInt()); + } + + @Test + void networkError_fallsBackToNormalResponse() throws Exception { + wireMock.stubFor(get(anyUrl()) + .willReturn(aResponse().withFault(com.github.tomakehurst.wiremock.http.Fault.CONNECTION_RESET_BY_PEER))); + + when(request.getMethod()).thenReturn("GET"); + when(request.getRequestURI()).thenReturn("/"); + when(request.getParameter("_escaped_fragment_")).thenReturn(null); + when(request.getHeader("X-Bufferbot")).thenReturn(null); + when(request.getHeader("User-Agent")).thenReturn(BOT_UA); + when(request.getRequestURL()).thenReturn(new StringBuffer("http://example.com/")); + when(request.getQueryString()).thenReturn(null); + + filter.doFilter(request, response, chain); + + verify(chain).doFilter(request, response); + } +}