chore: replace safe browsing with web risk (#2305)

* chore: replace safe browsing with web risk * chore: add SOCIAL_ENGINEERING_EXTENDED_COVERAGE
opengovsg · Mar 21, 2024 · 1e1f53b · 1e1f53b
1 parent 7c8cea6
commit 1e1f53b
Show file tree

Hide file tree

Showing 7 changed files with 196 additions and 212 deletions.
diff --git a/src/server/modules/threat/interfaces/SafeBrowsingRepository.ts b/src/server/modules/threat/interfaces/SafeBrowsingRepository.ts
@@ -1,17 +1,17 @@
-import { HasCacheDuration } from '../../../repositories/types'
+import { WebRiskThreat } from '../../../repositories/types'
 
 export interface SafeBrowsingRepository {
   /**
    * Sets or replaces the Safe Browsing threat matches associated with a URL.
    * @param  {string} url The URL.
-   * @param  {Array<HasCacheDuration>} matches The threat matches returned by Safe Browsing for the URL.
+   * @param  {WebRiskThreat} threat The threat match returned by Safe Browsing for the URL.
    */
-  set(url: string, matches: HasCacheDuration[]): Promise<void>
+  set(url: string, threat: WebRiskThreat): Promise<void>
 
   /**
    * Retrieves Safe Browsing threat matches for a URL, if any.
    * @param  {string} url The URL.
-   * @returns {Promise<Array<HasCacheDuration> | null>} An array of threat matches, or null if none.
+   * @returns {Promise<WebRiskThreat | null>} Threat match or null if none.
    */
-  get(url: string): Promise<HasCacheDuration[] | null>
+  get(url: string): Promise<WebRiskThreat | null>
 }
diff --git a/src/server/modules/threat/mappers/SafeBrowsingMapper.ts b/src/server/modules/threat/mappers/SafeBrowsingMapper.ts
@@ -1,27 +1,25 @@
 /* eslint-disable class-methods-use-this, lines-between-class-members, no-dupe-class-members */
 import { injectable } from 'inversify'
 import { TwoWayMapper } from '../../../mappers/TwoWayMapper'
-import { HasCacheDuration } from '../../../repositories/types'
+import { WebRiskThreat } from '../../../repositories/types'
 
 @injectable()
-export class SafeBrowsingMapper
-  implements TwoWayMapper<HasCacheDuration[], string>
-{
-  persistenceToDto(matches: string): HasCacheDuration[]
-  persistenceToDto(matches: string | null): HasCacheDuration[] | null {
-    if (!matches) {
+export class SafeBrowsingMapper implements TwoWayMapper<WebRiskThreat, string> {
+  persistenceToDto(threat: string): WebRiskThreat
+  persistenceToDto(threat: string | null): WebRiskThreat | null {
+    if (!threat) {
       return null
     }
-    return JSON.parse(matches)
+    return JSON.parse(threat)
   }
 
-  dtoToPersistence(matches: HasCacheDuration[]): string
-  dtoToPersistence(matches: HasCacheDuration[] | null): string | null {
-    if (!matches) {
+  dtoToPersistence(threat: WebRiskThreat): string
+  dtoToPersistence(threat: WebRiskThreat | null): string | null {
+    if (!threat) {
       return null
     }
 
-    return JSON.stringify(matches)
+    return JSON.stringify(threat)
   }
 }
 

diff --git a/src/server/modules/threat/repositories/SafeBrowsingRepository.ts b/src/server/modules/threat/repositories/SafeBrowsingRepository.ts
@@ -2,46 +2,43 @@
 
 import { inject, injectable } from 'inversify'
 import { safeBrowsingClient } from '../../../redis'
-import { HasCacheDuration } from '../../../repositories/types'
 import * as interfaces from '../interfaces'
 import { TwoWayMapper } from '../../../mappers/TwoWayMapper'
 import { DependencyIds } from '../../../constants'
 import { NotFoundError } from '../../../util/error'
+import { WebRiskThreat } from '../../../repositories/types'
+
+// set default threat cache
+const DEFAULT_CACHE_DURATION_IN_S = 300
 
 @injectable()
 export class SafeBrowsingRepository
   implements interfaces.SafeBrowsingRepository
 {
-  private safeBrowsingMapper: TwoWayMapper<HasCacheDuration[], string>
+  private safeBrowsingMapper: TwoWayMapper<WebRiskThreat, string>
 
   public constructor(
     @inject(DependencyIds.safeBrowsingMapper)
-    safeBrowsingMapper: TwoWayMapper<HasCacheDuration[], string>,
+    safeBrowsingMapper: TwoWayMapper<WebRiskThreat, string>,
   ) {
     this.safeBrowsingMapper = safeBrowsingMapper
   }
 
-  public set: (url: string, matches: HasCacheDuration[]) => Promise<void> = (
+  public set: (url: string, threat: WebRiskThreat) => Promise<void> = (
     url,
-    matches,
+    threat,
   ) => {
     return new Promise((resolve, reject) => {
-      if (!matches.length || !matches[0]) {
+      if (!threat) {
         reject(
-          new NotFoundError(`No matches found for ${url}, should not persist`),
+          new NotFoundError(`No threat found for ${url}, should not persist`),
         )
       }
-      // Threat matches from Safe Browsing API specify a
-      // cache duration in seconds, with an 's' suffix
-      const [{ cacheDuration }] = matches
-      const expiryInSeconds = Number(
-        cacheDuration.substring(0, cacheDuration.length - 1),
-      )
       safeBrowsingClient.set(
         url,
-        this.safeBrowsingMapper.dtoToPersistence(matches),
+        this.safeBrowsingMapper.dtoToPersistence(threat),
         'EX',
-        expiryInSeconds,
+        DEFAULT_CACHE_DURATION_IN_S,
         (redisSetError) => {
           if (redisSetError) {
             reject(redisSetError)
@@ -54,7 +51,7 @@ export class SafeBrowsingRepository
     })
   }
 
-  public get: (url: string) => Promise<HasCacheDuration[] | null> = (url) => {
+  public get: (url: string) => Promise<WebRiskThreat | null> = (url) => {
     return new Promise((resolve, reject) => {
       safeBrowsingClient.get(url, (redisError, string) => {
         if (redisError) {

diff --git a/src/server/modules/threat/repositories/__tests__/SafeBrowsingRepository.test.ts b/src/server/modules/threat/repositories/__tests__/SafeBrowsingRepository.test.ts
@@ -14,15 +14,14 @@ const { SafeBrowsingRepository } = require('..')
 
 const repository = new SafeBrowsingRepository(new SafeBrowsingMapper())
 
-const durationInSeconds = 1
+const durationInSeconds = 300
 const url = 'https://example.com'
-const matches = [
-  {
-    cacheDuration: `${durationInSeconds}s`,
-  },
-]
+const threat = {
+  threatTypes: ['MALWARE'],
+  expireTime: '2024-03-20T05:29:41.898456500Z',
+}
 
-describe('otp repository redis test', () => {
+describe('safe browsing repository redis test', () => {
   beforeEach(async () => {
     await new Promise<void>((resolve) => {
       redisMockClient.flushall(() => resolve())
@@ -32,8 +31,8 @@ describe('otp repository redis test', () => {
   })
 
   it('returns a value if present', async () => {
-    redisMockClient.set(url, JSON.stringify(matches))
-    await expect(repository.get(url)).resolves.toStrictEqual(matches)
+    redisMockClient.set(url, JSON.stringify(threat))
+    await expect(repository.get(url)).resolves.toStrictEqual(threat)
     expect(redisMockClient.get).toBeCalledWith(url, expect.any(Function))
   })
 
@@ -43,10 +42,10 @@ describe('otp repository redis test', () => {
   })
 
   it('sets a value if specified', async () => {
-    await repository.set(url, matches)
+    await repository.set(url, threat)
     expect(redisMockClient.set).toBeCalledWith(
       url,
-      JSON.stringify(matches),
+      JSON.stringify(threat),
       'EX',
       durationInSeconds,
       expect.any(Function),

diff --git a/src/server/modules/threat/services/SafeBrowsingService.ts b/src/server/modules/threat/services/SafeBrowsingService.ts
@@ -5,9 +5,18 @@ import { UrlThreatScanService } from '../interfaces'
 import { logger, safeBrowsingKey, safeBrowsingLogOnly } from '../../../config'
 import { SafeBrowsingRepository } from '../interfaces/SafeBrowsingRepository'
 import { DependencyIds } from '../../../constants'
-
-const ENDPOINT = `https://safebrowsing.googleapis.com/v4/threatMatches:find?key=${safeBrowsingKey}`
-const SAFE_BROWSING_LIMIT = 500
+import { WebRiskThreat } from '../../../repositories/types'
+
+const ENDPOINT = `https://webrisk.googleapis.com/v1/uris:search?key=${safeBrowsingKey}`
+const WEB_RISK_THREAT_TYPES = [
+  'SOCIAL_ENGINEERING',
+  'MALWARE',
+  'UNWANTED_SOFTWARE',
+  // SOCIAL_ENGINEERING_EXTENDED_COVERAGE improves coverage of malicious urls
+  // but may have small amount (<10%) of potential false positives,
+  // see details https://cloud.google.com/web-risk/docs/extended-coverage
+  'SOCIAL_ENGINEERING_EXTENDED_COVERAGE',
+]
 
 @injectable()
 export class SafeBrowsingService implements UrlThreatScanService {
@@ -22,117 +31,82 @@ export class SafeBrowsingService implements UrlThreatScanService {
     this.safeBrowsingRepository = safeBrowsingRepository
   }
 
-  /**
-   * Request template for Safe Browsing. Threat lists found
-   * at /v4/threatLists and are keyed by type, platform and entry.
-   * Both ANY_PLATFORM and all the platforms are specified, the latter
-   * so that we can look up the IP_RANGE lists, which are not keyed by
-   * ANY_PLATFORM.
-   */
-  private requestTemplate = Object.freeze({
-    client: {
-      clientId: 'GoGovSG',
-      clientVersion: '1.0.0',
-    },
-    threatInfo: {
-      threatTypes: [
-        'POTENTIALLY_HARMFUL_APPLICATION',
-        'UNWANTED_SOFTWARE',
-        'MALWARE',
-        'SOCIAL_ENGINEERING',
-      ],
-      platformTypes: ['ANY_PLATFORM', 'WINDOWS', 'LINUX', 'OSX'],
-      threatEntryTypes: ['URL', 'EXECUTABLE', 'IP_RANGE'],
-    },
-  })
-
   public isThreat: (url: string) => Promise<boolean> = async (url) => {
     if (!safeBrowsingKey) {
       logger.warn(`No Safe Browsing API key provided. Not scanning url: ${url}`)
       return false
     }
-    let matches = await this.safeBrowsingRepository.get(url)
-    if (!matches) {
-      matches = await this.lookup([url])
+    let threat = await this.safeBrowsingRepository.get(url)
+    if (!threat) {
+      threat = await this.fetchWebRiskData(url)
     }
-    return !safeBrowsingLogOnly && Boolean(matches)
+    return !safeBrowsingLogOnly && Boolean(threat)
   }
 
-  private async lookup(urls: string[]) {
-    let matches = null
-    const request = { ...this.requestTemplate } as any
+  private constructWebRiskEndpoint: (
+    url: string,
+    threatTypes: string[],
+  ) => string = (url, threatTypes) => {
+    const encodedUrl = encodeURIComponent(url)
 
-    request.threatInfo.threatEntries = urls.map((url) => {
-      return { url }
-    })
+    const threatTypesQuery = threatTypes
+      .map((type: string) => `threatTypes=${type}`)
+      .join('&')
 
-    const response = await fetch(ENDPOINT, {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify(request),
-    })
-    const body = await response.json()
-    if (!response.ok) {
-      const error = new Error(
-        `Safe Browsing failure:\tError: ${response.statusText}\t message: ${body.error.message}`,
-      )
-      if (safeBrowsingLogOnly) {
-        logger.error(error.message)
-      } else {
-        throw error
-      }
-    } else if (body?.matches) {
-      const prefix = safeBrowsingLogOnly
-        ? 'Considered threat by Safe Browsing but ignoring'
-        : 'Malicious link content'
+    return `${ENDPOINT}&${threatTypesQuery}&uri=${encodedUrl}`
+  }
 
-      /**
-       * Result.matches is a list of threat matches for all matched urls, example:
-       * [{ "threatType": "SOCIAL_ENGINEERING", "threat": { "url": "https://testsafebrowsing.appspot.com/s/phishing.html" } }]
-       * we use _.groupBy to group threat matches by url so that the serialized form
-       * matches what is currently written into our repository for single url lookups.
-       */
-      const matchesByUrl = _.groupBy(body.matches, (obj) => obj.threat.url)
-      Object.entries(matchesByUrl).forEach(
-        ([url, threatMatch]: [string, any]) => {
+  private fetchWebRiskData: (url: string) => Promise<WebRiskThreat | null> =
+    async (url) => {
+      const endpoint = this.constructWebRiskEndpoint(url, WEB_RISK_THREAT_TYPES)
+
+      const response = await fetch(endpoint, { method: 'GET' })
+      const body = await response.json()
+      if (!response.ok) {
+        const error = new Error(
+          `Safe Browsing failure:\tError: ${response.statusText}\t message: ${body.error.message}`,
+        )
+        if (safeBrowsingLogOnly) {
+          logger.error(error.message)
+        } else {
+          throw error
+        }
+      } else if (body?.threat) {
+        const prefix = safeBrowsingLogOnly
+          ? 'Considered threat by Safe Browsing but ignoring'
+          : 'Malicious link content'
+
+        /**
+         * Result.threat.threatTypes is a list of threat matches for a url, example:
+         * "threat": { "threatTypes": ["MALWARE"], "expireTime": "2024-03-20T05:29:41.898456500Z"}.
+         */
+        logger.warn(
+          `${prefix}: ${url} yields ${JSON.stringify(body.threat, null, 2)}`,
+        )
+        try {
+          // writing to the safeBrowsingRepository should be non-blocking
+          this.safeBrowsingRepository.set(url, body.threat)
+        } catch (e) {
+          // writes are wrapped in a try-catch block to prevent errors from bubbling up
           logger.warn(
-            `${prefix}: ${url} yields ${JSON.stringify(threatMatch, null, 2)}`,
+            `failed to set ${url} in safeBrowsingRepository, skipping`,
           )
-          try {
-            // writing to the safeBrowsingRepository should be non-blocking
-            this.safeBrowsingRepository.set(url, threatMatch)
-          } catch (e) {
-            // writes are wrapped in a try-catch block to prevent errors from bubbling up
-            logger.warn(
-              `failed to set ${url} in safeBrowsingRepository, skipping`,
-            )
-          }
-        },
-      )
-      matches = body.matches
+        }
+        return body.threat
+      }
+      return null
     }
-    return matches
-  }
 
-  public isThreatBulk: (
-    urls: string[],
-    batchSize?: number,
-  ) => Promise<boolean> = async (urls, batchSize = SAFE_BROWSING_LIMIT) => {
+  public isThreatBulk: (urls: string[]) => Promise<boolean> = async (urls) => {
     if (!safeBrowsingKey) {
       logger.warn(`No Safe Browsing API key provided. Not scanning in bulk`)
       return false
     }
 
-    const urlChunks: string[][] = []
-
-    for (let i = 0; i < urls.length; i += batchSize) {
-      urlChunks.push(urls.slice(i, i + batchSize))
-    }
-
-    const matches = await Promise.all(
-      urlChunks.map((urlChunk) => this.lookup(urlChunk)),
+    const results = await Promise.all(
+      urls.map((url) => this.fetchWebRiskData(url)),
     )
-    const isThreat = matches.some((match) => Boolean(match) === true)
+    const isThreat = results.some((result) => Boolean(result))
     if (!safeBrowsingLogOnly && isThreat) {
       return true
     }