Skip to content

Commit

Permalink
chore: replace safe browsing with web risk (#2305)
Browse files Browse the repository at this point in the history
* chore: replace safe browsing with web risk

* chore: add SOCIAL_ENGINEERING_EXTENDED_COVERAGE
  • Loading branch information
gweiying committed Mar 21, 2024
1 parent 7c8cea6 commit 1e1f53b
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 212 deletions.
10 changes: 5 additions & 5 deletions src/server/modules/threat/interfaces/SafeBrowsingRepository.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import { HasCacheDuration } from '../../../repositories/types'
import { WebRiskThreat } from '../../../repositories/types'

export interface SafeBrowsingRepository {
/**
* Sets or replaces the Safe Browsing threat matches associated with a URL.
* @param {string} url The URL.
* @param {Array<HasCacheDuration>} matches The threat matches returned by Safe Browsing for the URL.
* @param {WebRiskThreat} threat The threat match returned by Safe Browsing for the URL.
*/
set(url: string, matches: HasCacheDuration[]): Promise<void>
set(url: string, threat: WebRiskThreat): Promise<void>

/**
* Retrieves Safe Browsing threat matches for a URL, if any.
* @param {string} url The URL.
* @returns {Promise<Array<HasCacheDuration> | null>} An array of threat matches, or null if none.
* @returns {Promise<WebRiskThreat | null>} Threat match or null if none.
*/
get(url: string): Promise<HasCacheDuration[] | null>
get(url: string): Promise<WebRiskThreat | null>
}
22 changes: 10 additions & 12 deletions src/server/modules/threat/mappers/SafeBrowsingMapper.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,25 @@
/* eslint-disable class-methods-use-this, lines-between-class-members, no-dupe-class-members */
import { injectable } from 'inversify'
import { TwoWayMapper } from '../../../mappers/TwoWayMapper'
import { HasCacheDuration } from '../../../repositories/types'
import { WebRiskThreat } from '../../../repositories/types'

@injectable()
export class SafeBrowsingMapper
implements TwoWayMapper<HasCacheDuration[], string>
{
persistenceToDto(matches: string): HasCacheDuration[]
persistenceToDto(matches: string | null): HasCacheDuration[] | null {
if (!matches) {
export class SafeBrowsingMapper implements TwoWayMapper<WebRiskThreat, string> {
persistenceToDto(threat: string): WebRiskThreat
persistenceToDto(threat: string | null): WebRiskThreat | null {
if (!threat) {
return null
}
return JSON.parse(matches)
return JSON.parse(threat)
}

dtoToPersistence(matches: HasCacheDuration[]): string
dtoToPersistence(matches: HasCacheDuration[] | null): string | null {
if (!matches) {
dtoToPersistence(threat: WebRiskThreat): string
dtoToPersistence(threat: WebRiskThreat | null): string | null {
if (!threat) {
return null
}

return JSON.stringify(matches)
return JSON.stringify(threat)
}
}

Expand Down
29 changes: 13 additions & 16 deletions src/server/modules/threat/repositories/SafeBrowsingRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,43 @@

import { inject, injectable } from 'inversify'
import { safeBrowsingClient } from '../../../redis'
import { HasCacheDuration } from '../../../repositories/types'
import * as interfaces from '../interfaces'
import { TwoWayMapper } from '../../../mappers/TwoWayMapper'
import { DependencyIds } from '../../../constants'
import { NotFoundError } from '../../../util/error'
import { WebRiskThreat } from '../../../repositories/types'

// set default threat cache
const DEFAULT_CACHE_DURATION_IN_S = 300

@injectable()
export class SafeBrowsingRepository
implements interfaces.SafeBrowsingRepository
{
private safeBrowsingMapper: TwoWayMapper<HasCacheDuration[], string>
private safeBrowsingMapper: TwoWayMapper<WebRiskThreat, string>

public constructor(
@inject(DependencyIds.safeBrowsingMapper)
safeBrowsingMapper: TwoWayMapper<HasCacheDuration[], string>,
safeBrowsingMapper: TwoWayMapper<WebRiskThreat, string>,
) {
this.safeBrowsingMapper = safeBrowsingMapper
}

public set: (url: string, matches: HasCacheDuration[]) => Promise<void> = (
public set: (url: string, threat: WebRiskThreat) => Promise<void> = (
url,
matches,
threat,
) => {
return new Promise((resolve, reject) => {
if (!matches.length || !matches[0]) {
if (!threat) {
reject(
new NotFoundError(`No matches found for ${url}, should not persist`),
new NotFoundError(`No threat found for ${url}, should not persist`),
)
}
// Threat matches from Safe Browsing API specify a
// cache duration in seconds, with an 's' suffix
const [{ cacheDuration }] = matches
const expiryInSeconds = Number(
cacheDuration.substring(0, cacheDuration.length - 1),
)
safeBrowsingClient.set(
url,
this.safeBrowsingMapper.dtoToPersistence(matches),
this.safeBrowsingMapper.dtoToPersistence(threat),
'EX',
expiryInSeconds,
DEFAULT_CACHE_DURATION_IN_S,
(redisSetError) => {
if (redisSetError) {
reject(redisSetError)
Expand All @@ -54,7 +51,7 @@ export class SafeBrowsingRepository
})
}

public get: (url: string) => Promise<HasCacheDuration[] | null> = (url) => {
public get: (url: string) => Promise<WebRiskThreat | null> = (url) => {
return new Promise((resolve, reject) => {
safeBrowsingClient.get(url, (redisError, string) => {
if (redisError) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,14 @@ const { SafeBrowsingRepository } = require('..')

const repository = new SafeBrowsingRepository(new SafeBrowsingMapper())

const durationInSeconds = 1
const durationInSeconds = 300
const url = 'https://example.com'
const matches = [
{
cacheDuration: `${durationInSeconds}s`,
},
]
const threat = {
threatTypes: ['MALWARE'],
expireTime: '2024-03-20T05:29:41.898456500Z',
}

describe('otp repository redis test', () => {
describe('safe browsing repository redis test', () => {
beforeEach(async () => {
await new Promise<void>((resolve) => {
redisMockClient.flushall(() => resolve())
Expand All @@ -32,8 +31,8 @@ describe('otp repository redis test', () => {
})

it('returns a value if present', async () => {
redisMockClient.set(url, JSON.stringify(matches))
await expect(repository.get(url)).resolves.toStrictEqual(matches)
redisMockClient.set(url, JSON.stringify(threat))
await expect(repository.get(url)).resolves.toStrictEqual(threat)
expect(redisMockClient.get).toBeCalledWith(url, expect.any(Function))
})

Expand All @@ -43,10 +42,10 @@ describe('otp repository redis test', () => {
})

it('sets a value if specified', async () => {
await repository.set(url, matches)
await repository.set(url, threat)
expect(redisMockClient.set).toBeCalledWith(
url,
JSON.stringify(matches),
JSON.stringify(threat),
'EX',
durationInSeconds,
expect.any(Function),
Expand Down
160 changes: 67 additions & 93 deletions src/server/modules/threat/services/SafeBrowsingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,18 @@ import { UrlThreatScanService } from '../interfaces'
import { logger, safeBrowsingKey, safeBrowsingLogOnly } from '../../../config'
import { SafeBrowsingRepository } from '../interfaces/SafeBrowsingRepository'
import { DependencyIds } from '../../../constants'

const ENDPOINT = `https://safebrowsing.googleapis.com/v4/threatMatches:find?key=${safeBrowsingKey}`
const SAFE_BROWSING_LIMIT = 500
import { WebRiskThreat } from '../../../repositories/types'

const ENDPOINT = `https://webrisk.googleapis.com/v1/uris:search?key=${safeBrowsingKey}`
const WEB_RISK_THREAT_TYPES = [
'SOCIAL_ENGINEERING',
'MALWARE',
'UNWANTED_SOFTWARE',
// SOCIAL_ENGINEERING_EXTENDED_COVERAGE improves coverage of malicious urls
// but may have small amount (<10%) of potential false positives,
// see details https://cloud.google.com/web-risk/docs/extended-coverage
'SOCIAL_ENGINEERING_EXTENDED_COVERAGE',
]

@injectable()
export class SafeBrowsingService implements UrlThreatScanService {
Expand All @@ -22,117 +31,82 @@ export class SafeBrowsingService implements UrlThreatScanService {
this.safeBrowsingRepository = safeBrowsingRepository
}

/**
* Request template for Safe Browsing. Threat lists found
* at /v4/threatLists and are keyed by type, platform and entry.
* Both ANY_PLATFORM and all the platforms are specified, the latter
* so that we can look up the IP_RANGE lists, which are not keyed by
* ANY_PLATFORM.
*/
private requestTemplate = Object.freeze({
client: {
clientId: 'GoGovSG',
clientVersion: '1.0.0',
},
threatInfo: {
threatTypes: [
'POTENTIALLY_HARMFUL_APPLICATION',
'UNWANTED_SOFTWARE',
'MALWARE',
'SOCIAL_ENGINEERING',
],
platformTypes: ['ANY_PLATFORM', 'WINDOWS', 'LINUX', 'OSX'],
threatEntryTypes: ['URL', 'EXECUTABLE', 'IP_RANGE'],
},
})

public isThreat: (url: string) => Promise<boolean> = async (url) => {
if (!safeBrowsingKey) {
logger.warn(`No Safe Browsing API key provided. Not scanning url: ${url}`)
return false
}
let matches = await this.safeBrowsingRepository.get(url)
if (!matches) {
matches = await this.lookup([url])
let threat = await this.safeBrowsingRepository.get(url)
if (!threat) {
threat = await this.fetchWebRiskData(url)
}
return !safeBrowsingLogOnly && Boolean(matches)
return !safeBrowsingLogOnly && Boolean(threat)
}

private async lookup(urls: string[]) {
let matches = null
const request = { ...this.requestTemplate } as any
private constructWebRiskEndpoint: (
url: string,
threatTypes: string[],
) => string = (url, threatTypes) => {
const encodedUrl = encodeURIComponent(url)

request.threatInfo.threatEntries = urls.map((url) => {
return { url }
})
const threatTypesQuery = threatTypes
.map((type: string) => `threatTypes=${type}`)
.join('&')

const response = await fetch(ENDPOINT, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(request),
})
const body = await response.json()
if (!response.ok) {
const error = new Error(
`Safe Browsing failure:\tError: ${response.statusText}\t message: ${body.error.message}`,
)
if (safeBrowsingLogOnly) {
logger.error(error.message)
} else {
throw error
}
} else if (body?.matches) {
const prefix = safeBrowsingLogOnly
? 'Considered threat by Safe Browsing but ignoring'
: 'Malicious link content'
return `${ENDPOINT}&${threatTypesQuery}&uri=${encodedUrl}`
}

/**
* Result.matches is a list of threat matches for all matched urls, example:
* [{ "threatType": "SOCIAL_ENGINEERING", "threat": { "url": "https://testsafebrowsing.appspot.com/s/phishing.html" } }]
* we use _.groupBy to group threat matches by url so that the serialized form
* matches what is currently written into our repository for single url lookups.
*/
const matchesByUrl = _.groupBy(body.matches, (obj) => obj.threat.url)
Object.entries(matchesByUrl).forEach(
([url, threatMatch]: [string, any]) => {
private fetchWebRiskData: (url: string) => Promise<WebRiskThreat | null> =
async (url) => {
const endpoint = this.constructWebRiskEndpoint(url, WEB_RISK_THREAT_TYPES)

const response = await fetch(endpoint, { method: 'GET' })
const body = await response.json()
if (!response.ok) {
const error = new Error(
`Safe Browsing failure:\tError: ${response.statusText}\t message: ${body.error.message}`,
)
if (safeBrowsingLogOnly) {
logger.error(error.message)
} else {
throw error
}
} else if (body?.threat) {
const prefix = safeBrowsingLogOnly
? 'Considered threat by Safe Browsing but ignoring'
: 'Malicious link content'

/**
* Result.threat.threatTypes is a list of threat matches for a url, example:
* "threat": { "threatTypes": ["MALWARE"], "expireTime": "2024-03-20T05:29:41.898456500Z"}.
*/
logger.warn(
`${prefix}: ${url} yields ${JSON.stringify(body.threat, null, 2)}`,
)
try {
// writing to the safeBrowsingRepository should be non-blocking
this.safeBrowsingRepository.set(url, body.threat)
} catch (e) {
// writes are wrapped in a try-catch block to prevent errors from bubbling up
logger.warn(
`${prefix}: ${url} yields ${JSON.stringify(threatMatch, null, 2)}`,
`failed to set ${url} in safeBrowsingRepository, skipping`,
)
try {
// writing to the safeBrowsingRepository should be non-blocking
this.safeBrowsingRepository.set(url, threatMatch)
} catch (e) {
// writes are wrapped in a try-catch block to prevent errors from bubbling up
logger.warn(
`failed to set ${url} in safeBrowsingRepository, skipping`,
)
}
},
)
matches = body.matches
}
return body.threat
}
return null
}
return matches
}

public isThreatBulk: (
urls: string[],
batchSize?: number,
) => Promise<boolean> = async (urls, batchSize = SAFE_BROWSING_LIMIT) => {
public isThreatBulk: (urls: string[]) => Promise<boolean> = async (urls) => {
if (!safeBrowsingKey) {
logger.warn(`No Safe Browsing API key provided. Not scanning in bulk`)
return false
}

const urlChunks: string[][] = []

for (let i = 0; i < urls.length; i += batchSize) {
urlChunks.push(urls.slice(i, i + batchSize))
}

const matches = await Promise.all(
urlChunks.map((urlChunk) => this.lookup(urlChunk)),
const results = await Promise.all(
urls.map((url) => this.fetchWebRiskData(url)),
)
const isThreat = matches.some((match) => Boolean(match) === true)
const isThreat = results.some((result) => Boolean(result))
if (!safeBrowsingLogOnly && isThreat) {
return true
}
Expand Down
Loading

0 comments on commit 1e1f53b

Please sign in to comment.