Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: replace safe browsing with web risk #2305

Merged
merged 2 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/server/modules/threat/interfaces/SafeBrowsingRepository.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import { HasCacheDuration } from '../../../repositories/types'
import { WebRiskThreat } from '../../../repositories/types'

export interface SafeBrowsingRepository {
/**
* Sets or replaces the Safe Browsing threat matches associated with a URL.
* @param {string} url The URL.
* @param {Array<HasCacheDuration>} matches The threat matches returned by Safe Browsing for the URL.
* @param {WebRiskThreat} threat The threat match returned by Safe Browsing for the URL.
*/
set(url: string, matches: HasCacheDuration[]): Promise<void>
set(url: string, threat: WebRiskThreat): Promise<void>

/**
* Retrieves Safe Browsing threat matches for a URL, if any.
* @param {string} url The URL.
* @returns {Promise<Array<HasCacheDuration> | null>} An array of threat matches, or null if none.
* @returns {Promise<WebRiskThreat | null>} Threat match or null if none.
*/
get(url: string): Promise<HasCacheDuration[] | null>
get(url: string): Promise<WebRiskThreat | null>
}
22 changes: 10 additions & 12 deletions src/server/modules/threat/mappers/SafeBrowsingMapper.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,25 @@
/* eslint-disable class-methods-use-this, lines-between-class-members, no-dupe-class-members */
import { injectable } from 'inversify'
import { TwoWayMapper } from '../../../mappers/TwoWayMapper'
import { HasCacheDuration } from '../../../repositories/types'
import { WebRiskThreat } from '../../../repositories/types'

@injectable()
export class SafeBrowsingMapper
implements TwoWayMapper<HasCacheDuration[], string>
{
persistenceToDto(matches: string): HasCacheDuration[]
persistenceToDto(matches: string | null): HasCacheDuration[] | null {
if (!matches) {
export class SafeBrowsingMapper implements TwoWayMapper<WebRiskThreat, string> {
persistenceToDto(threat: string): WebRiskThreat
persistenceToDto(threat: string | null): WebRiskThreat | null {
if (!threat) {
return null
}
return JSON.parse(matches)
return JSON.parse(threat)
}

dtoToPersistence(matches: HasCacheDuration[]): string
dtoToPersistence(matches: HasCacheDuration[] | null): string | null {
if (!matches) {
dtoToPersistence(threat: WebRiskThreat): string
dtoToPersistence(threat: WebRiskThreat | null): string | null {
if (!threat) {
return null
}

return JSON.stringify(matches)
return JSON.stringify(threat)
}
}

Expand Down
29 changes: 13 additions & 16 deletions src/server/modules/threat/repositories/SafeBrowsingRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,43 @@

import { inject, injectable } from 'inversify'
import { safeBrowsingClient } from '../../../redis'
import { HasCacheDuration } from '../../../repositories/types'
import * as interfaces from '../interfaces'
import { TwoWayMapper } from '../../../mappers/TwoWayMapper'
import { DependencyIds } from '../../../constants'
import { NotFoundError } from '../../../util/error'
import { WebRiskThreat } from '../../../repositories/types'

// set default threat cache
const DEFAULT_CACHE_DURATION_IN_S = 300

@injectable()
export class SafeBrowsingRepository
implements interfaces.SafeBrowsingRepository
{
private safeBrowsingMapper: TwoWayMapper<HasCacheDuration[], string>
private safeBrowsingMapper: TwoWayMapper<WebRiskThreat, string>

public constructor(
@inject(DependencyIds.safeBrowsingMapper)
safeBrowsingMapper: TwoWayMapper<HasCacheDuration[], string>,
safeBrowsingMapper: TwoWayMapper<WebRiskThreat, string>,
) {
this.safeBrowsingMapper = safeBrowsingMapper
}

public set: (url: string, matches: HasCacheDuration[]) => Promise<void> = (
public set: (url: string, threat: WebRiskThreat) => Promise<void> = (
url,
matches,
threat,
) => {
return new Promise((resolve, reject) => {
if (!matches.length || !matches[0]) {
if (!threat) {
reject(
new NotFoundError(`No matches found for ${url}, should not persist`),
new NotFoundError(`No threat found for ${url}, should not persist`),
)
}
// Threat matches from Safe Browsing API specify a
// cache duration in seconds, with an 's' suffix
const [{ cacheDuration }] = matches
const expiryInSeconds = Number(
cacheDuration.substring(0, cacheDuration.length - 1),
)
safeBrowsingClient.set(
url,
this.safeBrowsingMapper.dtoToPersistence(matches),
this.safeBrowsingMapper.dtoToPersistence(threat),
'EX',
expiryInSeconds,
DEFAULT_CACHE_DURATION_IN_S,
(redisSetError) => {
if (redisSetError) {
reject(redisSetError)
Expand All @@ -54,7 +51,7 @@ export class SafeBrowsingRepository
})
}

public get: (url: string) => Promise<HasCacheDuration[] | null> = (url) => {
public get: (url: string) => Promise<WebRiskThreat | null> = (url) => {
return new Promise((resolve, reject) => {
safeBrowsingClient.get(url, (redisError, string) => {
if (redisError) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,14 @@ const { SafeBrowsingRepository } = require('..')

const repository = new SafeBrowsingRepository(new SafeBrowsingMapper())

const durationInSeconds = 1
const durationInSeconds = 300
const url = 'https://example.com'
const matches = [
{
cacheDuration: `${durationInSeconds}s`,
},
]
const threat = {
threatTypes: ['MALWARE'],
expireTime: '2024-03-20T05:29:41.898456500Z',
}

describe('otp repository redis test', () => {
describe('safe browsing repository redis test', () => {
beforeEach(async () => {
await new Promise<void>((resolve) => {
redisMockClient.flushall(() => resolve())
Expand All @@ -32,8 +31,8 @@ describe('otp repository redis test', () => {
})

it('returns a value if present', async () => {
redisMockClient.set(url, JSON.stringify(matches))
await expect(repository.get(url)).resolves.toStrictEqual(matches)
redisMockClient.set(url, JSON.stringify(threat))
await expect(repository.get(url)).resolves.toStrictEqual(threat)
expect(redisMockClient.get).toBeCalledWith(url, expect.any(Function))
})

Expand All @@ -43,10 +42,10 @@ describe('otp repository redis test', () => {
})

it('sets a value if specified', async () => {
await repository.set(url, matches)
await repository.set(url, threat)
expect(redisMockClient.set).toBeCalledWith(
url,
JSON.stringify(matches),
JSON.stringify(threat),
'EX',
durationInSeconds,
expect.any(Function),
Expand Down
160 changes: 67 additions & 93 deletions src/server/modules/threat/services/SafeBrowsingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,18 @@ import { UrlThreatScanService } from '../interfaces'
import { logger, safeBrowsingKey, safeBrowsingLogOnly } from '../../../config'
import { SafeBrowsingRepository } from '../interfaces/SafeBrowsingRepository'
import { DependencyIds } from '../../../constants'

const ENDPOINT = `https://safebrowsing.googleapis.com/v4/threatMatches:find?key=${safeBrowsingKey}`
const SAFE_BROWSING_LIMIT = 500
import { WebRiskThreat } from '../../../repositories/types'

const ENDPOINT = `https://webrisk.googleapis.com/v1/uris:search?key=${safeBrowsingKey}`
const WEB_RISK_THREAT_TYPES = [
'SOCIAL_ENGINEERING',
'MALWARE',
'UNWANTED_SOFTWARE',
// SOCIAL_ENGINEERING_EXTENDED_COVERAGE improves coverage of malicious urls
// but may have small amount (<10%) of potential false positives,
// see details https://cloud.google.com/web-risk/docs/extended-coverage
'SOCIAL_ENGINEERING_EXTENDED_COVERAGE',
]

@injectable()
export class SafeBrowsingService implements UrlThreatScanService {
Expand All @@ -22,117 +31,82 @@ export class SafeBrowsingService implements UrlThreatScanService {
this.safeBrowsingRepository = safeBrowsingRepository
}

/**
* Request template for Safe Browsing. Threat lists found
* at /v4/threatLists and are keyed by type, platform and entry.
* Both ANY_PLATFORM and all the platforms are specified, the latter
* so that we can look up the IP_RANGE lists, which are not keyed by
* ANY_PLATFORM.
*/
private requestTemplate = Object.freeze({
client: {
clientId: 'GoGovSG',
clientVersion: '1.0.0',
},
threatInfo: {
threatTypes: [
'POTENTIALLY_HARMFUL_APPLICATION',
'UNWANTED_SOFTWARE',
'MALWARE',
'SOCIAL_ENGINEERING',
],
platformTypes: ['ANY_PLATFORM', 'WINDOWS', 'LINUX', 'OSX'],
threatEntryTypes: ['URL', 'EXECUTABLE', 'IP_RANGE'],
},
})

public isThreat: (url: string) => Promise<boolean> = async (url) => {
if (!safeBrowsingKey) {
logger.warn(`No Safe Browsing API key provided. Not scanning url: ${url}`)
return false
}
let matches = await this.safeBrowsingRepository.get(url)
if (!matches) {
matches = await this.lookup([url])
let threat = await this.safeBrowsingRepository.get(url)
if (!threat) {
threat = await this.fetchWebRiskData(url)
}
return !safeBrowsingLogOnly && Boolean(matches)
return !safeBrowsingLogOnly && Boolean(threat)
}

private async lookup(urls: string[]) {
let matches = null
const request = { ...this.requestTemplate } as any
private constructWebRiskEndpoint: (
url: string,
threatTypes: string[],
) => string = (url, threatTypes) => {
const encodedUrl = encodeURIComponent(url)

request.threatInfo.threatEntries = urls.map((url) => {
return { url }
})
const threatTypesQuery = threatTypes
.map((type: string) => `threatTypes=${type}`)
.join('&')

const response = await fetch(ENDPOINT, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(request),
})
const body = await response.json()
if (!response.ok) {
const error = new Error(
`Safe Browsing failure:\tError: ${response.statusText}\t message: ${body.error.message}`,
)
if (safeBrowsingLogOnly) {
logger.error(error.message)
} else {
throw error
}
} else if (body?.matches) {
const prefix = safeBrowsingLogOnly
? 'Considered threat by Safe Browsing but ignoring'
: 'Malicious link content'
return `${ENDPOINT}&${threatTypesQuery}&uri=${encodedUrl}`
}

/**
* Result.matches is a list of threat matches for all matched urls, example:
* [{ "threatType": "SOCIAL_ENGINEERING", "threat": { "url": "https://testsafebrowsing.appspot.com/s/phishing.html" } }]
* we use _.groupBy to group threat matches by url so that the serialized form
* matches what is currently written into our repository for single url lookups.
*/
const matchesByUrl = _.groupBy(body.matches, (obj) => obj.threat.url)
Object.entries(matchesByUrl).forEach(
([url, threatMatch]: [string, any]) => {
private fetchWebRiskData: (url: string) => Promise<WebRiskThreat | null> =
async (url) => {
const endpoint = this.constructWebRiskEndpoint(url, WEB_RISK_THREAT_TYPES)

const response = await fetch(endpoint, { method: 'GET' })
const body = await response.json()
if (!response.ok) {
const error = new Error(
`Safe Browsing failure:\tError: ${response.statusText}\t message: ${body.error.message}`,
)
if (safeBrowsingLogOnly) {
logger.error(error.message)
} else {
throw error
}
} else if (body?.threat) {
const prefix = safeBrowsingLogOnly
? 'Considered threat by Safe Browsing but ignoring'
: 'Malicious link content'

/**
* Result.threat.threatTypes is a list of threat matches for a url, example:
* "threat": { "threatTypes": ["MALWARE"], "expireTime": "2024-03-20T05:29:41.898456500Z"}.
*/
logger.warn(
`${prefix}: ${url} yields ${JSON.stringify(body.threat, null, 2)}`,
)
try {
// writing to the safeBrowsingRepository should be non-blocking
this.safeBrowsingRepository.set(url, body.threat)
} catch (e) {
// writes are wrapped in a try-catch block to prevent errors from bubbling up
logger.warn(
`${prefix}: ${url} yields ${JSON.stringify(threatMatch, null, 2)}`,
`failed to set ${url} in safeBrowsingRepository, skipping`,
)
try {
// writing to the safeBrowsingRepository should be non-blocking
this.safeBrowsingRepository.set(url, threatMatch)
} catch (e) {
// writes are wrapped in a try-catch block to prevent errors from bubbling up
logger.warn(
`failed to set ${url} in safeBrowsingRepository, skipping`,
)
}
},
)
matches = body.matches
}
return body.threat
}
return null
}
return matches
}

public isThreatBulk: (
urls: string[],
batchSize?: number,
) => Promise<boolean> = async (urls, batchSize = SAFE_BROWSING_LIMIT) => {
public isThreatBulk: (urls: string[]) => Promise<boolean> = async (urls) => {
if (!safeBrowsingKey) {
logger.warn(`No Safe Browsing API key provided. Not scanning in bulk`)
return false
}

const urlChunks: string[][] = []

for (let i = 0; i < urls.length; i += batchSize) {
urlChunks.push(urls.slice(i, i + batchSize))
}

const matches = await Promise.all(
urlChunks.map((urlChunk) => this.lookup(urlChunk)),
const results = await Promise.all(
urls.map((url) => this.fetchWebRiskData(url)),
)
const isThreat = matches.some((match) => Boolean(match) === true)
const isThreat = results.some((result) => Boolean(result))
if (!safeBrowsingLogOnly && isThreat) {
return true
}
Expand Down
Loading
Loading