From d854fcd35794ed014b874e9eb63062ff21a1ffc6 Mon Sep 17 00:00:00 2001
From: Joan Reyero
Date: Wed, 8 Feb 2023 12:36:46 +0100
Subject: [PATCH 1/2] Re-made hackernews integration

Replace the EagleEyeContentService keyword match with direct queries to
the Hacker News Algolia search endpoint, one request per keyword.

- getPostsByKeywords now takes `after` (a Unix timestamp used in the
  `created_at_i` numeric filter) instead of `nDays`, and returns
  de-duplicated `{ keywords, postId }` results.
- Onboarding runs pass `after = 0`; all other runs pass "now minus 30
  days".
- `postId` carries the hit's `objectID`, which the raw response type
  declares as a string, so it is typed as `string`.
- The search endpoint is requested over HTTPS.
---
 .../hackerNewsIntegrationService.ts           | 12 +++--
 .../integrations/types/hackerNewsTypes.ts     | 53 ++++++++++++-------
 .../usecases/hackerNews/getPostsByKeywords.ts | 47 ++++++++++++----
 3 files changed, 79 insertions(+), 33 deletions(-)

diff --git a/backend/src/serverless/integrations/services/integrations/hackerNewsIntegrationService.ts b/backend/src/serverless/integrations/services/integrations/hackerNewsIntegrationService.ts
index f454b6171b..254125c88e 100644
--- a/backend/src/serverless/integrations/services/integrations/hackerNewsIntegrationService.ts
+++ b/backend/src/serverless/integrations/services/integrations/hackerNewsIntegrationService.ts
@@ -1,4 +1,5 @@
 import sanitizeHtml from 'sanitize-html'
+import moment from 'moment'
 import { MemberAttributeName } from '../../../../database/attributes/member/enums'
 import { HackerNewsMemberAttributes } from '../../../../database/attributes/member/hackerNews'
 import MemberAttributeSettingsService from '../../../../services/memberAttributeSettingsService'
@@ -13,9 +14,9 @@ import Operations from '../../../dbOperations/operations'
 import getPost from '../../usecases/hackerNews/getPost'
 import { HackerNewsGrid } from '../../grid/hackerNewsGrid'
 import {
-  EagleEyeResponse,
   HackerNewsResponse,
   HackerNewsIntegrationSettings,
+  HackerNewsSearchResult,
 } from '../../types/hackerNewsTypes'
 import { AddActivitiesSingle } from '../../types/messageTypes'
 import getPostsByKeywords from '../../usecases/hackerNews/getPostsByKeywords'
@@ -43,7 +44,10 @@ export class HackerNewsIntegrationService extends IntegrationServiceBase {
     const keywords = Array.from(new Set([...settings.keywords, ...settings.urls]))
     this.logger(context).info(`Fetching posts for keywords: ${keywords}`)
     const posts = await getPostsByKeywords(
-      { keywords, nDays: context.onboarding ? 1000000 : 3 },
+      {
+        keywords,
+        after: context.onboarding ? 0 : moment().subtract(30, 'days').unix(),
+      },
       context.serviceContext,
       this.logger(context),
     )
@@ -55,8 +59,8 @@ export class HackerNewsIntegrationService extends IntegrationServiceBase {
   }
 
   async getStreams(context: IStepContext): Promise {
-    return context.pipelineData.posts.map((a: EagleEyeResponse) => ({
-      value: a.sourceId.slice(a.sourceId.lastIndexOf(':') + 1),
+    return context.pipelineData.posts.map((a: HackerNewsSearchResult) => ({
+      value: a.postId,
       metadata: {
         channel: a.keywords[0],
       },
diff --git a/backend/src/serverless/integrations/types/hackerNewsTypes.ts b/backend/src/serverless/integrations/types/hackerNewsTypes.ts
index 89bc808f3a..5093679316 100644
--- a/backend/src/serverless/integrations/types/hackerNewsTypes.ts
+++ b/backend/src/serverless/integrations/types/hackerNewsTypes.ts
@@ -1,22 +1,37 @@
-export interface EagleEyeResponse {
-  vectorId: number
-  sourceId: string
-  title: string
-  url: string
-  createdAt: string
-  text: string
-  username: string
-  platform: string
-  timestamp: string
-  userAttributes: any
-  postAttributes: {
-    commentsCount: number
-    score: number
-  }
-  keywords: string[]
+export interface HackerNewsSearchResponseRaw {
+  hits: [
+    {
+      created_at: string
+      title: string
+      url: string
+      author: string
+      points: number
+      story_text: string
+      comment_text: string | null
+      num_comments: number
+      story_id: string | null
+      story_title: string | null
+      story_url: string | null
+      parent_id: string | null
+      created_at_i: number
+      _tags: string[]
+      objectID: string
+      _highlightResult: {
+        [key: string]: {
+          value: string
+          matchLevel: string
+          matchedWords: string[]
+          fullyHighlighted?: boolean
+        }
+      }
+    },
+  ]
 }
 
-export type EagleEyeResponses = EagleEyeResponse[]
+export interface HackerNewsSearchResult {
+  keywords: string[]
+  postId: string
+}
 
 export interface HackerNewsPost {
   by: string
@@ -49,7 +64,7 @@ export interface HackerNewsIntegrationSettings {
   urls: string[]
 }
 
-export interface EagleEyeInput {
+export interface HackerNewsKeywordSearchInput {
   keywords: string[]
-  nDays: number
+  after: number
 }
diff --git a/backend/src/serverless/integrations/usecases/hackerNews/getPostsByKeywords.ts b/backend/src/serverless/integrations/usecases/hackerNews/getPostsByKeywords.ts
index 0e19a3ac5e..6a3563b131 100644
--- a/backend/src/serverless/integrations/usecases/hackerNews/getPostsByKeywords.ts
+++ b/backend/src/serverless/integrations/usecases/hackerNews/getPostsByKeywords.ts
@@ -1,23 +1,50 @@
+import axios from 'axios'
 import { IServiceOptions } from '../../../../services/IServiceOptions'
-import { EagleEyeResponses, EagleEyeInput } from '../../types/hackerNewsTypes'
+import {
+  HackerNewsKeywordSearchInput,
+  HackerNewsSearchResponseRaw,
+  HackerNewsSearchResult,
+} from '../../types/hackerNewsTypes'
 import { Logger } from '../../../../utils/logging'
 import { timeout } from '../../../../utils/timing'
-import EagleEyeContentService from '../../../../services/eagleEyeContentService'
 
 async function getPostsByKeyword(
-  input: EagleEyeInput,
+  input: HackerNewsKeywordSearchInput,
   options: IServiceOptions,
   logger: Logger,
-): Promise {
+): Promise<HackerNewsSearchResult[]> {
   await timeout(2000)
 
   try {
-    const eagleEyeService = new EagleEyeContentService(options)
-    return await eagleEyeService.keywordMatch({
-      keywords: input.keywords,
-      nDays: input.nDays,
-      platform: 'hacker_news',
-    })
+    const out: HackerNewsSearchResult[] = []
+    const existing = new Set<string>()
+    for (const keyword of input.keywords) {
+      const config = {
+        method: 'get',
+        maxBodyLength: Infinity,
+        url: 'https://hn.algolia.com/api/v1/search',
+        params: {
+          query: keyword,
+          numericFilters: `created_at_i>${input.after}`,
+          tags: '(story,ask_hn,show_hn,poll)',
+        },
+        headers: {},
+      }
+
+      const response = await axios(config)
+      const data = response.data as HackerNewsSearchResponseRaw
+
+      for (const item of data.hits) {
+        if (!existing.has(item.objectID)) {
+          out.push({
+            keywords: [keyword],
+            postId: item.objectID,
+          })
+          existing.add(item.objectID)
+        }
+      }
+    }
+    return out
   } catch (err) {
     logger.error({ err, input }, 'Error while getting posts by keyword in EagleEye')
     throw err
From 74f85bab499ae1c68e8fc05d5d45219c83177faa Mon Sep 17 00:00:00 2001
From: Joan Reyero
Date: Wed, 8 Feb 2023 12:47:42 +0100
Subject: [PATCH 2/2] Config

---
 backend/config/default.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backend/config/default.json b/backend/config/default.json
index c31da9bcbb..13dfcfeaed 100644
--- a/backend/config/default.json
+++ b/backend/config/default.json
@@ -34,5 +34,6 @@
     "maxRetrospectInSeconds": 3600
   },
   "github": {},
-  "enrichment": {}
+  "enrichment": {},
+  "eagleEye": {}
 }