From 548dd83637cffd64fdf097b92584e92125de8a68 Mon Sep 17 00:00:00 2001 From: Justin Maximillian Kimlim Date: Thu, 18 Apr 2024 19:15:14 +0700 Subject: [PATCH] feat: import opml handler --- .../app/src/main/graphql/schema.graphqls | 1 + .../Services/DataService/GQLSchema.swift | 2 + .../importers/uploadImportFileResolver.ts | 5 +- packages/api/src/schema.ts | 1 + packages/discover/src/types/OmnivoreSchema.ts | 1 + packages/import-handler/package.json | 1 + packages/import-handler/src/index.ts | 8 +- packages/import-handler/src/opml.ts | 74 +++++++++++++++++++ .../import-handler/test/opml/data/feeds.opml | 10 +++ .../import-handler/test/opml/opml.test.ts | 51 +++++++++++++ .../web/components/templates/UploadModal.tsx | 14 ++++ .../templates/homeFeed/AddLinkModal.tsx | 18 ++++- .../mutations/uploadImportFileMutation.ts | 1 + yarn.lock | 28 +++++++ 14 files changed, 211 insertions(+), 4 deletions(-) create mode 100644 packages/import-handler/src/opml.ts create mode 100644 packages/import-handler/test/opml/data/feeds.opml create mode 100644 packages/import-handler/test/opml/opml.test.ts diff --git a/android/Omnivore/app/src/main/graphql/schema.graphqls b/android/Omnivore/app/src/main/graphql/schema.graphqls index 5c2431d8a4..bb2de50ab7 100644 --- a/android/Omnivore/app/src/main/graphql/schema.graphqls +++ b/android/Omnivore/app/src/main/graphql/schema.graphqls @@ -2702,6 +2702,7 @@ enum UploadImportFileType { MATTER POCKET URL_LIST + OPML } type User { diff --git a/apple/OmnivoreKit/Sources/Services/DataService/GQLSchema.swift b/apple/OmnivoreKit/Sources/Services/DataService/GQLSchema.swift index cd3cf2b2c4..8872f51293 100644 --- a/apple/OmnivoreKit/Sources/Services/DataService/GQLSchema.swift +++ b/apple/OmnivoreKit/Sources/Services/DataService/GQLSchema.swift @@ -38617,6 +38617,8 @@ extension Enums { case pocket = "POCKET" case urlList = "URL_LIST" + + case opml = "OPML" } } diff --git a/packages/api/src/resolvers/importers/uploadImportFileResolver.ts 
b/packages/api/src/resolvers/importers/uploadImportFileResolver.ts index 0a1c0dd531..975cb9a08a 100644 --- a/packages/api/src/resolvers/importers/uploadImportFileResolver.ts +++ b/packages/api/src/resolvers/importers/uploadImportFileResolver.ts @@ -17,7 +17,7 @@ import { } from '../../utils/uploads' const MAX_DAILY_UPLOADS = 1 -const VALID_CONTENT_TYPES = ['text/csv', 'application/zip'] +const VALID_CONTENT_TYPES = ['text/csv', 'application/zip', 'text/opml'] const extensionForContentType = (contentType: string) => { switch (contentType) { @@ -25,6 +25,9 @@ const extensionForContentType = (contentType: string) => { return 'csv' case 'application/zip': return 'zip' + case 'text/opml': + case 'text/xml': + return 'opml' } return '.unknown' } diff --git a/packages/api/src/schema.ts b/packages/api/src/schema.ts index acae6f5200..a860ecaca9 100755 --- a/packages/api/src/schema.ts +++ b/packages/api/src/schema.ts @@ -2506,6 +2506,7 @@ const schema = gql` URL_LIST POCKET MATTER + OPML } enum UploadImportFileErrorCode { diff --git a/packages/discover/src/types/OmnivoreSchema.ts b/packages/discover/src/types/OmnivoreSchema.ts index 8d3e593fa5..9db1023c51 100644 --- a/packages/discover/src/types/OmnivoreSchema.ts +++ b/packages/discover/src/types/OmnivoreSchema.ts @@ -2599,6 +2599,7 @@ export enum UploadImportFileType { UrlList = 'URL_LIST', Pocket = 'POCKET', Matter = 'MATTER', + Opml = 'OPML', } export enum UploadImportFileErrorCode { diff --git a/packages/import-handler/package.json b/packages/import-handler/package.json index f62fae3f86..7da21cf806 100644 --- a/packages/import-handler/package.json +++ b/packages/import-handler/package.json @@ -48,6 +48,7 @@ "jsonwebtoken": "^8.5.1", "linkedom": "^0.14.21", "nodemon": "^2.0.15", + "opmlparser": "^0.8.0", "unzip-stream": "^0.3.1", "urlsafe-base64": "^1.0.0", "uuid": "^9.0.0" diff --git a/packages/import-handler/src/index.ts b/packages/import-handler/src/index.ts index d95751b478..5b9e7efec8 100644 --- 
a/packages/import-handler/src/index.ts +++ b/packages/import-handler/src/index.ts @@ -13,6 +13,7 @@ import { importMatterArchive } from './matterHistory' import { ImportStatus, updateMetrics } from './metrics' import { createRedisClient } from './redis' import { CONTENT_FETCH_URL, createCloudTask, emailUserUrl } from './task' +import { importOpml } from './opml' export enum ArticleSavingRequestStatus { Failed = 'FAILED', @@ -32,7 +33,7 @@ const signToken = promisify(jwt.sign) const storage = new Storage() -const CONTENT_TYPES = ['text/csv', 'application/zip'] +const CONTENT_TYPES = ['text/csv', 'application/zip', 'text/opml'] export type UrlHandler = ( ctx: ImportContext, @@ -176,6 +177,8 @@ const handlerForFile = (name: string): importHandlerFunc | undefined => { return importMatterArchive } else if (fileName.startsWith('URL_LIST') || fileName.startsWith('POCKET')) { return importCsv + } else if (fileName.startsWith('OPML')) { + return importOpml } return undefined @@ -192,6 +195,9 @@ const importSource = (name: string): string => { if (fileName.startsWith('POCKET')) { return 'pocket' } + if (fileName.startsWith('OPML')) { + return 'opml' + } return 'unknown' } diff --git a/packages/import-handler/src/opml.ts b/packages/import-handler/src/opml.ts new file mode 100644 index 0000000000..b7a7e94512 --- /dev/null +++ b/packages/import-handler/src/opml.ts
@@ -0,0 +1,108 @@
+// @ts-ignore
+import OpmlParser from 'opmlparser'
+import { Stream } from 'stream'
+import { ArticleSavingRequestStatus, ImportContext } from '.'
+import { createMetrics, ImportStatus, updateMetrics } from './metrics'
+
+// maximum number of urls (imported + failed) handled by a single import
+const IMPORT_LIMIT = 20000
+
+// the only outline attribute the importer reads
+interface OpmlOutline {
+  xmlurl?: string
+}
+
+/**
+ * Handles a single outline: counts it, hands its feed url to ctx.urlHandler
+ * and records the outcome on ctx and in the import metrics.
+ */
+const importFeed = async (
+  ctx: ImportContext,
+  feed: OpmlOutline
+): Promise<void> => {
+  // outlines without an xmlurl carry no feed to import, skip them
+  if (feed.xmlurl === undefined) return
+
+  try {
+    // update total counter
+    await updateMetrics(
+      ctx.redisClient,
+      ctx.userId,
+      ctx.taskId,
+      ImportStatus.TOTAL
+    )
+    // throws on a malformed url, which is then counted as invalid below
+    const url = new URL(feed.xmlurl)
+    await ctx.urlHandler(ctx, url)
+    ctx.countImported += 1
+    // update started counter
+    await updateMetrics(
+      ctx.redisClient,
+      ctx.userId,
+      ctx.taskId,
+      ImportStatus.STARTED
+    )
+  } catch (error) {
+    console.log('invalid data', feed, error)
+
+    ctx.countFailed += 1
+    // update invalid counter
+    await updateMetrics(
+      ctx.redisClient,
+      ctx.userId,
+      ctx.taskId,
+      ImportStatus.INVALID
+    )
+  }
+}
+
+/**
+ * Reads an OPML document from `stream` and passes every feed url in it to
+ * `ctx.urlHandler`, one at a time and in document order.
+ *
+ * Resolves once the parser has ended and every queued outline was handled.
+ * Rejects when the stream or the parser emits an error, when a failed url
+ * cannot be recorded in the metrics, or when the import limit is reached.
+ */
+export const importOpml = async (
+  ctx: ImportContext,
+  stream: Stream
+): Promise<void> => {
+  // create metrics in redis
+  await createMetrics(ctx.redisClient, ctx.userId, ctx.taskId, ctx.source)
+
+  return new Promise<void>((resolve, reject) => {
+    const opmlParser = new OpmlParser()
+    // tail of the promise chain that handles the outlines sequentially
+    let pending: Promise<void> = Promise.resolve()
+
+    const enqueue = (feed: OpmlOutline): void => {
+      pending = pending.then(async () => {
+        await importFeed(ctx, feed)
+        // limit import to 20k urls
+        if (ctx.countImported + ctx.countFailed >= IMPORT_LIMIT) {
+          console.log('import limit reached')
+          throw new Error('import limit reached')
+        }
+      })
+      // fail fast; once the chain is rejected the remaining outlines are skipped
+      void pending.catch(reject)
+    }
+
+    opmlParser.on('readable', () => {
+      let feed: OpmlOutline | null
+      while ((feed = opmlParser.read())) {
+        enqueue(feed)
+      }
+    })
+    // every outline has been read, wait for the queued imports to finish
+    opmlParser.on('end', () => {
+      void pending.then(resolve, reject)
+    })
+    opmlParser.on('error', reject)
+    // pipe() does not forward errors of the source stream
+    stream.on('error', reject)
+
+    stream.pipe(opmlParser)
+  })
+}
diff --git a/packages/import-handler/test/opml/data/feeds.opml b/packages/import-handler/test/opml/data/feeds.opml new file mode 100644 index 0000000000..e755551a07 --- /dev/null +++ b/packages/import-handler/test/opml/data/feeds.opml @@ -0,0 +1,10 @@ + + + + OPML Feeds + + + + + + \ No newline at end of file diff --git a/packages/import-handler/test/opml/opml.test.ts b/packages/import-handler/test/opml/opml.test.ts new file mode 100644 index 0000000000..dbe9937312 --- /dev/null +++ 
b/packages/import-handler/test/opml/opml.test.ts
@@ -0,0 +1,54 @@
+import * as chai from 'chai'
+import { expect } from 'chai'
+import chaiString from 'chai-string'
+import * as fs from 'fs'
+import 'mocha'
+import { ArticleSavingRequestStatus, ImportContext } from '../../src'
+import { importOpml } from '../../src/opml'
+import { stubImportCtx } from '../util'
+
+chai.use(chaiString)
+
+describe('Test OPML importer', () => {
+  let stub: ImportContext
+
+  beforeEach(() => {
+    stub = stubImportCtx()
+  })
+
+  afterEach(async () => {
+    await stub.redisClient.quit()
+  })
+
+  describe('Load a simple OPML file', () => {
+    it('should call the handler for each URL', async () => {
+      const urls: URL[] = []
+      const stream = fs.createReadStream('./test/opml/data/feeds.opml')
+      // record every url handed to the handler, in call order
+      stub.urlHandler = (_ctx: ImportContext, url: URL): Promise<void> => {
+        urls.push(url)
+        return Promise.resolve()
+      }
+
+      await importOpml(stub, stream)
+      expect(stub.countFailed).to.equal(0)
+      expect(stub.countImported).to.equal(2)
+      expect(urls).to.eql([
+        new URL('https://hnrss.org/newest'),
+        new URL('https://google.com'),
+      ])
+    })
+
+    it('increments the failed count when the handler rejects', async () => {
+      const stream = fs.createReadStream('./test/opml/data/feeds.opml')
+      // every url is rejected, so nothing may be counted as imported
+      stub.urlHandler = (): Promise<void> => {
+        return Promise.reject(new Error('Failed to import url'))
+      }
+
+      await importOpml(stub, stream)
+      expect(stub.countFailed).to.equal(2)
+      expect(stub.countImported).to.equal(0)
+    })
+  })
+})
diff --git a/packages/web/components/templates/UploadModal.tsx b/packages/web/components/templates/UploadModal.tsx index e19c74e04b..00ffa9955e 100644 --- a/packages/web/components/templates/UploadModal.tsx +++ b/packages/web/components/templates/UploadModal.tsx @@ -129,6 +129,9 @@ export function UploadModal(props: UploadModalProps): JSX.Element { ) { contentType = 'text/csv' } + if (file.name.endsWith('.xml') || file.name.endsWith('.opml')) { + contentType = 'text/opml' + } switch (contentType) { case 
'text/csv': { let urlCount = 0 @@ -184,6 +187,16 @@ export function UploadModal(props: UploadModalProps): JSX.Element { uploadSignedUrl: result?.uploadSignedUrl, } } + case 'text/opml': + case 'text/xml': { + const result = await uploadImportFileRequestMutation( + UploadImportFileType.OPML, + contentType + ) + return { + uploadSignedUrl: result?.uploadSignedUrl, + } + } case 'application/pdf': case 'application/epub+zip': { const request = await uploadFileRequestMutation({ @@ -315,6 +328,7 @@ export function UploadModal(props: UploadModalProps): JSX.Element { noClick={true} accept={{ 'text/csv': ['.csv'], + 'text/opml': ['.opml', '.xml'], 'application/zip': ['.zip'], 'application/pdf': ['.pdf'], 'application/epub+zip': ['.epub'], diff --git a/packages/web/components/templates/homeFeed/AddLinkModal.tsx b/packages/web/components/templates/homeFeed/AddLinkModal.tsx index 8abf6fec4c..5a66d02a82 100644 --- a/packages/web/components/templates/homeFeed/AddLinkModal.tsx +++ b/packages/web/components/templates/homeFeed/AddLinkModal.tsx @@ -262,6 +262,7 @@ const UploadOPMLTab = (): JSX.Element => { description="Drag OPML file to add feeds" accept={{ 'text/csv': ['.csv'], + 'text/opml': ['.opml', '.xml'], 'application/zip': ['.zip'], 'application/pdf': ['.pdf'], 'application/epub+zip': ['.epub'], @@ -433,6 +434,9 @@ const UploadPad = (props: UploadPadProps): JSX.Element => { ) { contentType = 'text/csv' } + if (file.name.endsWith('.xml') || file.name.endsWith('.opml')) { + contentType = 'text/opml' + } switch (contentType) { case 'text/csv': { let urlCount = 0 @@ -488,6 +492,16 @@ const UploadPad = (props: UploadPadProps): JSX.Element => { uploadSignedUrl: result?.uploadSignedUrl, } } + case 'text/opml': + case 'text/xml': { + const result = await uploadImportFileRequestMutation( + UploadImportFileType.OPML, + contentType + ) + return { + uploadSignedUrl: result?.uploadSignedUrl, + } + } case 'application/pdf': case 'application/epub+zip': { const request = await 
uploadFileRequestMutation({ @@ -791,7 +805,7 @@ const TabBar = (props: TabBarProps) => { > Feed - {/* */} +