Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: import opml handler #3836

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions android/Omnivore/app/src/main/graphql/schema.graphqls
Original file line number Diff line number Diff line change
Expand Up @@ -2702,6 +2702,7 @@ enum UploadImportFileType {
MATTER
POCKET
URL_LIST
OPML
}

type User {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38617,6 +38617,8 @@ extension Enums {
case pocket = "POCKET"

case urlList = "URL_LIST"

case opml = "OPML"
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@ import {
} from '../../utils/uploads'

const MAX_DAILY_UPLOADS = 1
const VALID_CONTENT_TYPES = ['text/csv', 'application/zip']
const VALID_CONTENT_TYPES = ['text/csv', 'application/zip', 'text/opml']

/**
 * Picks the file extension used for an uploaded import file based on its
 * content type.
 *
 * Both `text/opml` and `text/xml` map to `opml`. Any content type not listed
 * here yields the `.unknown` fallback (note that, unlike the known
 * extensions, the fallback includes a leading dot).
 */
const extensionForContentType = (contentType: string) => {
  if (contentType === 'text/csv') {
    return 'csv'
  }
  if (contentType === 'application/zip') {
    return 'zip'
  }
  if (contentType === 'text/opml' || contentType === 'text/xml') {
    return 'opml'
  }
  return '.unknown'
}
Expand Down
1 change: 1 addition & 0 deletions packages/api/src/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2506,6 +2506,7 @@ const schema = gql`
URL_LIST
POCKET
MATTER
OPML
}

enum UploadImportFileErrorCode {
Expand Down
1 change: 1 addition & 0 deletions packages/discover/src/types/OmnivoreSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2599,6 +2599,7 @@ export enum UploadImportFileType {
UrlList = 'URL_LIST',
Pocket = 'POCKET',
Matter = 'MATTER',
Opml = 'OPML',
}

export enum UploadImportFileErrorCode {
Expand Down
1 change: 1 addition & 0 deletions packages/import-handler/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
"jsonwebtoken": "^8.5.1",
"linkedom": "^0.14.21",
"nodemon": "^2.0.15",
"opmlparser": "^0.8.0",
"unzip-stream": "^0.3.1",
"urlsafe-base64": "^1.0.0",
"uuid": "^9.0.0"
Expand Down
8 changes: 7 additions & 1 deletion packages/import-handler/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { importMatterArchive } from './matterHistory'
import { ImportStatus, updateMetrics } from './metrics'
import { createRedisClient } from './redis'
import { CONTENT_FETCH_URL, createCloudTask, emailUserUrl } from './task'
import { importOpml } from './opml'

export enum ArticleSavingRequestStatus {
Failed = 'FAILED',
Expand All @@ -32,7 +33,7 @@ const signToken = promisify(jwt.sign)

const storage = new Storage()

const CONTENT_TYPES = ['text/csv', 'application/zip']
const CONTENT_TYPES = ['text/csv', 'application/zip', 'text/opml']

export type UrlHandler = (
ctx: ImportContext,
Expand Down Expand Up @@ -176,6 +177,8 @@ const handlerForFile = (name: string): importHandlerFunc | undefined => {
return importMatterArchive
} else if (fileName.startsWith('URL_LIST') || fileName.startsWith('POCKET')) {
return importCsv
} else if (fileName.startsWith('OPML')) {
return importOpml
}

return undefined
Expand All @@ -192,6 +195,9 @@ const importSource = (name: string): string => {
if (fileName.startsWith('POCKET')) {
return 'pocket'
}
if (fileName.startsWith('OPML')) {
return 'opml'
}

return 'unknown'
}
Expand Down
74 changes: 74 additions & 0 deletions packages/import-handler/src/opml.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// @ts-ignore
import OpmlParser from 'opmlparser'
import { Stream } from 'stream'
import { ArticleSavingRequestStatus, ImportContext } from '.'
import { createMetrics, ImportStatus, updateMetrics } from './metrics'

/** The only part of a parsed OPML outline this importer relies on. */
type OpmlOutline = { xmlurl?: unknown }

// limit import to 20k urls (imported + failed)
const MAX_OPML_IMPORT_URLS = 20000

/**
 * Handles one outline read from the OPML parser.
 *
 * Outlines that carry no `xmlurl` are skipped without touching any counter.
 * For every other outline the TOTAL metric is bumped, the URL is parsed and
 * handed to `ctx.urlHandler`; on success `ctx.countImported` and the STARTED
 * metric are incremented, on any failure (malformed URL or handler error)
 * `ctx.countFailed` and the INVALID metric are incremented instead.
 */
const importOpmlOutline = async (
  ctx: ImportContext,
  outline: OpmlOutline
): Promise<void> => {
  const { xmlurl } = outline
  if (xmlurl === undefined) {
    return
  }

  try {
    // update total counter
    await updateMetrics(
      ctx.redisClient,
      ctx.userId,
      ctx.taskId,
      ImportStatus.TOTAL
    )
    // parsed inside the try block so a malformed URL is counted as invalid
    // instead of throwing out of the stream event handler
    const url = new URL(String(xmlurl))
    await ctx.urlHandler(ctx, url)
    ctx.countImported += 1
    // update started counter
    await updateMetrics(
      ctx.redisClient,
      ctx.userId,
      ctx.taskId,
      ImportStatus.STARTED
    )
  } catch (error) {
    console.log('invalid data', outline, error)

    ctx.countFailed += 1
    // update invalid counter
    await updateMetrics(
      ctx.redisClient,
      ctx.userId,
      ctx.taskId,
      ImportStatus.INVALID
    )
  }
}

/**
 * Imports every feed listed in an OPML document.
 *
 * The stream is piped through the OPML parser and each outline is passed to
 * `ctx.urlHandler` one at a time, in document order.
 *
 * @param ctx import context; its counters are updated as outlines are handled
 * @param stream stream producing the raw OPML document
 * @returns a promise that resolves once the parser has ended and every queued
 *   outline has been handled (also for a document without any outline). It
 *   rejects when the source stream or the parser emits an error, when metric
 *   bookkeeping fails, or when the import limit is reached.
 */
export const importOpml = async (
  ctx: ImportContext,
  stream: Stream
): Promise<void> => {
  // create metrics in redis
  await createMetrics(ctx.redisClient, ctx.userId, ctx.taskId, ctx.source)

  const opmlParser = new OpmlParser()

  return new Promise<void>((resolve, reject) => {
    let aborted = false
    // Serializes outline handling: 'readable' may fire again while an earlier
    // outline is still being imported. Every step catches its own errors, so
    // the chain itself never rejects.
    let queue: Promise<void> = Promise.resolve()

    const abort = (error: unknown): void => {
      if (aborted) {
        return
      }
      aborted = true
      reject(error)
    }

    const enqueue = (outline: OpmlOutline): void => {
      queue = queue.then(async () => {
        if (aborted) {
          return
        }
        try {
          await importOpmlOutline(ctx, outline)
        } catch (error) {
          abort(error)
          return
        }
        if (ctx.countImported + ctx.countFailed >= MAX_OPML_IMPORT_URLS) {
          console.log('import limit reached')
          abort(new Error('import limit reached'))
        }
      })
    }

    opmlParser.on('readable', () => {
      let outline: OpmlOutline | null
      // keep draining after an abort so the source stream can finish
      while ((outline = opmlParser.read())) {
        if (!aborted) {
          enqueue(outline)
        }
      }
    })

    opmlParser.on('end', () => {
      // resolving after a rejection is a no-op, so no extra guard is needed
      void queue.then(() => resolve())
    })

    opmlParser.on('error', abort)
    stream.on('error', abort)

    stream.pipe(opmlParser)
  })
}
10 changes: 10 additions & 0 deletions packages/import-handler/test/opml/data/feeds.opml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<opml version="2.0">
<head>
<title>OPML Feeds</title>
</head>
<body>
<outline text="https://hnrss.org/newest" title="https://hnrss.org/newest" type="rss" xmlUrl="https://hnrss.org/newest" htmlUrl="https://hnrss.org/newest" />
<outline text="https://google.com" title="https://google.com" type="rss" xmlUrl="https://google.com" htmlUrl="https://google.com" />
</body>
</opml>
51 changes: 51 additions & 0 deletions packages/import-handler/test/opml/opml.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import * as chai from 'chai'
import { expect } from 'chai'
import chaiString from 'chai-string'
import * as fs from 'fs'
import 'mocha'
import { ArticleSavingRequestStatus, ImportContext } from '../../src'
import { importOpml } from '../../src/opml'
import { stubImportCtx } from '../util'

chai.use(chaiString)

// Exercises importOpml against the two-feed fixture in ./test/opml/data/feeds.opml
describe('Test OPML importer', () => {
  let stub: ImportContext

  beforeEach(() => {
    stub = stubImportCtx()
  })

  afterEach(async () => {
    await stub.redisClient.quit()
  })

  describe('Load a simple OPML file', () => {
    it('should call the handler for each URL', async () => {
      const urls: URL[] = []
      const stream = fs.createReadStream('./test/opml/data/feeds.opml')
      // record every URL the importer hands over
      stub.urlHandler = (_ctx: ImportContext, url): Promise<void> => {
        urls.push(url)
        return Promise.resolve()
      }

      await importOpml(stub, stream)
      expect(stub.countFailed).to.equal(0)
      expect(stub.countImported).to.equal(2)
      expect(urls).to.eql([
        new URL('https://hnrss.org/newest'),
        new URL('https://google.com'),
      ])
    })

    // The fixture URLs are valid; the failure comes from the handler itself.
    it('increments the failed count when the URL handler rejects', async () => {
      const stream = fs.createReadStream('./test/opml/data/feeds.opml')
      // reject with an Error (not a bare string) so failures carry a stack
      stub.urlHandler = (): Promise<void> =>
        Promise.reject(new Error('Failed to import url'))

      await importOpml(stub, stream)
      expect(stub.countFailed).to.equal(2)
      expect(stub.countImported).to.equal(0)
    })
  })
})
14 changes: 14 additions & 0 deletions packages/web/components/templates/UploadModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ export function UploadModal(props: UploadModalProps): JSX.Element {
) {
contentType = 'text/csv'
}
if (file.name.endsWith('.xml') || file.name.endsWith('.opml')) {
contentType = 'text/opml'
}
switch (contentType) {
case 'text/csv': {
let urlCount = 0
Expand Down Expand Up @@ -184,6 +187,16 @@ export function UploadModal(props: UploadModalProps): JSX.Element {
uploadSignedUrl: result?.uploadSignedUrl,
}
}
case 'text/opml':
case 'text/xml': {
const result = await uploadImportFileRequestMutation(
UploadImportFileType.OPML,
contentType
)
return {
uploadSignedUrl: result?.uploadSignedUrl,
}
}
case 'application/pdf':
case 'application/epub+zip': {
const request = await uploadFileRequestMutation({
Expand Down Expand Up @@ -315,6 +328,7 @@ export function UploadModal(props: UploadModalProps): JSX.Element {
noClick={true}
accept={{
'text/csv': ['.csv'],
'text/opml': ['.opml', '.xml'],
'application/zip': ['.zip'],
'application/pdf': ['.pdf'],
'application/epub+zip': ['.epub'],
Expand Down
18 changes: 16 additions & 2 deletions packages/web/components/templates/homeFeed/AddLinkModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ const UploadOPMLTab = (): JSX.Element => {
description="Drag OPML file to add feeds"
accept={{
'text/csv': ['.csv'],
'text/opml': ['.opml', '.xml'],
'application/zip': ['.zip'],
'application/pdf': ['.pdf'],
'application/epub+zip': ['.epub'],
Expand Down Expand Up @@ -433,6 +434,9 @@ const UploadPad = (props: UploadPadProps): JSX.Element => {
) {
contentType = 'text/csv'
}
if (file.name.endsWith('.xml') || file.name.endsWith('.opml')) {
contentType = 'text/opml'
}
switch (contentType) {
case 'text/csv': {
let urlCount = 0
Expand Down Expand Up @@ -488,6 +492,16 @@ const UploadPad = (props: UploadPadProps): JSX.Element => {
uploadSignedUrl: result?.uploadSignedUrl,
}
}
case 'text/opml':
case 'text/xml': {
const result = await uploadImportFileRequestMutation(
UploadImportFileType.OPML,
contentType
)
return {
uploadSignedUrl: result?.uploadSignedUrl,
}
}
case 'application/pdf':
case 'application/epub+zip': {
const request = await uploadFileRequestMutation({
Expand Down Expand Up @@ -791,15 +805,15 @@ const TabBar = (props: TabBarProps) => {
>
Feed
</Button>
{/* <Button
<Button
style={props.selectedTab == 'opml' ? 'tabSelected' : 'tab'}
onClick={(event) => {
props.setSelectedTab('opml')
event.preventDefault()
}}
>
OPML
</Button> */}
</Button>
<Button
style={props.selectedTab == 'import' ? 'tabSelected' : 'tab'}
onClick={(event) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { gqlFetcher } from '../networkHelpers'

/**
 * Kinds of import file the web client can request an upload for.
 *
 * Each member's value is the identically named member of the GraphQL
 * `UploadImportFileType` enum (URL_LIST, POCKET, MATTER, OPML).
 * NOTE(review): presumably this is the value passed as the first argument of
 * `uploadImportFileRequestMutation` — confirm against the caller.
 */
export enum UploadImportFileType {
URL_LIST = 'URL_LIST',
OPML = 'OPML',
POCKET = 'POCKET',
MATTER = 'MATTER',
}
Expand Down
28 changes: 28 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -24235,6 +24235,14 @@ opentracing@^0.14.4:
resolved "https://registry.yarnpkg.com/opentracing/-/opentracing-0.14.5.tgz#891fa92cd90a24e64f99bc964370227310926c85"
integrity sha512-XLKtEfHxqrWyF1fzxznsv78w3csW41ucHnjiKnfzZLD5FN8UBDZZL1i4q0FR29zjxXhm+2Hop+5Vr/b8tKIvEg==

opmlparser@^0.8.0:
version "0.8.0"
resolved "https://registry.yarnpkg.com/opmlparser/-/opmlparser-0.8.0.tgz#a5d74834b136af9a639013f5dc39746b7c27063f"
integrity sha512-kQqMYInALr4hDWOvsoXqJ04nNtaINWXFcX3CNwuAd6/8RnRGK+RNotFzN57h6Suq+JU/Tz/4BVWcovBhQUBP5Q==
dependencies:
readable-stream "~1.1.10"
sax "~0.6.0"

optionator@^0.8.1:
version "0.8.3"
resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.8.3.tgz#84fa1d036fe9d3c7e21d99884b601167ec8fb495"
Expand Down Expand Up @@ -27060,6 +27068,16 @@ readable-stream@^3.0.0, readable-stream@^3.0.2, readable-stream@^3.0.6, readable
string_decoder "^1.1.1"
util-deprecate "^1.0.1"

readable-stream@~1.1.10:
version "1.1.14"
resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-1.1.14.tgz#7cf4c54ef648e3813084c636dd2079e166c081d9"
integrity sha512-+MeVjFf4L44XUkhM1eYbD8fyEsxcV81pqMSR5gblfcLCHfZvbrqy4/qYHE+/R5HoBUT11WV5O08Cr1n3YXkWVQ==
dependencies:
core-util-is "~1.0.0"
inherits "~2.0.1"
isarray "0.0.1"
string_decoder "~0.10.x"

readdir-scoped-modules@^1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/readdir-scoped-modules/-/readdir-scoped-modules-1.1.0.tgz#8d45407b4f870a0dcaebc0e28670d18e74514309"
Expand Down Expand Up @@ -27924,6 +27942,11 @@ sax@^1.3.0:
resolved "https://registry.yarnpkg.com/sax/-/sax-1.3.0.tgz#a5dbe77db3be05c9d1ee7785dbd3ea9de51593d0"
integrity sha512-0s+oAmw9zLl1V1cS9BtZN7JAd0cW5e0QH4W3LWEK6a4LaLEA2OTpGYWDY+6XasBLtz6wkm3u1xRw95mRuJ59WA==

sax@~0.6.0:
version "0.6.1"
resolved "https://registry.yarnpkg.com/sax/-/sax-0.6.1.tgz#563b19c7c1de892e09bfc4f2fc30e3c27f0952b9"
integrity sha512-8ip+qnRh7m8OEyvoM1JoSBzlrepp3ajVR8nqgrfTig+TewfyvTijl0am8/anFqgbcdz62ofEUKE1hHNDCdbeSQ==

saxes@^5.0.1:
version "5.0.1"
resolved "https://registry.yarnpkg.com/saxes/-/saxes-5.0.1.tgz#eebab953fa3b7608dbe94e5dadb15c888fa6696d"
Expand Down Expand Up @@ -29193,6 +29216,11 @@ string_decoder@^1.0.0, string_decoder@^1.1.1:
dependencies:
safe-buffer "~5.2.0"

string_decoder@~0.10.x:
version "0.10.31"
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-0.10.31.tgz#62e203bc41766c6c28c9fc84301dab1c5310fa94"
integrity sha512-ev2QzSzWPYmy9GuqfIVildA4OdcGLeFZQrq5ys6RtiuF+RQQiZWr8TZNyAcuVXyQRYfEO+MsoB/1BuQVhOJuoQ==

string_decoder@~1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.1.1.tgz#9cf1611ba62685d7030ae9e4ba34149c3af03fc8"
Expand Down