-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.ts
83 lines (72 loc) · 2.53 KB
/
parse.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import { Html5Entities } from 'https://deno.land/x/html_entities/mod.js'
import { DOMParser } from 'https://deno.land/x/deno_dom/deno-dom-wasm.ts'
import { convertBytesToSeconds, getLastName } from './utils.ts'
import type { FeedEntry } from 'https://deno.land/x/rss/src/types/feed.ts'
import type { PodcastFeedEntry } from './fetch.ts'
/**
 * Joins podcast feed entries with their matching news feed entries.
 *
 * A podcast entry matches the first news entry whose first link `href` equals
 * the podcast entry's `id`. Podcast entries without a match are dropped. Every
 * matched pair is passed through `transformEntry`, and the results are
 * returned sorted by `date`, newest first.
 *
 * @param newsEntries Entries of the news feed.
 * @param podcastEntries Entries of the podcast feed.
 * @returns The transformed entries, newest first.
 */
export function parseEntries(
  newsEntries: FeedEntry[],
  podcastEntries: PodcastFeedEntry[],
) {
  // Index news entries by their first link once instead of rescanning the
  // whole list for every podcast entry. The first entry seen for an href
  // wins, which is the entry a linear `find` would have returned.
  const newsEntryByHref = new Map<string, FeedEntry>()
  for (const newsEntry of newsEntries) {
    // `links` may be empty, so the first link is read defensively; entries
    // without a usable href can never be matched and are skipped.
    const href = newsEntry.links[0]?.href
    if (href !== undefined && !newsEntryByHref.has(href)) {
      newsEntryByHref.set(href, newsEntry)
    }
  }

  // A missing or invalid date would turn the comparator result into NaN and
  // make the sort order unspecified, so such dates are treated as timestamp 0.
  const toTimestamp = (date: Date | undefined): number => {
    const time = date?.getTime()
    return time === undefined || Number.isNaN(time) ? 0 : time
  }

  return podcastEntries
    .flatMap((podcastEntry) => {
      const newsEntry = newsEntryByHref.get(podcastEntry.id)
      return newsEntry === undefined
        ? []
        : [transformEntry({ podcastEntry, newsEntry })]
    })
    .sort((a, b) => toTimestamp(b.date) - toTimestamp(a.date))
}
/**
 * Builds one output entry from a matched podcast/news entry pair.
 *
 * The image URL, authors and description are extracted from the news entry's
 * HTML description via `parseValuesFromPostHtml`; the subtitle, date and
 * enclosure are taken from the podcast entry.
 *
 * @param pair The matched `podcastEntry` and `newsEntry`.
 * @returns A plain object describing the entry.
 */
function transformEntry({
  podcastEntry,
  newsEntry,
}: {
  podcastEntry: PodcastFeedEntry
  newsEntry: FeedEntry
}) {
  const { imageUrl, authors, description } = parseValuesFromPostHtml(
    newsEntry.description?.value ?? '',
  )

  // Only the first attachment is used, so look it up once.
  const attachment = podcastEntry.attachments![0]!

  return {
    imageUrl,
    authors,
    description,
    subtitle: podcastEntry['itunes:summary'].value,
    date: podcastEntry.published!,
    enclosure: {
      url: attachment.url!,
      size: attachment.sizeInBytes!,
      type: attachment.mimeType!,
    },
    // The duration is derived from the attachment size by `convertBytesToSeconds`.
    duration: convertBytesToSeconds(attachment.sizeInBytes!),
    // Keep only the part of the id that precedes the first ' at ' separator.
    guid: newsEntry.id.split(' at ')[0],
    // Optional chaining (not `!`) so an entry without links really falls
    // back to '' instead of throwing before the `??` is evaluated.
    url: newsEntry.links[0]?.href ?? '',
    title: newsEntry.title?.value ?? '',
  }
}
/** Shape of a single entry as produced by `transformEntry`, derived from its return type. */
export type ParsedEntry = ReturnType<typeof transformEntry>
/**
 * Extracts the image URL, the author names and the body HTML from a post's
 * entity-encoded HTML.
 *
 * The input is first entity-decoded and then parsed as `text/html`. If the
 * parser yields no document, the result is an empty image URL, an empty
 * author list and an `undefined` description.
 *
 * @param postHtml Entity-encoded HTML of the post.
 * @returns `imageUrl` (`''` when absent), `authors` (non-empty names sorted
 *   by last name) and `description` (inner HTML of the post body, or
 *   `undefined` when the body element is not found).
 */
export function parseValuesFromPostHtml(postHtml: string) {
  const decodedHtml = Html5Entities.decode(postHtml)
  const document = new DOMParser().parseFromString(decodedHtml, 'text/html')

  // Image URL
  const imageUrl =
    document
      ?.querySelector('.field-name-field-image a')
      ?.getAttribute('href') ?? ''

  // List of all author names
  const mainAuthorElement = document?.querySelector('.field-name-author a')
  // Fall back to an empty list: with a null document the optional chain
  // yields `undefined`, and spreading `undefined` below would throw.
  const additionalAuthorElements =
    document?.querySelectorAll(
      '.field-name-field-dodatni-avtorji .field-item',
    ) ?? []
  const authors = [mainAuthorElement, ...additionalAuthorElements]
    .map((element) => element?.textContent ?? '')
    // Drop missing elements and elements without text.
    .filter(Boolean)
    .sort((a, b) => getLastName(a).localeCompare(getLastName(b)))

  // Description text from the actual post body
  const description = document?.querySelector(
    '.field-name-body .field-item',
  )?.innerHTML

  return { imageUrl, authors, description }
}