diff --git a/bin/import-medium-article.js b/bin/import-medium-article.js
new file mode 100755
index 0000000..ce5fd71
--- /dev/null
+++ b/bin/import-medium-article.js
@@ -0,0 +1,70 @@
+#!/usr/bin/env node
+// Interactive CLI: prompts for a Medium article URL and imports it into ./content.
+const path = require('path')
+const inquirer = require('inquirer')
+const ora = require('ora')
+const {
+  getMarkdownFromOnlinePost,
+} = require('../lib/import-article-from-medium')
+
+// eslint-disable-next-line no-console
+console.log(` -------------------------
+
+Hello there!
+
+Let's import one of your Medium article here.
+`)
+// only assigned once a URL has been entered, hence the guard in the catch handler
+let spinner
+
+inquirer
+  .prompt([
+    {
+      name: 'canonicalLink',
+      message: 'URL of the Medium article',
+    },
+  ])
+  .then(({ canonicalLink }) => {
+    canonicalLink = canonicalLink.trim()
+    if (!canonicalLink) {
+      throw new Error(`
+We do need the URL to import the article...`)
+    }
+    spinner = ora('Parsing Medium article').start()
+    return getMarkdownFromOnlinePost(
+      path.join(process.cwd(), './content'),
+      canonicalLink
+    )
+  })
+  .then(slug => {
+    if (!slug) {
+      throw new Error(
+        'Looks like the URL points to a draft or a response to an article. We cannot import that.'
+      )
+    }
+    spinner.succeed() // stop the animation so it does not redraw over the message below
+    // eslint-disable-next-line no-console
+    console.log(`
+ -------------------------
+
+Your article is ready to go! 🙌
+
+You can find it here: ${path.join(process.cwd(), './content', slug)}
+
+Happy blogging!
+...
+
+
+`)
+  })
+  .then(() => process.exit(0))
+  .catch(err => {
+    if (spinner) {
+      spinner.fail()
+    }
+    // eslint-disable-next-line no-console
+    console.log()
+    // eslint-disable-next-line no-console
+    console.error(err)
+    process.exit(1)
+  })
diff --git a/index.js b/bin/medium-to-own-blog.js
similarity index 96%
rename from index.js
rename to bin/medium-to-own-blog.js
index 6e21cb3..222585b 100755
--- a/index.js
+++ b/bin/medium-to-own-blog.js
@@ -5,10 +5,10 @@ const inquirer = require('inquirer')
const ora = require('ora')
const JSZip = require('jszip')
const fs = require('fs-extra')
-const { getProfile } = require('./get-profile')
-const { getMarkdownFromPost } = require('./generate-md')
-const { addGatsbyFiles } = require('./add-gatsby-files')
-const { exec, withOutputPath } = require('./utils')
+const { getProfile } = require('../lib/get-profile')
+const { getMarkdownFromPost } = require('../lib/generate-md')
+const { addGatsbyFiles } = require('../lib/add-gatsby-files')
+const { exec, withOutputPath } = require('../lib/utils')
// eslint-disable-next-line no-console
console.log(` -------------------------
diff --git a/gatsby-template/package-lock.json b/gatsby-template/package-lock.json
new file mode 100644
index 0000000..2bba576
--- /dev/null
+++ b/gatsby-template/package-lock.json
@@ -0,0 +1,5 @@
+{
+ "name": "{{ mediumUsername }}-blog",
+ "version": "0.1.0",
+ "lockfileVersion": 1
+}
diff --git a/gatsby-template/package.json b/gatsby-template/package.json
index 54fa906..debbb5d 100644
--- a/gatsby-template/package.json
+++ b/gatsby-template/package.json
@@ -64,7 +64,8 @@
"lh": "lighthousebot",
"start": "npm run dev",
"serve": "gatsby serve",
- "test": "echo \"Write tests! -> https://gatsby.dev/unit-testing\""
+ "test": "echo \"Write tests! -> https://gatsby.dev/unit-testing\"",
+ "import-medium-article": "npx --package=medium-to-own-blog import-medium-article"
},
"prettier": {
"proseWrap": "never",
diff --git a/gatsby-template/src/components/embed.css b/gatsby-template/src/components/embed.css
index 56d3901..3e7130f 100644
--- a/gatsby-template/src/components/embed.css
+++ b/gatsby-template/src/components/embed.css
@@ -1,13 +1,13 @@
-.embed {
+.embed-container-sizing {
position: relative;
}
-.embed img {
+.embed-container-sizing img {
display: block;
height: auto;
width: 100%;
}
-.embed iframe {
+.embed-container-sizing iframe {
height: 100%;
left: 0;
position: absolute;
diff --git a/gatsby-template/src/components/embed.js b/gatsby-template/src/components/embed.js
index e675c84..178fed4 100644
--- a/gatsby-template/src/components/embed.js
+++ b/gatsby-template/src/components/embed.js
@@ -3,7 +3,7 @@ import React, { useRef, useEffect } from 'react'
import './embed.css'
-function Embed({ aspectRatio, src }) {
+function Embed({ aspectRatio, src, caption }) {
const iframeRef = useRef(null)
useEffect(() => {
@@ -29,7 +29,7 @@ function Embed({ aspectRatio, src }) {
if (src && src.match(/^https:\/\/gist.github.com/)) {
return (
-
+
+ {caption ? {caption} : null}
)
}
return (
-
-
+
+
+
+
+ {caption ?
{caption} : null}
)
}
diff --git a/generate-md.js b/generate-md.js
deleted file mode 100644
index 23fb9a3..0000000
--- a/generate-md.js
+++ /dev/null
@@ -1,366 +0,0 @@
-const querystring = require('querystring')
-const path = require('path')
-const fs = require('fs-extra')
-const { JSDOM } = require('jsdom')
-const TurndownService = require('turndown')
-const slugify = require('slugify')
-const { request, withOutputPath } = require('./utils')
-
-let untitledCounter = 0
-let imageDownloader = []
-let iframeParser = []
-
-function replaceIframe(content, iframe) {
- const placeholder = `Embed placeholder ${Math.random()}`
- const source = iframe.attributes.getNamedItem('src').value
-
- let aspectRatioPlaceholder = iframe
-
- while (
- !aspectRatioPlaceholder.classList.contains('aspectRatioPlaceholder') &&
- aspectRatioPlaceholder.parentNode
- ) {
- aspectRatioPlaceholder = aspectRatioPlaceholder.parentNode
- }
-
- const aspectRatioFill =
- aspectRatioPlaceholder &&
- aspectRatioPlaceholder.querySelector('.aspectRatioPlaceholder-fill')
-
- const aspectRatio = aspectRatioFill
- ? parseFloat(aspectRatioFill.style.paddingBottom) / 100
- : 1
-
- iframeParser.push(
- request(`https://medium.com${source}`)
- .then(body => {
- const iframeDom = new JSDOM(body).window.document
- const nestedIframe = iframeDom.querySelector('iframe')
-
- if (!nestedIframe) {
- // check if it's a gist
- const gist = iframeDom.querySelector(
- 'script[src^="https://gist.github.com"]'
- )
-
- if (gist) {
- return {
- src: gist.attributes.getNamedItem('src').value,
- aspectRatio,
- placeholder,
- }
- }
-
- // remove the placeholder if we can't find the source
- return {
- error: true,
- placeholder,
- }
- }
-
- // something like https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fwww.youtube.com%2Fembed%2Fcz1t_oo6k9c%3Ffeature%3Doembed&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dcz1t_oo6k9c&image=https%3A%2F%2Fi.ytimg.com%2Fvi%2Fcz1t_oo6k9c%2Fhqdefault.jpg&key=a19fcc184b9711e1b4764040d3dc5c07&type=text%2Fhtml&schema=youtube
- const nestedSource = nestedIframe.attributes.getNamedItem('src').value
- const query = querystring.parse(nestedSource.split('?')[1])
-
- return {
- src: query.src,
- url: query.url,
- aspectRatio,
- placeholder,
- }
- })
- .catch(() => ({}))
- )
- return `\n\n${placeholder}\n\n`
-}
-
-const config = {
- headingStyle: 'atx',
- hr: '---',
- bulletListMarker: '-',
- codeBlockStyle: 'fenced',
- blankReplacement(content, node) {
- if (node.nodeName === 'FIGURE') {
- const iframe = node.querySelector('iframe')
- if (iframe) {
- return replaceIframe('', iframe)
- }
- }
- if (node.nodeName === 'IFRAME') {
- return replaceIframe('', node)
- }
- return node.isBlock ? '\n\n' : ''
- },
-}
-const td = new TurndownService(config)
-
-td.addRule('iframe', {
- filter: ['iframe', 'IFRAME'],
- replacement: replaceIframe,
-})
-
-// parsing figure and figcaption for markdown
-td.addRule('figure', {
- filter: 'figure',
- replacement(content) {
- // eslint-disable-next-line prefer-const
- let [, , element, , caption] = content.split('\n')
- if (caption) {
- // the caption
- element = [element.slice(0, 2), caption, element.slice(2)].join('')
- }
-
- return element
- },
-})
-
-// parsing code block
-td.addRule('code-blocks', {
- filter: ['pre'],
- replacement(content, node) {
- let string = ``
- if (!node.classList.contains('graf-after--pre')) {
- string += '```\n'
- } else {
- string += '\n\n'
- }
-
- // replace all the `
` to maintain code formatting
- node.querySelectorAll('br').forEach(child => child.replaceWith('\n'))
-
- string += node.textContent
- string += '\n'
-
- if (
- !node.nextElementSibling ||
- node.nextElementSibling.nodeName !== 'PRE'
- ) {
- string += '```'
- }
-
- return string
- },
-})
-
-// some `code` has siblings inside `pre`
-td.addRule('code', {
- filter(node) {
- const isCodeBlock = node.parentNode.nodeName === 'PRE'
-
- return node.nodeName === 'CODE' && !isCodeBlock
- },
- replacement(content) {
- if (!content.trim()) return ''
-
- let delimiter = '`'
- let leadingSpace = ''
- let trailingSpace = ''
- const matches = content.match(/`+/gm)
- if (matches) {
- if (/^`/.test(content)) leadingSpace = ' '
- if (/`$/.test(content)) trailingSpace = ' '
- while (matches.indexOf(delimiter) !== -1) delimiter += '`'
- }
-
- return delimiter + leadingSpace + content + trailingSpace + delimiter
- },
-})
-
-// override the default image rule to download the image from the medium CDN
-td.addRule('image', {
- filter: 'img',
-
- replacement(content, node) {
- const alt = node.alt || ''
- let src = node.getAttribute('src') || ''
-
- if (/^https:\/\/cdn-images.*\.medium\.com/.test(src)) {
- const cdnURL = src
- const filename = `asset-${imageDownloader.length + 1}${path.extname(src)}`
- src = `./${filename}`
- imageDownloader.push(
- request(cdnURL, { encoding: null })
- .then(body => ({ body, filename }))
- .catch(() => ({})) // we will just ignore the error
- )
- }
-
- const title = node.title || ''
- const titlePart = title ? ` "${title}"` : ''
- return src ? `![${alt}](${src}${titlePart})` : ''
- },
-})
-
-module.exports.getMarkdownFromPost = async (
- profile,
- localContent,
- fileName
-) => {
- try {
- imageDownloader = []
- iframeParser = []
- const localDom = new JSDOM(localContent).window.document
-
- const metadata = {}
- let md = ''
-
- let slug
- let redirect
-
- if (localDom.querySelector('.p-canonical')) {
- const canonicalLink = localDom
- .querySelector('.p-canonical')
- .attributes.getNamedItem('href').value
-
- const onlineContent = await request(canonicalLink)
- const onlineDom = new JSDOM(onlineContent).window.document
-
- if (
- onlineDom.querySelector('.postArticle--response') ||
- !onlineDom.querySelector('.postArticle-content')
- ) {
- // that's a response to another article
- // so we will ignore that
- return
- }
-
- const tags = Array.from(onlineDom.querySelectorAll('.js-postTags li'))
-
- const titleElement = onlineDom.querySelector('.graf--title')
-
- // some articles might not have a title
- const title = titleElement ? titleElement.textContent : ''
-
- redirect = path.basename(decodeURI(canonicalLink))
-
- slug = title ? slugify(title).toLowerCase() : redirect
-
- // remove some extra stuff from the html
- if (titleElement) {
- titleElement.remove()
- }
- if (onlineDom.querySelector('.section-divider')) {
- onlineDom.querySelector('.section-divider').remove()
- }
- if (onlineDom.querySelector('.js-postMetaLockup')) {
- onlineDom.querySelector('.js-postMetaLockup').remove()
- }
-
- md = td.turndown(onlineDom.querySelector('.postArticle-content'))
-
- const canonicalMeta = onlineDom.querySelector("link[rel='canonical']")
-
- metadata.title = title
- metadata.description = onlineDom
- .querySelector("meta[name='description']")
- .attributes.getNamedItem('content').value
- metadata.date = onlineDom
- .querySelector("meta[property='article:published_time']")
- .attributes.getNamedItem('content').value
- metadata.categories = tags.map(t => t.textContent)
- metadata.published = true
- metadata.canonicalLink = canonicalMeta
- ? canonicalMeta.attributes.getNamedItem('href').value
- : canonicalLink
- } else {
- // that's a draft
- const title =
- (
- localDom.querySelector('.p-name') || { textContent: '' }
- ).textContent.trim() || `Untitled Draft ${++untitledCounter}`
-
- slug = slugify(title).toLowerCase()
-
- // remove some extra stuff from the html
- if (localDom.querySelector('.p-name')) {
- localDom.querySelector('.p-name').remove()
- }
- if (localDom.querySelector('.graf--title')) {
- localDom.querySelector('.graf--title').remove()
- }
- if (localDom.querySelector('.graf--subtitle')) {
- localDom.querySelector('.graf--subtitle').remove()
- }
- if (localDom.querySelector('.section-divider')) {
- localDom.querySelector('.section-divider').remove()
- }
-
- md = td.turndown(localDom.querySelector('.e-content'))
-
- metadata.title = title
- metadata.description = (
- localDom.querySelector('.p-summary[data-field="subtitle"]') || {
- textContent: '',
- }
- ).textContent.trim()
- metadata.date = new Date().toISOString()
- metadata.published = false
- }
-
- const frontmatter = `---
-title: ${JSON.stringify(metadata.title)}
-description: ${JSON.stringify(metadata.description)}
-date: "${metadata.date}"
-categories: ${
- metadata.categories
- ? `
-${metadata.categories.map(c => ` - ${c}`).join('\n')}
-`
- : '[]'
- }
-published: ${metadata.published ? 'true' : 'false'}${
- metadata.canonicalLink
- ? `
-canonical_link: ${metadata.canonicalLink}`
- : ''
- }${
- redirect
- ? `
-redirect_from:
- - /${redirect}`
- : ''
- }
----
-
-`
-
- await fs.mkdirp(withOutputPath(profile, `./content/${slug}`))
-
- await Promise.all(
- imageDownloader
- .map(p =>
- p.then(({ body, filename }) => {
- if (body) {
- fs.writeFile(
- withOutputPath(profile, `./content/${slug}/${filename}`),
- body
- )
- }
- })
- )
- .concat(
- iframeParser.map(p =>
- p.then(({ src, placeholder, aspectRatio }) => {
- if (src) {
- md = md.replace(
- placeholder,
- `
`
- )
- } else if (placeholder) {
- // remove the placeholder if we can't find the source
- md = md.replace(placeholder, '')
- }
- })
- )
- )
- )
-
- await fs.writeFile(
- withOutputPath(profile, `./content/${slug}/index.md`),
- `${frontmatter}${md}\n`
- )
- } catch (err) {
- err.message = `Error parsing ${fileName}: ${err.message}`
- throw err
- }
-}
diff --git a/add-gatsby-files.js b/lib/add-gatsby-files.js
similarity index 92%
rename from add-gatsby-files.js
rename to lib/add-gatsby-files.js
index 8eca3f1..043457f 100644
--- a/add-gatsby-files.js
+++ b/lib/add-gatsby-files.js
@@ -15,7 +15,7 @@ module.exports.addGatsbyFiles = profile => {
function copyTemplate(fileName) {
return fs
- .readFile(path.join(__dirname, `./gatsby-template/${fileName}`), 'utf8')
+ .readFile(path.join(__dirname, `../gatsby-template/${fileName}`), 'utf8')
.then(replaceTemplate)
.then(content =>
fs.writeFile(withOutputPath(profile, `./${fileName}`), content)
@@ -109,11 +109,11 @@ yarn-error.log
copyTemplate('.eslintrc.yml'),
copyTemplate('.dockerignore'),
fs.copy(
- path.join(__dirname, './gatsby-template/src'),
+ path.join(__dirname, '../gatsby-template/src'),
withOutputPath(profile, './src')
),
fs.copy(
- path.join(__dirname, './gatsby-template/static'),
+ path.join(__dirname, '../gatsby-template/static'),
withOutputPath(profile, './static')
),
])
diff --git a/default-icon.png b/lib/default-icon.png
similarity index 100%
rename from default-icon.png
rename to lib/default-icon.png
diff --git a/lib/generate-md.js b/lib/generate-md.js
new file mode 100644
index 0000000..016557c
--- /dev/null
+++ b/lib/generate-md.js
@@ -0,0 +1,35 @@
+const { JSDOM } = require('jsdom')
+const { withOutputPath } = require('./utils')
+const {
+ getMarkdownFromOnlinePost,
+ getMarkdownFromLocalPost,
+} = require('./import-article-from-medium')
+
+// Converts one exported Medium HTML file to markdown under the profile's
+// ./content folder: exports carrying a `.p-canonical` link are re-fetched from
+// that URL, the others are converted from the local HTML. Errors are rethrown.
+module.exports.getMarkdownFromPost = async (
+  profile,
+  localContent,
+  fileName
+) => {
+  try {
+    const exportedDom = new JSDOM(localContent).window.document
+    const canonicalAnchor = exportedDom.querySelector('.p-canonical')
+    if (!canonicalAnchor) {
+      await getMarkdownFromLocalPost(
+        withOutputPath(profile, './content'),
+        exportedDom
+      )
+      return
+    }
+    const canonicalLink = canonicalAnchor.attributes.getNamedItem('href').value
+    await getMarkdownFromOnlinePost(
+      withOutputPath(profile, './content'),
+      canonicalLink
+    )
+  } catch (err) {
+    err.message = `Error parsing ${fileName}: ${err.message}`
+    throw err
+  }
+}
diff --git a/get-profile.js b/lib/get-profile.js
similarity index 100%
rename from get-profile.js
rename to lib/get-profile.js
diff --git a/lib/import-article-from-medium.js b/lib/import-article-from-medium.js
new file mode 100644
index 0000000..aa8b451
--- /dev/null
+++ b/lib/import-article-from-medium.js
@@ -0,0 +1,438 @@
+const querystring = require('querystring')
+const path = require('path')
+const fs = require('fs-extra')
+const { JSDOM } = require('jsdom')
+const TurndownService = require('turndown')
+const slugify = require('slugify')
+const { request } = require('./utils')
+
+let untitledCounter = 0 // numbers the drafts that have no title ("Untitled Draft N")
+let imageDownloader = [] // pending image downloads; reassigned to [] at the start of each conversion
+const iframeParser = {} // embed lookups keyed by iframe src; never cleared, so shared by every conversion
+
+// Swaps an embed <iframe> for a unique text placeholder and starts resolving the
+// embedded resource in the background. Lookups are memoised in `iframeParser`
+// by iframe src; an entry's promise resolves to `{ src, url?, aspectRatio }` or
+// `{ error: true }` and never rejects. `caption` is not guaranteed to be a string
+// (this is also registered directly as a Turndown rule), hence the typeof guards.
+function replaceIframe(content, iframe, caption = '') {
+  const source = iframe.attributes.getNamedItem('src').value
+
+  if (iframeParser[source]) {
+    // we already parsed an iframe pointing to the same thing
+    // so return the result (or the placeholder if it isn't finished)
+    if (!iframeParser[source].result && !iframeParser[source].caption) {
+      iframeParser[source].caption = typeof caption !== 'string' ? '' : caption
+    }
+    return `\n\n${iframeParser[source].result ||
+      iframeParser[source].placeholder ||
+      ''}\n\n`
+  }
+
+  const placeholder = `Embed placeholder ${Math.random()}`
+
+  // climb to the wrapper holding the sizing element (or the topmost ancestor)
+  let aspectRatioPlaceholder = iframe
+  while (
+    !aspectRatioPlaceholder.classList.contains('aspectRatioPlaceholder') &&
+    aspectRatioPlaceholder.parentNode
+  ) {
+    aspectRatioPlaceholder = aspectRatioPlaceholder.parentNode
+  }
+  const aspectRatioFill =
+    aspectRatioPlaceholder &&
+    aspectRatioPlaceholder.querySelector('.aspectRatioPlaceholder-fill')
+  // the fill's padding-bottom is read as a percentage, e.g. "56.25%" -> 0.5625
+  const aspectRatio = aspectRatioFill
+    ? parseFloat(aspectRatioFill.style.paddingBottom) / 100
+    : 1
+
+  iframeParser[source] = {
+    caption: typeof caption !== 'string' ? '' : caption,
+    placeholder,
+    promise: request(`https://medium.com${source}`)
+      .then(body => {
+        const iframeDom = new JSDOM(body).window.document
+        const nestedIframe = iframeDom.querySelector('iframe')
+        if (!nestedIframe) {
+          // check if it's a gist
+          const gist = iframeDom.querySelector(
+            'script[src^="https://gist.github.com"]'
+          )
+          if (gist) {
+            return {
+              src: gist.attributes.getNamedItem('src').value,
+              aspectRatio,
+            }
+          }
+          // remove the placeholder if we can't find the source
+          return {
+            error: true,
+          }
+        }
+
+        // something like https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fwww.youtube.com%2Fembed%2Fcz1t_oo6k9c%3Ffeature%3Doembed&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3Dcz1t_oo6k9c&image=https%3A%2F%2Fi.ytimg.com%2Fvi%2Fcz1t_oo6k9c%2Fhqdefault.jpg&key=a19fcc184b9711e1b4764040d3dc5c07&type=text%2Fhtml&schema=youtube
+        const nestedSource = nestedIframe.attributes.getNamedItem('src').value
+        const query = querystring.parse(nestedSource.split('?')[1])
+        return {
+          src: query.src,
+          url: query.url,
+          // regular embeds need their measured ratio too, exactly like gists
+          aspectRatio,
+        }
+      })
+      .catch(() => ({ error: true })),
+  }
+
+  return `\n\n${placeholder}\n\n`
+}
+
+const config = { // options handed to the TurndownService instance created below
+ headingStyle: 'atx',
+ hr: '---',
+ bulletListMarker: '-',
+ codeBlockStyle: 'fenced',
+ blankReplacement(content, node) { // figures and iframes handled here still become embed placeholders
+ if (node.nodeName === 'FIGURE') {
+ const iframe = node.querySelector('iframe')
+ if (iframe) {
+ return replaceIframe('', iframe)
+ }
+ }
+ if (node.nodeName === 'IFRAME') {
+ return replaceIframe('', node)
+ }
+ return node.isBlock ? '\n\n' : '' // anything else: a blank-line separator for block nodes, nothing for inline ones
+ },
+}
+const td = new TurndownService(config)
+// plain iframes are replaced by an embed placeholder as well
+td.addRule('iframe', {
+ filter: ['iframe', 'IFRAME'],
+ replacement: replaceIframe, // NOTE(review): invoked with Turndown's own arguments, so the third one is presumably not a caption string — confirm
+})
+
+// parsing figure and figcaption for markdown
+td.addRule('figure', {
+ filter: 'figure',
+ replacement(content, node) {
+ const iframe = node.querySelector('iframe')
+ if (iframe) {
+ return replaceIframe('', iframe, content.split('\n')[2]) // embed figure: line index 2 of the converted content is passed as the caption
+ }
+ // otherwise line index 2 of the converted content is taken as the element and line index 4 as its caption
+ // eslint-disable-next-line prefer-const
+ let [, , element, , caption] = content.split('\n')
+ if (caption) {
+ // the caption
+ element = [element.slice(0, 2), caption, element.slice(2)].join('') // inserts the caption after the first two characters — presumably right after the "![" of an image; verify
+ }
+
+ return element // NOTE(review): undefined when the converted content has fewer than three lines
+ },
+})
+
+// parsing code block
+// Consecutive <pre> elements are merged into one fenced block: the fence is
+// opened unless the node carries `graf-after--pre` and closed unless the next
+// sibling is another <pre>.
+td.addRule('code-blocks', {
+  filter: ['pre'],
+  replacement(content, node) {
+    // a <pre> flagged as following another <pre> continues the open fence
+    const opening = node.classList.contains('graf-after--pre')
+      ? '\n\n'
+      : '```\n'
+
+    // replace all the `<br>` with newlines to maintain code formatting
+    // (textContent would otherwise drop the line breaks)
+    for (const lineBreak of node.querySelectorAll('br')) {
+      lineBreak.replaceWith('\n')
+    }
+
+    const code = node.textContent
+
+    // only the last <pre> of a run closes the fence
+    const next = node.nextElementSibling
+    const isLastOfRun = !next || next.nodeName !== 'PRE'
+    const closing = isLastOfRun ? '```' : ''
+
+    return `${opening}${code}\n${closing}`
+  },
+})
+
+// some `code` has siblings inside `pre`
+// (so only <code> outside of a <pre> is rendered as inline code here)
+td.addRule('code', {
+  filter(node) {
+    const insidePre = node.parentNode.nodeName === 'PRE'
+    return !insidePre && node.nodeName === 'CODE'
+  },
+  replacement(content) {
+    if (content.trim() === '') return ''
+
+    // every run of backticks found in the content
+    const backtickRuns = content.match(/`+/gm) || []
+    // lengthen the fence until it differs from each of those runs
+    let fence = '`'
+    while (backtickRuns.includes(fence)) fence += '`'
+
+    // keep a space between the fence and a backtick at either edge
+    const before = /^`/.test(content) ? ' ' : ''
+    const after = /`$/.test(content) ? ' ' : ''
+
+    return [fence, before, content, after, fence].join('')
+  },
+})
+
+// override the default image rule to download the image from the medium CDN
+// (downloads are queued in `imageDownloader`; other sources are kept as they are)
+td.addRule('image', {
+  filter: 'img',
+  replacement(content, node) {
+    const originalSrc = node.getAttribute('src') || ''
+    if (!originalSrc) return ''
+
+    let markdownSrc = originalSrc
+    if (/^https:\/\/cdn-images.*\.medium\.com/.test(originalSrc)) {
+      // local file name: asset-<position in the queue><original extension>
+      const extension = path.extname(originalSrc)
+      const filename = `asset-${imageDownloader.length + 1}${extension}`
+      markdownSrc = `./${filename}`
+      const download = request(originalSrc, { encoding: null })
+        .then(body => ({ body, filename }))
+        .catch(() => ({})) // we will just ignore the error
+      imageDownloader.push(download)
+    }
+
+    const titlePart = node.title ? ` "${node.title}"` : ''
+    return `![${node.alt || ''}](${markdownSrc}${titlePart})`
+  },
+})
+
+// Downloads the published article at `canonicalLink`, converts it to markdown and
+// writes `<contentFolder>/<slug>/index.md` next to the images it references.
+// Resolves with the slug, or with undefined (nothing written) when the page is
+// a response to another article or has no `.postArticle-content`.
+module.exports.getMarkdownFromOnlinePost = async (
+  contentFolder,
+  canonicalLink
+) => {
+  imageDownloader = []
+
+  const metadata = {}
+  let md = ''
+
+  const onlineContent = await request(canonicalLink)
+  const onlineDom = new JSDOM(onlineContent).window.document
+
+  if (
+    onlineDom.querySelector('.postArticle--response') ||
+    !onlineDom.querySelector('.postArticle-content')
+  ) {
+    // that's a response to another article
+    // so we will ignore that
+    return undefined
+  }
+
+  const tags = Array.from(onlineDom.querySelectorAll('.js-postTags li'))
+  const titleElement = onlineDom.querySelector('.graf--title')
+  // some articles might not have a title
+  const title = titleElement ? titleElement.textContent : ''
+  const redirect = path.basename(decodeURI(canonicalLink))
+  const slug = title ? slugify(title).toLowerCase() : redirect
+
+  // remove some extra stuff from the html
+  if (titleElement) {
+    titleElement.remove()
+  }
+  if (onlineDom.querySelector('.section-divider')) {
+    onlineDom.querySelector('.section-divider').remove()
+  }
+  if (onlineDom.querySelector('.js-postMetaLockup')) {
+    onlineDom.querySelector('.js-postMetaLockup').remove()
+  }
+
+  md = td.turndown(onlineDom.querySelector('.postArticle-content'))
+
+  const canonicalMeta = onlineDom.querySelector("link[rel='canonical']")
+
+  metadata.title = title
+  metadata.description = onlineDom
+    .querySelector("meta[name='description']")
+    .attributes.getNamedItem('content').value
+  metadata.date = onlineDom
+    .querySelector("meta[property='article:published_time']")
+    .attributes.getNamedItem('content').value
+  metadata.categories = tags.map(t => t.textContent)
+  metadata.published = true
+  metadata.canonicalLink = canonicalMeta
+    ? canonicalMeta.attributes.getNamedItem('href').value
+    : canonicalLink
+
+  const frontmatter = `---
+title: ${JSON.stringify(metadata.title)}
+description: ${JSON.stringify(metadata.description)}
+date: "${metadata.date}"
+categories: ${
+    metadata.categories
+      ? `
+${metadata.categories.map(c => ` - ${c}`).join('\n')}
+`
+      : '[]'
+  }
+published: ${metadata.published ? 'true' : 'false'}${
+    metadata.canonicalLink
+      ? `
+canonical_link: ${metadata.canonicalLink}`
+      : ''
+  }${
+    redirect
+      ? `
+redirect_from:
+ - /${redirect}`
+      : ''
+  }
+---
+
+`
+
+  await fs.mkdirp(path.join(contentFolder, `./${slug}`))
+  await Promise.all(
+    imageDownloader
+      .map(p =>
+        p.then(({ body, filename }) => {
+          if (!body) return undefined // the download failed: nothing to write
+          // returned so Promise.all waits for the write and surfaces its errors
+          return fs.writeFile(
+            path.join(contentFolder, `./${slug}/${filename}`),
+            body
+          )
+        })
+      )
+      .concat(
+        Object.keys(iframeParser)
+          .filter(k => iframeParser[k].promise)
+          .map(k => {
+            const { promise, placeholder, caption } = iframeParser[k]
+            return promise.then(({ src, aspectRatio }) => {
+              const result = `
`
+              // an unresolved embed is cached as empty so later posts drop it too
+              iframeParser[k] = { result: src ? result : '' }
+              if (src) {
+                md = md.replace(new RegExp(placeholder, 'g'), result)
+              } else if (placeholder) {
+                // remove the placeholder if we can't find the source
+                md = md.replace(new RegExp(placeholder, 'g'), '')
+              }
+            })
+          })
+      )
+  )
+
+  await fs.writeFile(
+    path.join(contentFolder, `./${slug}/index.md`),
+    `${frontmatter}${md}\n`
+  )
+
+  return slug
+}
+
+// Converts an exported draft, given as a parsed document, to markdown and writes
+// `<contentFolder>/<slug>/index.md` next to its images. The post is marked
+// `published: false` and dated at import time. Resolves with the slug.
+module.exports.getMarkdownFromLocalPost = async (contentFolder, localDom) => {
+  imageDownloader = []
+  const metadata = {}
+  let md = ''
+
+  const title =
+    (
+      localDom.querySelector('.p-name') || { textContent: '' }
+    ).textContent.trim() || `Untitled Draft ${++untitledCounter}`
+  const slug = slugify(title).toLowerCase()
+
+  // remove some extra stuff from the html
+  if (localDom.querySelector('.p-name')) {
+    localDom.querySelector('.p-name').remove()
+  }
+  if (localDom.querySelector('.graf--title')) {
+    localDom.querySelector('.graf--title').remove()
+  }
+  if (localDom.querySelector('.graf--subtitle')) {
+    localDom.querySelector('.graf--subtitle').remove()
+  }
+  if (localDom.querySelector('.section-divider')) {
+    localDom.querySelector('.section-divider').remove()
+  }
+
+  md = td.turndown(localDom.querySelector('.e-content'))
+
+  metadata.title = title
+  metadata.description = (
+    localDom.querySelector('.p-summary[data-field="subtitle"]') || {
+      textContent: '',
+    }
+  ).textContent.trim()
+  metadata.date = new Date().toISOString()
+  metadata.published = false
+
+  const frontmatter = `---
+title: ${JSON.stringify(metadata.title)}
+description: ${JSON.stringify(metadata.description)}
+date: "${metadata.date}"
+categories: ${
+    metadata.categories
+      ? `
+${metadata.categories.map(c => ` - ${c}`).join('\n')}
+`
+      : '[]'
+  }
+published: ${metadata.published ? 'true' : 'false'}${
+    metadata.canonicalLink
+      ? `
+canonical_link: ${metadata.canonicalLink}`
+      : ''
+  }
+---
+
+`
+
+  await fs.mkdirp(path.join(contentFolder, `./${slug}`))
+  await Promise.all(
+    imageDownloader
+      .map(p =>
+        p.then(({ body, filename }) => {
+          if (!body) return undefined // the download failed: nothing to write
+          // returned so Promise.all waits for the write and surfaces its errors
+          return fs.writeFile(
+            path.join(contentFolder, `./${slug}/${filename}`),
+            body
+          )
+        })
+      )
+      .concat(
+        Object.keys(iframeParser)
+          .filter(k => iframeParser[k].promise)
+          .map(k => {
+            const { promise, placeholder, caption } = iframeParser[k]
+            return promise.then(({ src, aspectRatio }) => {
+              const result = `
`
+              // an unresolved embed is cached as empty so later posts drop it too
+              iframeParser[k] = { result: src ? result : '' }
+              if (src) {
+                md = md.replace(new RegExp(placeholder, 'g'), result)
+              } else if (placeholder) {
+                // remove the placeholder if we can't find the source
+                md = md.replace(new RegExp(placeholder, 'g'), '')
+              }
+            })
+          })
+      )
+  )
+
+  await fs.writeFile(
+    path.join(contentFolder, `./${slug}/index.md`),
+    `${frontmatter}${md}\n`
+  )
+  return slug
+}
diff --git a/utils.js b/lib/utils.js
similarity index 100%
rename from utils.js
rename to lib/utils.js
diff --git a/package-lock.json b/package-lock.json
index 08cfdb9..c31bce7 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,6 +1,6 @@
{
- "name": "medium-to-gatsby",
- "version": "1.0.0",
+ "name": "medium-to-own-blog",
+ "version": "0.1.12",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
diff --git a/package.json b/package.json
index 16dfbd4..8ded00c 100644
--- a/package.json
+++ b/package.json
@@ -3,18 +3,14 @@
"version": "0.1.12",
"description": "Switch from Medium to your own blog in a few minutes",
"bin": {
- "medium-to-own-blog": "index.js"
+ "medium-to-own-blog": "bin/medium-to-own-blog.js",
+ "import-medium-article": "bin/import-medium-article.js"
},
"main": "index.js",
"files": [
"/gatsby-template",
- "/add-gatsby-files.js",
- "/default-icon.png",
- "/generate-md.js",
- "/get-profile.js",
- "/index.js",
- "/parse-profile.js",
- "/utils.js"
+ "/bin",
+ "/lib"
],
"scripts": {
"test": "node ./index.js"
diff --git a/parse-profile.js b/parse-profile.js
deleted file mode 100644
index e69de29..0000000