Skip to content

Commit

Permalink
Crawl site
Browse files Browse the repository at this point in the history
  • Loading branch information
daun committed Sep 13, 2023
1 parent 7fc7f0b commit 29fb3fc
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 22 deletions.
55 changes: 33 additions & 22 deletions src/commands/validate.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
import * as fs from 'fs/promises'

import { URL } from 'url'
import { join } from 'path'

import { Command, Flags } from '@oclif/core'
// @ts-ignore
import Crawler from 'crawler'
import chalk from 'chalk'
import { Listr, ListrTask } from 'listr2'
import { Browser } from 'playwright'

import { createBrowser } from '../browser.js'
import { isUrl } from '../util.js'
import { defaults as defaultConfig, loadConfig, type Config } from '../config.js'
import { createBrowser, visitPage } from '../browser.js'
import { isUrl, isValidUrl, n } from '../util.js'
import { loadConfig, type Config } from '../config.js'

/**
 * Shared context object handed to every Listr task of the validate command.
 */
interface Ctx {
// Configuration returned by `parseConfig()` when `run()` builds the context.
config: Config
// Playwright browser instance; optional — presumably set by the browser launch task (not visible here, confirm).
browser?: Browser
// Async cleanup callback stored by the browser setup task. Called by the
// "Closing browser" task and by the error path in `run()`.
teardown?: () => Promise<void>
// Page URLs to validate; starts empty and is filled by the "Compiling pages" task.
urls: string[]
}

export default class Validate extends Command {
Expand All @@ -27,8 +29,6 @@ export default class Validate extends Command {
`<%= config.bin %> <%= command.id %> --crawl --url https://mysite.com`,
`<%= config.bin %> <%= command.id %> --tests containers,transition-duration`,
`<%= config.bin %> <%= command.id %> --asynchronous`,
`<%= config.bin %> <%= command.id %> --crawl https://mysite.com`,
`<%= config.bin %> <%= command.id %> --asynchronous`,
]
static flags = {
url: Flags.string({
Expand All @@ -43,8 +43,6 @@ export default class Validate extends Command {
summary: 'Crawl site',
description: 'Crawl the site for all public URLs and validate all found pages. Requires the --url flag as a base URL.',
required: false,
default: false,
dependsOn: ['url']
}),
sitemap: Flags.string({
char: 's',
Expand All @@ -66,7 +64,6 @@ export default class Validate extends Command {
summary: 'Parallel',
description: 'Run all tests asynchronously. A lot faster, but might cause issues.',
required: false,
default: false,
}),
containers: Flags.string({
summary: 'Containers',
Expand All @@ -90,7 +87,8 @@ export default class Validate extends Command {

async run(): Promise<void> {
const ctx: Ctx = {
config: await this.parseConfig()
config: await this.parseConfig(),
urls: []
}

const tasks: ListrTask<Ctx>[] = [
Expand All @@ -105,29 +103,39 @@ export default class Validate extends Command {
ctx.teardown = teardown
}
},
// {
// title: 'Compile list of pages',
// task: async (): Promise<void> => {
// const pages = this.getListOfPages(ctx)
// }
// },
]
)
},
{
title: 'Compiling pages',
task: async (ctx, task): Promise<void> => {
const { source, urls } = await this.getPageUrls(ctx)
ctx.urls = urls
task.title = chalk`Found {green ${urls.length} ${n(urls.length, 'page')}} in {magenta ${source}}`
}
},
{
title: 'Shutting down',
task: async (ctx, task) => task.newListr(() => [
{
title: 'Closing browser',
task: async (ctx) => {
await ctx.teardown!()
ctx.teardown = undefined
}
}
])
}
]

await new Listr<Ctx>(tasks, { ctx }).run()
try {
await new Listr<Ctx>(tasks, { ctx }).run()
} catch (error) {
if (ctx.teardown) {
await ctx.teardown()
}
throw error
}
}

async catch(error: Error) {
Expand All @@ -148,26 +156,29 @@ export default class Validate extends Command {
sitemap: flags.sitemap,
asynchronous: flags.asynchronous,
tests: flags.tests.split(','),
styles: flags.stylesExpectedToChange.split(','),
styles: flags.styles.split(','),
}
}
return await loadConfig(overrides)
}

async getPagesToTest(ctx: Ctx): Promise<{ urls: string[], source: string }> {
async getPageUrls(ctx: Ctx): Promise<{ urls: string[], source: string }> {
const { url, crawl, sitemap } = ctx.config.validate
let urls: string[] = []
let source = ''
if (url) {
if (!isValidUrl(url)) {
throw new Error(`Invalid URL: ${url}. Make sure you include the protocol and hostname.`)
}
if (crawl) {
source = 'crawled site urls'
source = 'crawled site'
urls = await this.getPageUrlsFromCrawler(ctx)
} else {
source = 'single url argument'
source = 'url argument'
urls = [url]
}
} else if (sitemap) {
source = `parsed sitemap ${sitemap}`
source = 'parsed sitemap'
urls = await this.getPageUrlsFromSitemap(ctx)
} else {
throw new Error('You must specify either a url or a sitemap to validate.')
Expand Down
20 changes: 20 additions & 0 deletions src/util.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import * as fs from 'fs/promises'
import { URL } from 'url'

import { camelCase, upperFirst } from 'lodash-es'

export function wait(ms: number): Promise<void> {
Expand Down Expand Up @@ -40,3 +42,21 @@ export async function isEmptyDirectory(path: string): Promise<boolean> {
return false
}
}

/**
 * Pick the singular or plural form of a noun for a given count.
 *
 * @param n - The count the noun refers to.
 * @param base - Singular form of the noun, e.g. `page`.
 * @param plural - Plural form. Defaults to `base` with an `s` appended; pass it
 *   explicitly for irregular nouns (e.g. `entry` / `entries`).
 * @returns `base` when `n` is exactly 1, otherwise `plural`.
 */
export function n(n: number, base: string, plural: string = `${base}s`): string {
  return n === 1 ? base : plural
}


/**
 * Check whether a string parses as an absolute URL with an allowed protocol.
 *
 * @param s - The string to test.
 * @param protocols - Allowed protocols, compared case-insensitively. Entries may
 *   be written with or without the trailing colon (`'https'` or `'https:'`).
 *   An empty list accepts any protocol.
 * @returns `true` if `s` can be parsed by `URL` and its protocol is allowed,
 *   `false` otherwise (including when parsing throws).
 */
export function isValidUrl(s: string, protocols: string[] = ['http', 'https']): boolean {
  let protocol: string
  try {
    // The URL constructor throws on anything that is not an absolute URL
    protocol = new URL(s).protocol
  } catch {
    return false
  }

  // No allow-list given: any parseable URL is considered valid
  if (!protocols?.length) {
    return true
  }

  // `URL.protocol` is lowercase and ends in a single colon; bring the
  // allow-list into the same shape, tolerating entries that already carry one
  const allowed = protocols.map(x => `${x.toLowerCase().replace(/:+$/, '')}:`)
  return allowed.includes(protocol)
}

0 comments on commit 29fb3fc

Please sign in to comment.