Skip to content

Commit

Permalink
Merge pull request #28 from crawlab-team/develop
Browse files Browse the repository at this point in the history
  • Loading branch information
rpennacchio3mj5y committed Mar 1, 2022
2 parents ced7ed7 + 28196c9 commit 44997cb
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 66 deletions.
60 changes: 21 additions & 39 deletions lib/BaseExecutor.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,50 +3,32 @@ const models = require('../models')
const constants = require('../constants')

class BaseExecutor {
constructor(task) {
this.task = task
this.platform = undefined
this.spider = undefined
}
constructor(task) {
this.task = task
this.platform = undefined
this.spider = undefined
}

async init() {
const task = this.task
async init() {
const task = this.task

// 平台
this.platform = await models.Platform.findOne({ _id: task.platformId })
const spiderName = this.platform.name
// 平台
this.platform = await models.Platform.findOne({ _id: task.platformId })
const spiderName = this.platform.name

let spider
if (spiderName === constants.platform.JUEJIN) {
spider = new spiders.JuejinSpider(task._id)
} else if (spiderName === constants.platform.SEGMENTFAULT) {
spider = new spiders.SegmentfaultSpider(task._id)
} else if (spiderName === constants.platform.JIANSHU) {
spider = new spiders.JianshuSpider(task._id)
} else if (spiderName === constants.platform.CSDN) {
spider = new spiders.CsdnSpider(task._id)
} else if (spiderName === constants.platform.ZHIHU) {
spider = new spiders.ZhihuSpider(task._id)
} else if (spiderName === constants.platform.OSCHINA) {
spider = new spiders.OschinaSpider(task._id)
} else if (spiderName === constants.platform.TOUTIAO) {
spider = new spiders.ToutiaoSpider(task._id)
} else if (spiderName === constants.platform.CNBLOGS) {
spider = new spiders.CnblogsSpider(task._id)
} else if (spiderName === constants.platform.V2EX) {
spider = new spiders.V2exSpider(task._id)
}
this.spider = spider
}
const Spider = require(`../spiders/${spiderName}`)
console.log(Spider)
this.spider = new Spider(task._id)
}

async run() {
// to be inherited
}
async run() {
// to be inherited
}

async start() {
await this.init()
await this.run()
}
async start() {
await this.init()
await this.run()
}
}

module.exports = BaseExecutor
6 changes: 5 additions & 1 deletion routes/platform.js
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,11 @@ module.exports = {
const platform = platforms[i]
const Spider = require(`../spiders/${platform.name}`)
const spider = new Spider(null, platform._id.toString())
await spider.checkCookieStatus()
try {
await spider.checkCookieStatus()
} catch (e) {
console.error(e)
}
}
await res.json({
status: 'ok'
Expand Down
42 changes: 22 additions & 20 deletions spiders/jianshu.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,32 @@ const constants = require('../constants')
const BaseSpider = require('./base')

class JianshuSpider extends BaseSpider {
async afterGoToEditor() {
await this.page.evaluate(() => {
document.querySelectorAll('span').forEach(el => {
if (el.textContent.trim() === '新建文章') {
el.click()
}
})
async afterGoToEditor() {
await this.page.evaluate(() => {
document.querySelectorAll('span')
.forEach(el => {
if (el.textContent.trim() === '新建文章') {
el.click()
}
})
await this.page.waitFor(3000)
}
})
await this.page.waitFor(5000)
}

async afterInputEditor() {
}
async afterInputEditor() {
}

async afterPublish() {
// this.task.url = this.page.url()
this.task.updateTs = new Date()
this.task.status = constants.status.FINISHED
await this.article.save()
}
async afterPublish() {
this.task.url = this.page.url()
if (!this.task.url.match(/\/p\/\w+/)) return
this.task.updateTs = new Date()
this.task.status = constants.status.FINISHED
await this.article.save()
}

async fetchStats() {
// TODO: implement this method
}
async fetchStats() {
// TODO: implement this method
}
}

module.exports = JianshuSpider
2 changes: 0 additions & 2 deletions spiders/v2ex.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
const fs = require('fs')
const path = require('path')
const BaseSpider = require('./base')
const constants = require('../constants')

Expand Down
30 changes: 30 additions & 0 deletions spiders/wechat.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
const BaseSpider = require('./base')
const constants = require('../constants')

/**
 * Spider class for the WeChat platform (going by the class name), layered on
 * top of BaseSpider.
 *
 * Work in progress: the publish-button click and the task bookkeeping are
 * still commented out, so every method here is currently either empty or a
 * thin pass-through.
 */
class WechatSpider extends BaseSpider {
  /**
   * Hook with no WeChat-specific behaviour yet.
   * NOTE(review): presumably invoked by BaseSpider once the editor has been
   * filled in - confirm against ./base.
   *
   * @returns {Promise<void>}
   */
  async afterInputEditor() {}

  /**
   * Publish step. The actual click on the publish button is disabled for
   * now, so this only runs the post-publish hook.
   *
   * @returns {Promise<void>} settles once afterPublish() has settled
   */
  async publish() {
    // Publish the article (disabled for now):
    // const elPub = await this.page.$(this.editorSel.publish)
    // await elPub.click()
    // await this.page.waitFor(20000)

    // Post-publish handling
    const postProcessing = this.afterPublish()
    await postProcessing
  }

  /**
   * Post-publish hook. Currently a no-op: recording the URL, timestamp and
   * FINISHED status on the task is commented out below.
   * NOTE(review): the '#reply0' / '/t/<id>' URL shape looks carried over
   * from another spider - confirm what WeChat needs before enabling.
   *
   * @returns {Promise<void>}
   */
  async afterPublish() {
    // this.task.url = await this.page.url().replace('#reply0', '')
    // if (!this.task.url.match(/\/t\/\d+/)) return
    // this.task.updateTs = new Date()
    // this.task.status = constants.status.FINISHED
    // await this.task.save()
  }

  /**
   * Statistics hook; not implemented for this platform yet.
   *
   * @returns {Promise<void>}
   */
  async fetchStats() {}
}

module.exports = WechatSpider
12 changes: 8 additions & 4 deletions spiders/zhihu.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,14 @@ class ZhihuSpider extends BaseSpider {

async publish() {
// 发布文章
await this.page.evaluate(() => {
const el = document.querySelector('.PublishPanel-stepTwoButton')
el.click()
})
try {
await this.page.evaluate(() => {
const el = document.querySelector('.PublishPanel-stepTwoButton')
el.click()
})
} catch (e) {
// do nothing
}
await this.page.waitFor(5000)

// 后续处理
Expand Down

0 comments on commit 44997cb

Please sign in to comment.