A micro crawler framework, implemented in Go.
go get github.com/telanflow/scrago
package main
import (
"net/http"
"net/http/cookiejar"
"github.com/telanflow/scrago"
"github.com/telanflow/scrago/pages"
"github.com/telanflow/scrago/downloader"
)
// MySpider is the example spider handed to scrago.New in main.
type MySpider struct {
	// jar is the cookie jar created in Init and passed to the downloader.
	jar http.CookieJar
}
// Init creates an in-memory cookie jar, keeps it on the spider, and installs
// it on the context's downloader via downloader.WithCookieJar.
//
// NOTE(review): presumably scrago calls Init once before crawling starts;
// confirm against the framework.
func (m *MySpider) Init(ctx *scrago.Context) {
	// Set the persistent cookie.
	jar, err := cookiejar.New(nil)
	if err != nil {
		// Do not store the result on failure: a nil *cookiejar.Jar assigned to
		// the http.CookieJar field would be a non-nil interface wrapping a nil
		// pointer and would only blow up later, when the downloader uses it.
		panic(err)
	}
	m.jar = jar
	ctx.GetDownloader().UseOptions(downloader.WithCookieJar(m.jar))

	// Add Target Url
	// A start URL can also be registered here instead of in main:
	//ctx.AddUrl("https://www.baidu.com")
}
// Process is the page-processing hook of the spider. It receives the crawl
// context and a page; this example leaves it empty.
//
// NOTE(review): presumably invoked by scrago for each downloaded page;
// confirm against the framework.
func (m *MySpider) Process(ctx *scrago.Context, page *pages.Page) {
}
// Output is the pipeline output hook of the spider. It receives the page
// items; this example does nothing with them.
func (m *MySpider) Output(items *pages.PageItem) {
	// Intentionally empty.
}
func main() {
// Start Spider
scrago.New(&MySpider{}).AddUrl("https://www.baidu.com").Run()
}
go build my_spider.go
./my_spider
scrago is licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0.html).