Slushy, a web crawler written in Scala

Get started

Clone and publish the project locally

git clone git@github.com:knshiro/slushy.git
cd slushy
sbt publishLocal

Add the dependency to your build.sbt

libraryDependencies += "me.ugo" %% "slushy" % "0.0.1-SNAPSHOT"
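Since publishLocal installs the artifact into your local Ivy repository, sbt should resolve the snapshot without any additional resolver.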

Use it!

import me.ugo.slushy.crawler._
import com.netaporter.uri.dsl._
import scala.concurrent.Await
import scala.concurrent.duration._
import scala.concurrent.ExecutionContext.Implicits._

val crawler = Crawler("http://employment.en-japan.com/", 10) { page =>
  println("Parsed: " + page.url)
}

Await.ready(crawler.crawl(), Duration.Inf)
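
The callback runs for every crawled page, so it can do more than print. A minimal sketch, not part of the library, that collects crawled URLs instead (the Crawler(url, n) signature and page.url field are taken from the example above):

import java.util.concurrent.ConcurrentLinkedQueue

// Assuming callbacks can fire concurrently, collect into a thread-safe queue
val visited = new ConcurrentLinkedQueue[String]()

val collector = Crawler("http://employment.en-japan.com/", 10) { page =>
  visited.add(page.url.toString)
}

Await.ready(collector.crawl(), Duration.Inf)
println(s"Crawled ${visited.size} pages")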

Go further

import me.ugo.slushy.crawler._
import me.ugo.slushy.scraper._
import com.netaporter.uri.dsl._
import scala.concurrent.Await
import scala.concurrent.duration._
import scala.concurrent.ExecutionContext.Implicits._

val crawler = new Crawler with UrlStorageBloomFilter with SitemapCrawler with EmailCrawler {

  override def startUrls: Seq[Uri] = List("http://employment.en-japan.com/")
  // Throttle requests, presumably to at most 3 per 100 milliseconds
  override val scraper: Scraper = new ThrottledHttpScraper(Frequency(3, 100.millis))

  onStart {
    println("Crawling started! ")
  }

  onLinkFound { link =>
    println("Found link: " + link)
  }
  
  onReceivedPage { page =>
    println("Received page: " + page.url)
    println(page.doc.body().text)
  }

  onEmailFound { email =>
    println("Found email:" + email)
  }

  // Several handlers can be registered for the same event
  onReceivedPage { page =>
    println("The title is: " + page.doc.title())
  }

}
Await.ready(crawler.crawl(), Duration.Inf)
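Since features like SitemapCrawler and EmailCrawler are plain mixin traits, custom handlers can presumably be packaged the same way. A minimal sketch under that assumption; TitleLogger is a hypothetical name, not part of the library:

// Hypothetical mixin: registers its handler at construction time,
// assuming onReceivedPage is available to any subtype of Crawler
trait TitleLogger extends Crawler {
  onReceivedPage { page =>
    println("Title: " + page.doc.title())
  }
}

// Mix it in like the built-in traits
val titledCrawler = new Crawler with UrlStorageBloomFilter with TitleLogger {
  override def startUrls: Seq[Uri] = List("http://employment.en-japan.com/")
  override val scraper: Scraper = new ThrottledHttpScraper(Frequency(3, 100.millis))
}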
