Skip to content

Commit

Permalink
Reformat code
Browse files Browse the repository at this point in the history
  • Loading branch information
ruippeixotog committed Aug 9, 2020
1 parent 04c9899 commit e84af73
Show file tree
Hide file tree
Showing 32 changed files with 255 additions and 181 deletions.
2 changes: 2 additions & 0 deletions build.sbt
Expand Up @@ -54,6 +54,8 @@ lazy val commonSettings = Seq(
case _ => Seq.empty[String]
}),

scalafmtOnCompile := true,

fork in Test := true,

tutTargetDirectory := baseDirectory.value,
Expand Down
@@ -1,6 +1,6 @@
package net.ruippeixotog.scalascraper.browser

import java.io.{ File, InputStream }
import java.io.{File, InputStream}

import net.ruippeixotog.scalascraper.model.Document

Expand Down
@@ -1,6 +1,6 @@
package net.ruippeixotog.scalascraper.browser

import java.io.{ File, InputStream }
import java.io.{File, InputStream}
import java.net.URL
import java.nio.charset.Charset
import java.util.UUID
Expand All @@ -12,7 +12,7 @@ import org.apache.http.HttpStatus
import com.gargoylesoftware.htmlunit._
import com.gargoylesoftware.htmlunit.html._
import com.gargoylesoftware.htmlunit.html.parser.neko.HtmlUnitNekoHtmlParser
import com.gargoylesoftware.htmlunit.util.{ NameValuePair, StringUtils }
import com.gargoylesoftware.htmlunit.util.{NameValuePair, StringUtils}

import net.ruippeixotog.scalascraper.browser.HtmlUnitBrowser._
import net.ruippeixotog.scalascraper.model._
Expand Down Expand Up @@ -80,7 +80,9 @@ class HtmlUnitBrowser(browserType: BrowserVersion = BrowserVersion.CHROME, proxy
using(inputStream) { _ =>
val response = new WebResponse(
newWebResponseData(inputStream, charset),
newRequest(WebClient.URL_ABOUT_BLANK, charset = Some(charset)), 0)
newRequest(WebClient.URL_ABOUT_BLANK, charset = Some(charset)),
0
)
val window = newWindow()
new HtmlUnitNekoHtmlParser().parseHtml(response, window)
HtmlUnitDocument(window)
Expand Down Expand Up @@ -123,9 +125,10 @@ class HtmlUnitBrowser(browserType: BrowserVersion = BrowserVersion.CHROME, proxy
req
}

private[this] def newWindow(): WebWindow = underlying.synchronized {
underlying.openTargetWindow(underlying.getCurrentWindow, null, UUID.randomUUID().toString)
}
private[this] def newWindow(): WebWindow =
underlying.synchronized {
underlying.openTargetWindow(underlying.getCurrentWindow, null, UUID.randomUUID().toString)
}
}

object HtmlUnitBrowser {
Expand Down Expand Up @@ -158,8 +161,9 @@ object HtmlUnitBrowser {

def attrs = underlying.getAttributesMap.asScala.mapValues(_.getValue).toMap

def hasAttr(name: String) = underlying.hasAttribute(name) &&
(underlying.getAttribute(name) ne DomElement.ATTRIBUTE_NOT_DEFINED)
def hasAttr(name: String) =
underlying.hasAttribute(name) &&
(underlying.getAttribute(name) ne DomElement.ATTRIBUTE_NOT_DEFINED)

def attr(name: String) = {
val v = underlying.getAttribute(name)
Expand All @@ -168,11 +172,12 @@ object HtmlUnitBrowser {

def text = underlying.getTextContent.trim

def innerHtml = underlying.getChildNodes.iterator.asScala.map {
case node: DomElement => HtmlUnitElement(node).outerHtml
case node: DomText => node.getWholeText
case node => node.asXml.trim
}.mkString
def innerHtml =
underlying.getChildNodes.iterator.asScala.map {
case node: DomElement => HtmlUnitElement(node).outerHtml
case node: DomText => node.getWholeText
case node => node.asXml.trim
}.mkString

def outerHtml = {
val a = attrs.map { case (k, v) => s"""$k="${StringUtils.escapeXmlAttributeValue(v)}"""" }
Expand All @@ -188,11 +193,12 @@ object HtmlUnitBrowser {
}

object HtmlUnitNode {
def apply(underlying: DomNode): Option[Node] = underlying match {
case elem: DomElement => Some(ElementNode(HtmlUnitElement(elem)))
case textNode: DomText => Some(TextNode(textNode.getWholeText))
case _ => None
}
def apply(underlying: DomNode): Option[Node] =
underlying match {
case elem: DomElement => Some(ElementNode(HtmlUnitElement(elem)))
case textNode: DomText => Some(TextNode(textNode.getWholeText))
case _ => None
}
}

case class HtmlUnitDocument(window: WebWindow) extends Document {
Expand All @@ -216,10 +222,11 @@ object HtmlUnitBrowser {

def root = HtmlUnitElement(underlying.getDocumentElement)

override def title = underlying match {
case page: HtmlPage => page.getTitleText
case _ => ""
}
override def title =
underlying match {
case page: HtmlPage => page.getTitleText
case _ => ""
}

def toHtml = root.outerHtml
}
Expand Down
@@ -1,13 +1,13 @@
package net.ruippeixotog.scalascraper.browser

import java.io.{ File, InputStream }
import java.io.{File, InputStream}

import scala.collection.JavaConverters._
import scala.collection.mutable

import org.jsoup.Connection.Method._
import org.jsoup.Connection.Response
import org.jsoup.{ Connection, Jsoup }
import org.jsoup.{Connection, Jsoup}

import net.ruippeixotog.scalascraper.browser.JsoupBrowser._
import net.ruippeixotog.scalascraper.model._
Expand Down Expand Up @@ -62,12 +62,13 @@ class JsoupBrowser(val userAgent: String = "jsoup/1.8", val proxy: java.net.Prox
def requestSettings(conn: Connection): Connection = conn

protected[this] def defaultRequestSettings(conn: Connection): Connection =
conn.cookies(cookieMap.asJava).
userAgent(userAgent).
header("Accept", "text/html,application/xhtml+xml,application/xml").
header("Accept-Charset", "utf-8").
timeout(15000).
maxBodySize(0)
conn
.cookies(cookieMap.asJava)
.userAgent(userAgent)
.header("Accept", "text/html,application/xhtml+xml,application/xml")
.header("Accept-Charset", "utf-8")
.timeout(15000)
.maxBodySize(0)

protected[this] def executeRequest(conn: Connection): Response =
conn.execute()
Expand Down Expand Up @@ -127,11 +128,12 @@ object JsoupBrowser {
}

object JsoupNode {
def apply(underlying: org.jsoup.nodes.Node): Option[Node] = underlying match {
case elem: org.jsoup.nodes.Element => Some(ElementNode(JsoupElement(elem)))
case textNode: org.jsoup.nodes.TextNode => Some(TextNode(textNode.text))
case _ => None
}
def apply(underlying: org.jsoup.nodes.Node): Option[Node] =
underlying match {
case elem: org.jsoup.nodes.Element => Some(ElementNode(JsoupElement(elem)))
case textNode: org.jsoup.nodes.TextNode => Some(TextNode(textNode.text))
case _ => None
}
}

case class JsoupDocument(underlying: org.jsoup.nodes.Document) extends Document {
Expand Down
@@ -1,6 +1,6 @@
package net.ruippeixotog.scalascraper.dsl

import net.ruippeixotog.scalascraper.model.{ Element, ElementQuery }
import net.ruippeixotog.scalascraper.model.{Element, ElementQuery}
import net.ruippeixotog.scalascraper.scraper._

object DSL extends ImplicitConversions with ScrapingOps {
Expand All @@ -12,7 +12,10 @@ object DSL extends ImplicitConversions with ScrapingOps {
contentExtractor.mapQuery(cssQuery)

def extractor[E <: Element, C, A](
cssQuery: String, contentExtractor: HtmlExtractor[E, C], contentParser: C => A): HtmlExtractor[E, A] = {
cssQuery: String,
contentExtractor: HtmlExtractor[E, C],
contentParser: C => A
): HtmlExtractor[E, A] = {

contentExtractor.mapQuery(cssQuery).map(contentParser)
}
Expand Down
Expand Up @@ -2,7 +2,7 @@ package net.ruippeixotog.scalascraper.dsl

import net.ruippeixotog.scalascraper.model._
import net.ruippeixotog.scalascraper.scraper.ContentExtractors._
import net.ruippeixotog.scalascraper.scraper.{ HtmlExtractor, PolyHtmlExtractor }
import net.ruippeixotog.scalascraper.scraper.{HtmlExtractor, PolyHtmlExtractor}

trait ImplicitConversions {

Expand Down
@@ -1,7 +1,7 @@
package net.ruippeixotog.scalascraper.dsl

import net.ruippeixotog.scalascraper.model.Element
import net.ruippeixotog.scalascraper.scraper.{ HtmlExtractor, HtmlValidator }
import net.ruippeixotog.scalascraper.scraper.{HtmlExtractor, HtmlValidator}
import net.ruippeixotog.scalascraper.util._
import scala.util.Try
import scalaz._
Expand Down Expand Up @@ -34,7 +34,11 @@ trait ScrapingOps extends syntax.ToIdOps with ToFunctorOps with std.AllInstances
def >?>[B, C](extractor1: HtmlExtractor[E, B], extractor2: HtmlExtractor[E, C]) =
self.map { doc => (Try(extractor1.extract(doc)).toOption, Try(extractor2.extract(doc)).toOption) }

def >?>[B, C, D](extractor1: HtmlExtractor[E, B], extractor2: HtmlExtractor[E, C], extractor3: HtmlExtractor[E, D]) =
def >?>[B, C, D](
extractor1: HtmlExtractor[E, B],
extractor2: HtmlExtractor[E, C],
extractor3: HtmlExtractor[E, D]
) =
self.map { doc =>
val e1 = Try(extractor1.extract(doc)).toOption
val e2 = Try(extractor2.extract(doc)).toOption
Expand All @@ -57,15 +61,19 @@ trait ScrapingOps extends syntax.ToIdOps with ToFunctorOps with std.AllInstances
}

def validateWith[R](
success: HtmlValidator[E, _],
errors: Seq[HtmlValidator[E, R]],
default: => R = throw new ValidationException): F[Either[R, A]] = {
success: HtmlValidator[E, _],
errors: Seq[HtmlValidator[E, R]],
default: => R = throw new ValidationException
): F[Either[R, A]] = {

self.map { doc =>
if (success.matches(doc)) Right(doc)
else errors.foldLeft[Either[R, A]](Right(doc)) { (res, error) =>
if (res.isLeft || !error.matches(doc)) res else Left(error.result.get)
}.fold(Left.apply, _ => Left(default))
else
errors
.foldLeft[Either[R, A]](Right(doc)) { (res, error) =>
if (res.isLeft || !error.matches(doc)) res else Left(error.result.get)
}
.fold(Left.apply, _ => Left(default))
}
}

Expand Down Expand Up @@ -106,7 +114,9 @@ trait ScrapingOps extends syntax.ToIdOps with ToFunctorOps with std.AllInstances
@inline final def and = self
}

implicit def deepFunctorOps[FA, A, E <: Element](self: FA)(implicit df: DeepFunctor.AuxA[FA, A], conv: ToQuery.Aux[A, E]) =
implicit def deepFunctorOps[FA, A, E <: Element](
self: FA
)(implicit df: DeepFunctor.AuxA[FA, A], conv: ToQuery.Aux[A, E]) =
new ElementsScrapingOps[df.F, A, E](df.asF(self))(df.f, conv)
}

Expand Down
49 changes: 27 additions & 22 deletions core/src/main/scala/net/ruippeixotog/scalascraper/dsl/ToQuery.scala
Expand Up @@ -29,31 +29,36 @@ object ToQuery extends LowerPriorityToQuery {

def apply[A](implicit toQuery: ToQuery[A]): Aux[A, toQuery.Out] = toQuery

implicit def queryToQuery[E <: Element] = new ToQuery[ElementQuery[E]] {
type Out = E
def apply(query: ElementQuery[E]) = query
}

implicit def typedElemToQuery[E <: Element.Strict[E]] = new ToQuery[E] {
type Out = E
def apply(elem: E) = ElementQuery(elem)
}

implicit def typedDocToQuery[D <: Document, E <: Element.Strict[E]](implicit ev: D <:< Document.Typed[E]) = new ToQuery[D] {
type Out = E
def apply(doc: D) = ElementQuery(ev(doc).root)
}
implicit def queryToQuery[E <: Element] =
new ToQuery[ElementQuery[E]] {
type Out = E
def apply(query: ElementQuery[E]) = query
}

implicit def typedElemToQuery[E <: Element.Strict[E]] =
new ToQuery[E] {
type Out = E
def apply(elem: E) = ElementQuery(elem)
}

implicit def typedDocToQuery[D <: Document, E <: Element.Strict[E]](implicit ev: D <:< Document.Typed[E]) =
new ToQuery[D] {
type Out = E
def apply(doc: D) = ElementQuery(ev(doc).root)
}
}

trait LowerPriorityToQuery {

implicit def elemToQuery[E <: Element] = new ToQuery[E] {
type Out = Element
def apply(elem: E) = ElementQuery[Element](elem)
}
implicit def elemToQuery[E <: Element] =
new ToQuery[E] {
type Out = Element
def apply(elem: E) = ElementQuery[Element](elem)
}

implicit def docToQuery[D <: Document] = new ToQuery[D] {
type Out = Element
def apply(doc: D) = ElementQuery[Element](doc.root)
}
implicit def docToQuery[D <: Document] =
new ToQuery[D] {
type Out = Element
def apply(doc: D) = ElementQuery[Element](doc.root)
}
}
Expand Up @@ -18,19 +18,19 @@ trait ElementQuery[+E <: Element] extends Iterable[E] {
def select(query: String): ElementQuery[E]
}

private[model] class RootElementQuery[E <: Element](
private val target: E,
exec: String => Iterator[E]) extends ElementQuery[E] {
private[model] class RootElementQuery[E <: Element](private val target: E, exec: String => Iterator[E])
extends ElementQuery[E] {

def iterator = Iterator(target)

def select(query: String): ElementQuery[E] =
new LazyElementQuery(query.split(","), target, exec)

override def equals(obj: Any) = obj match {
case q: ElementQuery[_] => iterator.sameElements(q.iterator)
case _ => false
}
override def equals(obj: Any) =
obj match {
case q: ElementQuery[_] => iterator.sameElements(q.iterator)
case _ => false
}

override def hashCode() = iterator.toSeq.hashCode()

Expand All @@ -40,7 +40,8 @@ private[model] class RootElementQuery[E <: Element](
private[model] class LazyElementQuery[E <: Element](
private val queries: Seq[String],
private val target: E,
exec: String => Iterator[E]) extends ElementQuery[E] {
exec: String => Iterator[E]
) extends ElementQuery[E] {

def iterator = exec(queries.mkString(","))

Expand All @@ -49,10 +50,11 @@ private[model] class LazyElementQuery[E <: Element](
new LazyElementQuery(newQueries, target, exec)
}

override def equals(obj: Any) = obj match {
case q: ElementQuery[_] => iterator.sameElements(q.iterator)
case _ => false
}
override def equals(obj: Any) =
obj match {
case q: ElementQuery[_] => iterator.sameElements(q.iterator)
case _ => false
}

override def hashCode() = iterator.toSeq.hashCode()

Expand Down

0 comments on commit e84af73

Please sign in to comment.