This repository has been archived by the owner on Oct 7, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit ca86033
Showing
7 changed files
with
202 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
|
||
/target | ||
/nbactions.xml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<groupId>net.srirangan</groupId> | ||
<artifactId>simplewebcrawler</artifactId> | ||
<version>1.0-SNAPSHOT</version> | ||
<description>A simple web crawler designed to showcase scalability with Scala and GridGain</description> | ||
|
||
<name>${project.artifactId}</name> | ||
|
||
<properties> | ||
<scala.version>2.8.0</scala.version> | ||
</properties> | ||
|
||
<repositories> | ||
<repository> | ||
<id>official-repo</id> | ||
<name>The official maven repo</name> | ||
<url>http://repo1.maven.org/maven2/</url> | ||
</repository> | ||
<repository> | ||
<id>scala-tools.org</id> | ||
<name>Scala-Tools Maven2 Repository</name> | ||
<url>http://scala-tools.org/repo-releases</url> | ||
</repository> | ||
<repository> | ||
<id>gridgain</id> | ||
<url>http://www.gridgainsystems.com/maven2/</url> | ||
</repository> | ||
<repository> | ||
<id>glassfish</id> | ||
<url>http://download.java.net/maven/glassfish/</url> | ||
</repository> | ||
</repositories> | ||
|
||
<pluginRepositories> | ||
<pluginRepository> | ||
<id>scala-tools.org</id> | ||
<name>Scala-Tools Maven2 Repository</name> | ||
<url>http://scala-tools.org/repo-releases</url> | ||
</pluginRepository> | ||
</pluginRepositories> | ||
|
||
<dependencies> | ||
<dependency> | ||
<groupId>org.scala-lang</groupId> | ||
<artifactId>scala-library</artifactId> | ||
<version>${scala.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.gridgain</groupId> | ||
<artifactId>gridgain</artifactId> | ||
<version>3.0.5c</version> | ||
</dependency> | ||
</dependencies> | ||
|
||
<build> | ||
<sourceDirectory>src/main/scala</sourceDirectory> | ||
<testSourceDirectory>src/test/scala</testSourceDirectory> | ||
<plugins> | ||
<plugin> | ||
<groupId>org.scala-tools</groupId> | ||
<artifactId>maven-scala-plugin</artifactId> | ||
<version>2.15.0</version> | ||
<executions> | ||
<execution> | ||
<goals> | ||
<goal>compile</goal> | ||
<goal>testCompile</goal> | ||
</goals> | ||
<configuration> | ||
<args> | ||
<arg>-make:transitive</arg> | ||
<arg>-dependencyfile</arg> | ||
<arg>${project.build.directory}/.scala_dependencies</arg> | ||
</args> | ||
</configuration> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-surefire-plugin</artifactId> | ||
<version>2.6</version> | ||
<configuration> | ||
<useFile>false</useFile> | ||
<disableXmlReport>true</disableXmlReport> | ||
<includes> | ||
<include>**/*Test.*</include> | ||
<include>**/*Suite.*</include> | ||
</includes> | ||
</configuration> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
</project> |
18 changes: 18 additions & 0 deletions
18
src/main/scala/net/srirangan/simplewebcrawler/app/Main.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package net.srirangan.simplewebcrawler.app | ||
|
||
import java.lang.String | ||
import org.gridgain.grid._ | ||
import org.gridgain.scalar.scalar | ||
import org.gridgain.scalar.scalar._ | ||
import net.srirangan.simplewebcrawler.tasks.LoadUrlTask | ||
|
||
/**
 * Command-line entry point of the crawler demo.
 *
 * Submits one `LoadUrlTask` for a hard-coded starting URL from inside a
 * `scalar { ... }` block and waits on the handle returned by `grid.execute`.
 */
object Main {

  /**
   * Kicks off the crawl.
   *
   * @param args command-line arguments; not read by this method
   */
  def main(args: Array[String]): Unit = {
    // Placeholder address; the crawl begins here.
    val startUrl: String = "http://base.url.to.begin.crawling?level=1"

    // NOTE(review): `scalar { ... }` presumably manages the grid node's
    // lifecycle around the block -- confirm against the GridGain Scalar docs.
    scalar {
      val crawl = grid.execute(classOf[LoadUrlTask], startUrl)
      // Block until the submitted task reports completion.
      crawl.get
    }
  }

}
12 changes: 12 additions & 0 deletions
12
src/main/scala/net/srirangan/simplewebcrawler/jobs/IndexKeywordsJob.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
package net.srirangan.simplewebcrawler.jobs | ||
|
||
import java.lang.String | ||
import org.gridgain.grid.GridJobAdapterEx | ||
import org.gridgain.scalar.scalar._ | ||
|
||
/**
 * Grid job standing in for keyword indexing: it only echoes the text it was
 * given to standard output.
 *
 * @param data text handed to the job; printed verbatim
 */
class IndexKeywordsJob(data: String) extends GridJobAdapterEx {

  /**
   * Prints `data` and produces no result.
   *
   * @return always `null`
   */
  def execute(): Object = {
    Console.println(data)
    null
  }
}
37 changes: 37 additions & 0 deletions
37
src/main/scala/net/srirangan/simplewebcrawler/jobs/LoadUrlJob.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package net.srirangan.simplewebcrawler.jobs | ||
|
||
import java.lang.String | ||
import java.util.{List,ArrayList} | ||
import org.gridgain.grid.GridJobAdapterEx | ||
import org.gridgain.scalar.scalar._ | ||
import net.srirangan.simplewebcrawler.tasks.{LoadUrlTask,IndexKeywordsTask} | ||
|
||
/**
 * Grid job that "loads" one URL, hands the resulting data to an
 * `IndexKeywordsTask`, and then submits a `LoadUrlTask` for every URL
 * discovered in that data.
 *
 * The fetching/parsing step is still a stub: the data is a fixed string derived
 * from the URL and the only "discovered" link is `url + ".1"`.
 *
 * @param url address this job is responsible for
 */
class LoadUrlJob(url: String) extends GridJobAdapterEx {

  /**
   * Runs the load / index / follow-links sequence for `url`.
   *
   * Each nested `grid.execute(...).get` is waited on before the next step, so
   * this method does not return until every follow-up task it started has
   * finished.
   *
   * NOTE(review): with the dummy link below, every job schedules exactly one
   * further `LoadUrlTask`, so the demo crawl has no natural stopping point
   * until real parsing logic (and some depth/visited check) is added.
   *
   * @return the data string produced for `url`
   */
  def execute(): Object = {
    println("load url for - " + url)

    val data: String = "this is data for " + url
    val urls: List[String] = new ArrayList[String]()

    //
    // .. actual parser logic comes here
    // .. data:String will contain the contents of url:String
    // .. urls:List is a list of all new URLs found in data:String
    //

    // Start indexing keywords for data:String from url:String
    grid.execute(classOf[IndexKeywordsTask], data).get

    // adding dummy url in urls:List
    urls.add(url + ".1")

    // Start a load task for each entry of urls:List.
    // The iterator is obtained exactly once: every call to `urls.iterator`
    // yields a brand-new iterator positioned at the first element, so asking a
    // fresh one for `hasNext` on each pass would stay true forever for a
    // non-empty list and keep re-submitting the first URL.
    val pending = urls.iterator
    while (pending.hasNext) {
      // Distinct name so the constructor parameter `url` is not shadowed.
      val nextUrl: String = pending.next()
      grid.execute(classOf[LoadUrlTask], nextUrl).get
    }

    data
  }
}
18 changes: 18 additions & 0 deletions
18
src/main/scala/net/srirangan/simplewebcrawler/tasks/IndexKeywordsTask.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package net.srirangan.simplewebcrawler.tasks | ||
|
||
import java.lang.String | ||
import java.util.{List,ArrayList} | ||
import org.gridgain.grid.GridJob | ||
import org.gridgain.grid.GridTaskNoReduceSplitAdapter | ||
import net.srirangan.simplewebcrawler.jobs.IndexKeywordsJob | ||
|
||
/**
 * Grid task that wraps its single `String` argument in one
 * `IndexKeywordsJob`.
 */
class IndexKeywordsTask extends GridTaskNoReduceSplitAdapter[String] {

  /**
   * Builds the job list for this task: always exactly one `IndexKeywordsJob`.
   *
   * @param gridSize not consulted by this implementation
   * @param url      argument forwarded to the job; despite the name, the
   *                 caller in `LoadUrlJob` passes the page data here
   * @return a new mutable list holding the single job
   */
  protected def split(gridSize: Int, url: String): List[GridJob] = {
    val result = new ArrayList[GridJob]()
    result.add(new IndexKeywordsJob(url))
    result
  }

}
17 changes: 17 additions & 0 deletions
17
src/main/scala/net/srirangan/simplewebcrawler/tasks/LoadUrlTask.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package net.srirangan.simplewebcrawler.tasks | ||
|
||
import java.lang.String | ||
import java.util.{List,ArrayList} | ||
import org.gridgain.grid._ | ||
import net.srirangan.simplewebcrawler.jobs.LoadUrlJob | ||
|
||
/**
 * Grid task that wraps its single `String` argument (a URL) in one
 * `LoadUrlJob`.
 */
class LoadUrlTask extends GridTaskNoReduceSplitAdapter[String] {

  /**
   * Builds the job list for this task: always exactly one `LoadUrlJob`.
   *
   * @param gridSize not consulted by this implementation
   * @param url      address handed to the job
   * @return a new mutable list holding the single job
   */
  def split(gridSize: Int, url: String): List[GridJob] = {
    val result = new ArrayList[GridJob]()
    result.add(new LoadUrlJob(url))
    result
  }

}