# Apache Spark with Scala

More details and magic commands here: https://github.com/apache/incubator-toree/blob/master/etc/examples/notebooks/magic-tutorial.ipynb


# RDD

In [3]:
val rdd = sc.parallelize(Seq(
     |                ("math",    55),
     |                ("math",    56),
     |                ("english", 57),
     |                ("english", 58),
     |                ("science", 59),
     |                ("science", 54)))
rdd

ParallelCollectionRDD[2] at parallelize at <console>:19

In [1]:
rdd.collect()

Array((math,55), (math,56), (english,57), (english,58), (science,59), (science,54))

## sortByKey - Ascending order

In [5]:
val sorted1 = rdd.sortByKey()
sorted1.collect()

Array((english,57), (english,58), (math,55), (math,56), (science,59), (science,54))

## sortByKey - Descending order

In [7]:
val sorted2 = rdd.sortByKey(false)
sorted2.collect()

Array((science,59), (science,54), (math,55), (math,56), (english,57), (english,58))

## sortByKey - Custom order

Let us define an implicit sorting for the method sortByKey()

We have used '{' above to limit the scope of the implicit ordering

In [8]:
implicit val sortIntegersByString = new Ordering[String] {
     |       override def compare(a: String, b: String) = {
     |          val result = a.compare(b)
     |          //We use -ve to sort the key in descending order
     |          -result
     |       }
     |    }
     |    val sorted2 = rdd.sortByKey()
     |
     |    //Result
     |    sorted2.collect()
     | }

Name: Compile Error
Message: <console>:1: error: ';' expected but 'implicit' found.
{    |    implicit val sortIntegersByString = new Ordering[String] {
          ^
StackTrace: 

## Now let's run the object ...

In [5]:
val files = Array("./resources/data/input1.txt", "./resources/data/input2.txt")

val myAnaliser = new Analiser

myAnaliser.main(files)

TEXT1
said=456
alice=377
that=234
with=172
very=139
TEXT2
vibrating=1
young=10
stumbled=8
intimately=1
someone=1
COMMON
little
said
that
they
this
with
Time elapsed: 8 seconds


## Desired Output:  
TEXT1  
said=456  
alice=377  
that=234  
with=172  
very=139  
TEXT2  
that=759  
with=448  
were=365  
from=326  
they=302  
COMMON  
little  
said  
that  
they  
this  
with  
