# Scala Syntax Sheet

### Declare Variables

In [3]:
import scala.collection.mutable.ArrayBuffer

val c: Seq[Int] = Seq(1, 2, 3, 3, 6, 9, 0)
println(c.size)

// Use Vector as default immutable datatype
val d = Vector("a", "b", "c", "c")
println(c.count(_==3))

// Use ArrayBuffer as default mutable datatype
var e = ArrayBuffer("a", "b", "c")

// Groupby and count items in collection
println(c.groupBy(identity).mapValues(_.size))
println(d.groupBy(identity).mapValues(_.size))

7
2
Map(0 -> 1, 1 -> 1, 6 -> 1, 9 -> 1, 2 -> 1, 3 -> 2)
Map(b -> 1, a -> 1, c -> 2)


import scala.collection.mutable.ArrayBuffer
c: Seq[Int] = List(1, 2, 3, 3, 6, 9, 0)
d: scala.collection.immutable.Vector[String] = Vector(a, b, c, c)
e: scala.collection.mutable.ArrayBuffer[String] = ArrayBuffer(a, b, c)


### Functions

In [7]:
// Quick
def suma(a: Int) = a + 3
suma(8)

// Explicit
def sayHello(name: String):String = {
    s"Hello $name!"
}
sayHello("Kevin")

// Scala allows multiple parameter lists: Useful when last parameter is an implicit or another function
def sayHello1(name: String)(whoAreYou : () => String) = {
    s"Hello $name! My name is ${whoAreYou()}"
}
def provideName() = {"Scala"}
val f = sayHello1("test")(provideName)

// Anonymous Functions
val g = sayHello1("test"){() => "Anonymous"}

// implicit declarations
def sayHello2(name:String)(implicit myself: String) = {
    s"Hello $name! My name is $myself"
}
implicit val myString = "implicits" // automatically knows to populate implicit myself parameter
val h = sayHello2("test")

suma: (a: Int)Int
sayHello: (name: String)String
sayHello1: (name: String)(whoAreYou: () => String)String
provideName: ()String
f: String = Hello test! My name is Scala
g: String = Hello test! My name is Anonymous
sayHello2: (name: String)(implicit myself: String)String
myString: String = implicits
h: String = Hello test! My name is implicits


### Classes

In [30]:
class fastTrack(val name: String, var myself: String){
    
    // No explicit constructor needed
    // val/var determine if getter/setters are made automatically
    
    def sayHi()= {s"Hello $name! My name is $myself"}
    
    def sayHello1(name1:String, myself1:String)={
        s"Hello $name1! My name is $myself1"
    }
    
    def greet(name1:String)(myself1:String)={
        s"Hello $name! My name is $myself"
    }
    
    val greeting = greet(name)(myself)
}

val fast = new fastTrack("test", "me")
// Only has getter since declared as val
println(fast.name)
// Has setter since declared as var
println(fast.myself)
fast.myself = "fast"
println(fast.myself)

println(fast.sayHi())
println(fast.sayHello1("Kevin","Thomas"))
println(fast.greeting)

test
me
fast
Hello test! My name is fast
Hello Kevin! My name is Thomas
Hello test! My name is me


defined class fastTrack
fast: fastTrack = fastTrack@53a313c1
fast.myself: String = fast


### Case Classes
* Don't need 'new' keyword
* Supports pattern matching
* All arguments prefixed with val by default

In [32]:
case class person(fname:String, lname:String){
    val myname = s"My name is $fname $lname"
}

val me = person("Kevin", "Kurek")
println(me.myname)

My name is Kevin Kurek


defined class person
me: person = person(Kevin,Kurek)


### Pattern Matching with Case Classes

In [37]:
// abstract class: requires subclasses to have same definitions
abstract class Person(fname: String, lname: String){
    def fullName = {s"$fname-$lname"}
}

case class Student(fname: String, lname: String, id: Int) 
        extends Person(fname, lname) // abstract Person class is parent of Student; child of Person

case class Worker(fname: String, lname: String, id: Int)
        extends Person(fname, lname) // child of Person

val me = Student("Kevin", "Kurek", 28)
val them = Worker("Other", "Person", 30)

// template variable must be of type Person. "<:" means T must be person or derivative of Person
def getFullID[T <: Person](something: T) = {
    something match {
        case Student(fname, lname, id) => s"$fname-$lname-$id"
        case p: Person => p.fullName
    }
}

println(getFullID(me)) // matches on Student type thus: fname-lname-id
println(getFullID(them)) // matches on Person since Worker is child thus: p.fullName = fname-lname

Kevin-Kurek-28
Other-Person


defined class Person
defined class Student
defined class Worker
me: Student = Student(Kevin,Kurek,28)
them: Worker = Worker(Other,Person,30)
getFullID: [T <: Person](something: T)String


### Implicit Classes

In [40]:
implicit class stringUtils(myString: String){
    def scalaWordCount() = {
        val split = myString.split("\\s+") //split on whitespace
        val grouped = split.groupBy(word => word) //alternative syntax: .groupBy(identity)
        val countPerKey = grouped.mapValues(group => group.length) //alternative syntax: .mapValues(_.size)
        countPerKey
    }
}

// Can automatically use class method without doing stringUtils.scalaWordCount()
"Spark collections mimic Scala collections".scalaWordCount()

Map(collections -> [Ljava.lang.String;@7b735980, Spark -> [Ljava.lang.String;@53da7e7b, Scala -> [Ljava.lang.String;@2e0d3430, mimic -> [Ljava.lang.String;@6def9066)


defined class stringUtils
res33: scala.collection.immutable.Map[String,Int] = Map(collections -> 2, Spark -> 1, Scala -> 1, mimic -> 1)


### Looping

In [1]:
val longWords = new StringBuilder

"Hello world it's Al".split(" ").foreach{e => 
                if (e.length>4) longWords.append(s" $e")
                else println("Not appended: " + e)
            }

Not appended: it's
Not appended: Al


longWords: StringBuilder =  Hello world


In [11]:
import scala.collection.mutable.ArrayBuffer

var mybuffer = ArrayBuffer.empty[Int]
val otherlist = List(1, 2, 3, 4)

// append value from one collection to another
otherlist.foreach(e => if(e > 2) mybuffer += e)

import scala.collection.mutable.ArrayBuffer
mybuffer: scala.collection.mutable.ArrayBuffer[Int] = ArrayBuffer(3, 4)
otherlist: List[Int] = List(1, 2, 3, 4)


In [16]:
val m = Map("fname"->"Kevin", "lname"->"Kurek", "mname"->"Thomas")

// loop key-value pairs
m.foreach{kv => if(kv._1.contains("fname")) println(kv._2)
                        else if (kv._1.contains("lname")) println(kv._2)
                        else println("Neither Condition")
         }


// equivalent syntax to be more explicit about key-value names
m foreach {
    case(key, value) => if(key.contains("fname")) println(value)
                        else if (key.contains("lname")) println(value)
                        else println("Neither Condition")
}

Kevin
Kurek
Neither Condition
Kevin
Kurek
Neither Condition


m: scala.collection.immutable.Map[String,String] = Map(fname -> Kevin, lname -> Kurek, mname -> Thomas)


In [17]:
// print index and element
val fruits = Array("apple", "banana", "orange")
for ((elem, count) <- fruits.zipWithIndex) { println(s"element $count is $elem") }

element 0 is apple
element 1 is banana
element 2 is orange


fruits: Array[String] = Array(apple, banana, orange)


### Functional Programming

In [45]:
object math_stuff{
    val double = (i:Int) => i * 2
    val triple = (i:Int) => i * 3
}

val x = 1 to 10
val double_list = x.map(math_stuff.double)
val triple_list = x.map(math_stuff.triple)

defined object math_stuff
x: scala.collection.immutable.Range.Inclusive = Range 1 to 10
double_list: scala.collection.immutable.IndexedSeq[Int] = Vector(2, 4, 6, 8, 10, 12, 14, 16, 18, 20)
triple_list: scala.collection.immutable.IndexedSeq[Int] = Vector(3, 6, 9, 12, 15, 18, 21, 24, 27, 30)


In [47]:
val myList = List("Spark", "mimics", "Scala", "collections")

// Same results, different syntax
val full_mapped = myList.map(s => s.toUpperCase)
val short_mapped = myList.map(_.toUpperCase)

myList: List[String] = List(Spark, mimics, Scala, collections)
full_mapped: List[String] = List(SPARK, MIMICS, SCALA, COLLECTIONS)
short_mapped: List[String] = List(SPARK, MIMICS, SCALA, COLLECTIONS)


In [50]:
val myList = List("Spark", "mimics", "Scala", "collections")

val filters = List("mimics", "collections")
val flatMapped = myList.flatMap{s=> 
                               if (filters.contains(s))
                                   None
                               else
                                   Some(s)
                                }

myList: List[String] = List(Spark, mimics, Scala, collections)
filters: List[String] = List(mimics, collections)
flatMapped: List[String] = List(Spark, Scala)


In [None]:
val final_sum = double_list.foldLeft(0.0)(_+_)
println(final_sum)

In [83]:
// Creates Numeric alphabet
// val alphabet = 'a' to 'z'
val alphabet = ('a' to 'z')
val alpha = List('a','b','c')
println(alphabet)
println(alpha)

NumericRange a to z
List(a, b, c)


alphabet: scala.collection.immutable.NumericRange.Inclusive[Char] = NumericRange a to z
alpha: List[Char] = List(a, b, c)


In [84]:
// def foldLeft[B](z: B)(op: (B, A) ⇒ B): B
val donuts: Seq[String] = Seq("Plain", "Strawberry", "Glazed")
val donut_words = donuts.foldLeft("")((a, b) => a + b + " Donut ")

// val donut_words = donuts.foldLeft("")((a, b) => b + " Donut ") // Glazed Donut (last entry)
// println(s"All donuts = ${donuts.foldLeft("")((a, b) => a + b + " Donut ")}")

donuts: Seq[String] = List(Plain, Strawberry, Glazed)
donut_words: String = "Plain Donut Strawberry Donut Glazed Donut "


In [85]:
val r = alphabet.reverse.toList
val smash = r.foldRight("")(_+_)
println(smash)

zyxwvutsrqponmlkjihgfedcba


r: List[Char] = List(z, y, x, w, v, u, t, s, r, q, p, o, n, m, l, k, j, i, h, g, f, e, d, c, b, a)
smash: String = zyxwvutsrqponmlkjihgfedcba


In [17]:
// push collections together
val chars = ('a' to 'z') ++ ('A' to 'Z')

chars: scala.collection.immutable.IndexedSeq[Char] = Vector(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z)


### Basic Spark

In [81]:
val items = (0 to 100).map(i => (s"name-$i", i))

// create RDD
val itemsRDD = sc.parallelize(items) 

// create dataframe
val itemsDF = itemsRDD.toDF()
itemsDF.show(5)

+------+---+
|    _1| _2|
+------+---+
|name-0|  0|
|name-1|  1|
|name-2|  2|
|name-3|  3|
|name-4|  4|
+------+---+
only showing top 5 rows



items: scala.collection.immutable.IndexedSeq[(String, Int)] = Vector((name-0,0), (name-1,1), (name-2,2), (name-3,3), (name-4,4), (name-5,5), (name-6,6), (name-7,7), (name-8,8), (name-9,9), (name-10,10), (name-11,11), (name-12,12), (name-13,13), (name-14,14), (name-15,15), (name-16,16), (name-17,17), (name-18,18), (name-19,19), (name-20,20), (name-21,21), (name-22,22), (name-23,23), (name-24,24), (name-25,25), (name-26,26), (name-27,27), (name-28,28), (name-29,29), (name-30,30), (name-31,31), (name-32,32), (name-33,33), (name-34,34), (name-35,35), (name-36,36), (name-37,37), (name-38,38), (name-39,39), (name-40,40), (name-41,41), (name-42,42), (name-43,43), (name-44,44), (name-45,45), (name-46,46), (name-47,47), (name-48,48), (name-49,49), (name-50,50), (name-51,51), (name-52,52), (name-...


In [74]:
itemsDF.registerTempTable("items")

In [79]:
val sql_df = spark.sql("""SELECT * 
                        FROM items 
                        WHERE _2 < 10""")
sql_df.show()

+------+---+
|    _1| _2|
+------+---+
|name-0|  0|
|name-1|  1|
|name-2|  2|
|name-3|  3|
|name-4|  4|
|name-5|  5|
|name-6|  6|
|name-7|  7|
|name-8|  8|
|name-9|  9|
+------+---+



sql_df: org.apache.spark.sql.DataFrame = [_1: string, _2: int]
