# Spark in Action - Chapter 2 Scala Version

In [None]:
import java.util.Properties

import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.sql.functions.{concat, lit, col}

Intitializing Scala interpreter ...

Spark Web UI available at http://192.168.1.41:4040
SparkContext available as 'sc' (version = 3.3.0, master = local[*], app id = local-1667056703538)
SparkSession available as 'spark'


import java.util.Properties
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.sql.functions.{concat, lit, col}


In [None]:
// Creates (or reuses) a Spark session on a local master using all cores.
// NOTE(review): the "{}" at the start of both jar paths looks like an
// unfilled format placeholder -- confirm and replace it with the real base
// directory that contains jars/sqlite-jdbc-3.36.0.3.jar.
// If a session already exists, getOrCreate() returns it and only runtime SQL
// configurations take effect, so the jar settings below may not be applied.
// builder() and getOrCreate() are side-effecting, hence the explicit
// parentheses.
val spark = SparkSession.builder()
    .appName("CSV to DB")
    .master("local[*]")
    .config("spark.jars","{}/jars/sqlite-jdbc-3.36.0.3.jar")
    .config("spark.driver.extraClassPath","{}/jars/sqlite-jdbc-3.36.0.3.jar")
    .getOrCreate()

22/10/29 17:18:31 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


spark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@59ce43a4


In [None]:
// Step 1: Ingestion
// ---------
// Loads authors.csv into a dataframe, using the file's first line as the
// column names. `df` stays a `var` because a later cell reassigns it.
var df = spark.read
    .option("header", "true")
    .csv("../net.jgp.books.spark.ch02/data/authors.csv")

df: org.apache.spark.sql.DataFrame = [lname: string, fname: string]


In [None]:
// Prints the first 20 rows, truncating long cell values (the show() defaults)
df.show(numRows = 20, truncate = true)

+--------+--------------+
|   lname|         fname|
+--------+--------------+
|  Pascal|        Blaise|
|Voltaire|      François|
|  Perrin|  Jean-Georges|
|Maréchal|Pierre Sylvain|
|   Karau|        Holden|
| Zaharia|         Matei|
+--------+--------------+



In [None]:
// Step 2: Transform
// ---------
// Adds a "name" column built as "<lname>, <fname>"; lit(", ") supplies the
// constant separator between the two source columns
val fullName = concat(col("lname"), lit(", "), col("fname"))
df = df.withColumn("name", fullName)

df: org.apache.spark.sql.DataFrame = [lname: string, fname: string ... 1 more field]


In [None]:
// Prints the first 20 rows, truncating long cell values (the show() defaults)
df.show(numRows = 20, truncate = true)

+--------+--------------+--------------------+
|   lname|         fname|                name|
+--------+--------------+--------------------+
|  Pascal|        Blaise|      Pascal, Blaise|
|Voltaire|      François|  Voltaire, François|
|  Perrin|  Jean-Georges|Perrin, Jean-Georges|
|Maréchal|Pierre Sylvain|Maréchal, Pierre ...|
|   Karau|        Holden|       Karau, Holden|
| Zaharia|         Matei|      Zaharia, Matei|
+--------+--------------+--------------------+



In [None]:
// Step 3: Save
// ----
// JDBC connection URL for the target database: a SQLite database file
// (spark_labs_scala.db) addressed by its absolute path on the local disk
val dbConnectionUrl: String = "jdbc:sqlite:/Users/development/ml/Spark/net.jgp.books.spark.ch02/data/spark_labs_scala.db"

dbConnectionUrl: String = jdbc:sqlite:/Users/development/ml/Spark/net.jgp.books.spark.ch02/data/spark_labs_scala.db


In [None]:
// Connection properties handed to the JDBC writer: the driver class to load
// plus the user/password credentials.
// NOTE(review): the credentials are presumably not needed for a SQLite file
// database -- confirm before relying on them.
val prop = new Properties()
Seq(
  "driver" -> "org.sqlite.JDBC",
  "user" -> "jgp",
  "password" -> "Spark<3Java"
).foreach { case (key, value) => prop.setProperty(key, value) }

prop: java.util.Properties = {user=jgp, password=Spark<3Java, driver=org.sqlite.JDBC}
res2: Object = null


In [None]:
// Saves the dataframe into the "ch02" table over JDBC; Overwrite mode
// replaces any data already stored in that table
val writer = df.write.mode(SaveMode.Overwrite)
writer.jdbc(dbConnectionUrl, "ch02", prop)

22/10/29 17:18:37 WARN JdbcUtils: Requested isolation level 1 is not supported; falling back to default isolation level 8


In [None]:
// Stop the SparkSession at the end of the application.
// stop() is side-effecting, so it is called with explicit parentheses
// (relying on auto-application is deprecated in Scala 2.13 and rejected in
// Scala 3).
spark.stop()