# Chapter 5: Loading and Saving your Data (Scala)

The examples in this notebook show how to load and save data with the high-level data sources built into Spark SQL: Parquet, CSV, and JSON.

In [30]:
import org.apache.spark.sql.SparkSession
// Create (or reuse) the notebook's session, running Spark locally on all cores.
val spark = SparkSession
  .builder()
  .appName("Loading-Saving-Data")
  .master("local[*]")
  .getOrCreate()
// Handle to the SparkContext that backs the session.
val sc = spark.sparkContext

spark = org.apache.spark.sql.SparkSession@22f334e
sc = org.apache.spark.SparkContext@ab9cfe4


## Parquet Format

Loading data

In [31]:
// Load the Parquet file through the generic format/load entry point.
val parquetData = spark.read
  .format("parquet")
  .load("../data/person.parquet")

parquetData = [Name: string, Age: int]


[Name: string, Age: int]

In [32]:
// Print the rows of parquetData as a text table.
parquetData.show()

+----+---+
|Name|Age|
+----+---+
|Raul| 29|
|Javi| 34|
+----+---+



Saving data

In [33]:
// Persist the DataFrame as Parquet, replacing any earlier output at this path.
parquetData.write
  .format("parquet")
  .mode("overwrite")
  .save("../data/person_write.parquet")

In [34]:
// Reload the file that was just written and print it to check the round trip.
spark.read
  .format("parquet")
  .load("../data/person_write.parquet")
  .show()

+----+---+
|Name|Age|
+----+---+
|Raul| 29|
|Javi| 34|
+----+---+



## CSV Format

Loading data

In [35]:
// Use the first line as column names and let Spark infer the column types.
val csvData = spark.read
  .options(Map("header" -> "true", "inferschema" -> "true"))
  .csv("../data/person.csv")

csvData = [Name: string, Age: int]


[Name: string, Age: int]

In [36]:
// Print the rows of csvData as a text table.
csvData.show()

+----+---+
|Name|Age|
+----+---+
|Raul| 29|
|Javi| 34|
+----+---+



In [37]:
import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType}
// Explicit schema for the person data: two nullable columns, added field by field.
val schema = new StructType()
  .add("Name", StringType, nullable = true)
  .add("Age", IntegerType, nullable = true)

schema = StructType(StructField(Name,StringType,true), StructField(Age,IntegerType,true))


StructType(StructField(Name,StringType,true), StructField(Age,IntegerType,true))

In [38]:
// The file begins with a header line, so the reader must be told to skip it.
// Without header=true the "Name,Age" line is parsed as data and, since "Age"
// is not an integer, it shows up as a spurious null/null row.
val csvDataSchema = spark.read.option("header", "true").schema(schema).csv("../data/person.csv")

csvDataSchema = [Name: string, Age: int]


[Name: string, Age: int]

In [39]:
csvDataSchema.show()

+----+---+
|Name|Age|
+----+---+
|Raul| 29|
|Javi| 34|
+----+---+



Saving data

In [40]:
// Write the DataFrame as CSV with a header line, replacing any previous output.
csvData.write
  .format("csv")
  .mode("overwrite")
  .option("header", "true")
  .save("../data/person_write.csv")

In [41]:
// Read the written CSV back (header line + inferred types) and display it.
spark.read
  .options(Map("inferSchema" -> "true", "header" -> "true"))
  .csv("../data/person_write.csv")
  .show()

+----+---+
|Name|Age|
+----+---+
|Raul| 29|
|Javi| 34|
+----+---+



## JSON Format

Loading data

In [42]:
// Load the JSON file through the generic format/load entry point.
val jsonData = spark.read
  .format("json")
  .load("../data/person.json")

jsonData = [age: bigint, name: string]


[age: bigint, name: string]

In [43]:
// Print the rows of jsonData as a text table.
jsonData.show()

+---+----+
|age|name|
+---+----+
| 29|Raul|
| 33|Javi|
+---+----+



Saving data

In [44]:
// Persist the DataFrame as JSON, replacing any earlier output at this path.
jsonData.write
  .format("json")
  .mode("overwrite")
  .save("../data/person_write.json")

In [45]:
// Reload the JSON output that was just written and print it to check the round trip.
spark.read
  .format("json")
  .load("../data/person_write.json")
  .show()

+---+----+
|age|name|
+---+----+
| 29|Raul|
| 33|Javi|
+---+----+

