In [1]:
import pyspark 
from pyspark import SparkContext
# Reuse an already-running context when this cell is re-executed; calling
# SparkContext() a second time in the same kernel raises
# "Cannot run multiple SparkContexts at once".
sc = SparkContext.getOrCreate()

# Obtain the session through the builder instead of calling the SparkSession
# constructor directly; getOrCreate() attaches to the active context (`sc`)
# and returns the existing session if one has already been created.
spark = pyspark.sql.SparkSession.builder.getOrCreate()

# Read File

In [2]:
# Load the book's table of contents into a DataFrame; no schema is supplied,
# so Spark infers the column types from the JSON records.
bookDF = spark.read.json(path="bookcontents.json")

## Schema

In [3]:
# Print the DataFrame's schema (column names, types, nullability) as a tree.
bookDF.printSchema()

root
 |-- Chapter: long (nullable = true)
 |-- Name: string (nullable = true)
 |-- Page: long (nullable = true)



## Action: `show()` triggers execution and prints the rows

In [4]:
# Print the rows as a text table. With default arguments show() truncates long
# string values, which is why some Name entries end in "..." below.
bookDF.show()

+-------+--------------------+----+
|Chapter|                Name|Page|
+-------+--------------------+----+
|      1|        Introduction|  11|
|      2|Basic Engineering...|  19|
|      3|Advanced Engineer...|  28|
|      4|     Hands On Course|  60|
|      5|        Case Studies|  62|
|      6|Best Practices Cl...|  73|
|      7|130+ Data Sources...|  77|
|      8|1001 Interview Qu...|  82|
|      9|Recommended Books...|  87|
+-------+--------------------+----+



# Transformations

In [5]:
# Projection: keep only the chapter number and title, dropping the Page column.
ChapterAndNameDF = bookDF.select(
    "Chapter",
    "Name",
)

In [6]:
# Display the projected DataFrame (Chapter and Name only).
ChapterAndNameDF.show()

+-------+--------------------+
|Chapter|                Name|
+-------+--------------------+
|      1|        Introduction|
|      2|Basic Engineering...|
|      3|Advanced Engineer...|
|      4|     Hands On Course|
|      5|        Case Studies|
|      6|Best Practices Cl...|
|      7|130+ Data Sources...|
|      8|1001 Interview Qu...|
|      9|Recommended Books...|
+-------+--------------------+



In [7]:
# Keep only the rows whose chapter number is below 5 (chapters 1 through 4).
# The predicate is a SQL expression string evaluated by Spark.
Chapter1to4 = ChapterAndNameDF.filter("Chapter < 5")

In [8]:
# Display the filtered DataFrame (chapters 1-4).
Chapter1to4.show()

+-------+--------------------+
|Chapter|                Name|
+-------+--------------------+
|      1|        Introduction|
|      2|Basic Engineering...|
|      3|Advanced Engineer...|
|      4|     Hands On Course|
+-------+--------------------+



# Chaining transformations into a single query

In [9]:
# Same result as the step-by-step cells above, written as a single chained
# query: project the columns, keep chapters below 5, then print the rows.
(
    bookDF
    .select("Chapter", "Name")
    .where("Chapter < 5")
    .show()
)

+-------+--------------------+
|Chapter|                Name|
+-------+--------------------+
|      1|        Introduction|
|      2|Basic Engineering...|
|      3|Advanced Engineer...|
|      4|     Hands On Course|
+-------+--------------------+



## Write as JSON

In [20]:
# Persist chapters 1-4 as JSON. Spark writes a directory named "Contents1to4"
# containing part files, not a single .json file.
# The default save mode raises an error when the target path already exists,
# which makes this cell fail on every re-run of the notebook; "overwrite"
# replaces the previous output so Restart & Run All stays repeatable.
Chapter1to4.write.mode("overwrite").json("Contents1to4")