In [1]:
import pyspark 
from pyspark import SparkContext
# Obtain the session through the builder so that re-running this cell reuses
# the already-active session. Constructing SparkContext() a second time in the
# same kernel raises an error because only one context may be active at once.
spark = pyspark.sql.SparkSession.builder.getOrCreate()
# Keep `sc` bound for any code that still expects the underlying SparkContext.
sc = spark.sparkContext

## Raw read without schema

In [2]:
# Plain CSV load: no header handling and no schema inference requested.
bookDF = spark.read.format("csv").load("bookcontents.csv")

In [3]:
# Inspect the column names and types assigned by the plain read; the recorded
# output below shows auto-generated names (_c0.._c2), all typed as string.
bookDF.printSchema()

root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)
 |-- _c2: string (nullable = true)



In [4]:
# Print the rows as a table; note that the file's header line
# (Chapter/Name/Page) shows up as an ordinary data row in the output below.
bookDF.show()

+-------+--------------------+----+
|    _c0|                 _c1| _c2|
+-------+--------------------+----+
|Chapter|                Name|Page|
|      1|        Introduction|  11|
|      2|Basic Engineering...|  19|
|      3|Advanced Engineer...|  28|
|      4|     Hands On Course|  60|
|      5|        Case Studies|  62|
|      6|Best Practices Cl...|  73|
|      7|130+ Data Sources...|  77|
|      8|1001 Interview Qu...|  82|
|      9|Recommended Books...|  87|
+-------+--------------------+----+



## Schema inference without a header row

In [5]:
# Ask Spark to infer column types from the data; this file has no header line.
bookInferDF = (
    spark.read
    .format("csv")
    .option("inferSchema", "true")
    .load("bookcontentsNoHeader.csv")
)

In [6]:
# Inspect the inferred types; the recorded output below shows _c0 and _c2 as
# integer and _c1 as string, still with auto-generated column names.
bookInferDF.printSchema()

root
 |-- _c0: integer (nullable = true)
 |-- _c1: string (nullable = true)
 |-- _c2: integer (nullable = true)



In [7]:
# Print the rows of the headerless file as a table.
bookInferDF.show()

+---+--------------------+---+
|_c0|                 _c1|_c2|
+---+--------------------+---+
|  1|        Introduction| 11|
|  2|Basic Engineering...| 19|
|  3|Advanced Engineer...| 28|
|  4|     Hands On Course| 60|
|  5|        Case Studies| 62|
|  6|Best Practices Cl...| 73|
|  7|130+ Data Sources...| 77|
|  8|1001 Interview Qu...| 82|
|  9|Recommended Books...| 87|
+---+--------------------+---+



## Use header row

In [8]:
# Use the first line of the file for column names instead of reading it as data.
bookHeaderDF = (
    spark.read
    .format("csv")
    .option("header", "true")
    .load("bookcontents.csv")
)

In [9]:
# Inspect the schema; the recorded output below shows the names taken from the
# header row (Chapter, Name, Page), with every column still typed as string.
bookHeaderDF.printSchema()

root
 |-- Chapter: string (nullable = true)
 |-- Name: string (nullable = true)
 |-- Page: string (nullable = true)



In [10]:
# Print the rows as a table; the header line now labels the columns rather
# than appearing as a data row.
bookHeaderDF.show()

+-------+--------------------+----+
|Chapter|                Name|Page|
+-------+--------------------+----+
|      1|        Introduction|  11|
|      2|Basic Engineering...|  19|
|      3|Advanced Engineer...|  28|
|      4|     Hands On Course|  60|
|      5|        Case Studies|  62|
|      6|Best Practices Cl...|  73|
|      7|130+ Data Sources...|  77|
|      8|1001 Interview Qu...|  82|
|      9|Recommended Books...|  87|
+-------+--------------------+----+



## Header row with schema inference

In [11]:
# Combine both options: column names from the header line, types inferred
# from the data. Note that this rebinds bookHeaderDF from the previous section.
bookHeaderDF = (
    spark.read
    .format("csv")
    .option("inferSchema", "true")
    .option("header", "true")
    .load("bookcontents.csv")
)

In [12]:
# Inspect the schema; the recorded output below shows header-derived names
# with Chapter and Page typed as integer and Name as string.
bookHeaderDF.printSchema()

root
 |-- Chapter: integer (nullable = true)
 |-- Name: string (nullable = true)
 |-- Page: integer (nullable = true)



In [13]:
# Print the rows of the header-aware, type-inferred DataFrame as a table.
bookHeaderDF.show()

+-------+--------------------+----+
|Chapter|                Name|Page|
+-------+--------------------+----+
|      1|        Introduction|  11|
|      2|Basic Engineering...|  19|
|      3|Advanced Engineer...|  28|
|      4|     Hands On Course|  60|
|      5|        Case Studies|  62|
|      6|Best Practices Cl...|  73|
|      7|130+ Data Sources...|  77|
|      8|1001 Interview Qu...|  82|
|      9|Recommended Books...|  87|
+-------+--------------------+----+



## Manual schema

In [14]:
# NOTE(review): wildcard import. The manual-schema cell below relies on
# StructType, StructField, IntegerType and StringType coming from here;
# consider importing those names explicitly.
from pyspark.sql.types import *

In [15]:
# Check which column types Spark infers for the headerless file, as a
# reference for the hand-written schema.
(
    spark.read
    .format("csv")
    .option("inferSchema", "true")
    .load("bookcontentsNoHeader.csv")
    .printSchema()
)

root
 |-- _c0: integer (nullable = true)
 |-- _c1: string (nullable = true)
 |-- _c2: integer (nullable = true)



In [22]:
# Hand-written schema for the headerless CSV: one StructField per column, in
# file order, giving each column a name and a type.
columns = [
    StructField("Chapter", IntegerType(), nullable=True),
    StructField("Name", StringType(), nullable=True),
    StructField("Page", IntegerType(), nullable=True),
]
csvschema = StructType(fields=columns)

In [23]:
# Read the headerless file with the explicit schema instead of inferring one.
manualschemaDF = (
    spark.read
    .format("csv")
    .schema(csvschema)
    .load("bookcontentsNoHeader.csv")
)

In [24]:
# Confirm the DataFrame carries the names and types declared in csvschema.
manualschemaDF.printSchema()

root
 |-- Chapter: integer (nullable = true)
 |-- Name: string (nullable = true)
 |-- Page: integer (nullable = true)



In [25]:
# Print the rows read under the manual schema as a table.
manualschemaDF.show()

+-------+--------------------+----+
|Chapter|                Name|Page|
+-------+--------------------+----+
|      1|        Introduction|  11|
|      2|Basic Engineering...|  19|
|      3|Advanced Engineer...|  28|
|      4|     Hands On Course|  60|
|      5|        Case Studies|  62|
|      6|Best Practices Cl...|  73|
|      7|130+ Data Sources...|  77|
|      8|1001 Interview Qu...|  82|
|      9|Recommended Books...|  87|
+-------+--------------------+----+

