In [0]:
# Print the built-in help for the reader object exposed as `spark.read`
# (a DataFrameReader, per the help text captured below).
# NOTE(review): `spark` is not defined in this notebook excerpt — presumably the
# session object injected by the notebook runtime; confirm.
help(spark.read)

Help on DataFrameReader in module pyspark.sql.readwriter object:

class DataFrameReader(OptionUtils)
 |  DataFrameReader(spark: 'SparkSession')
 |  
 |  Interface used to load a :class:`DataFrame` from external storage systems
 |  (e.g. file systems, key-value stores, etc). Use :attr:`SparkSession.read`
 |  to access this.
 |  
 |  .. versionadded:: 1.4.0
 |  
 |  .. versionchanged:: 3.4.0
 |      Support Spark Connect.
 |  
 |  Method resolution order:
 |      DataFrameReader
 |      OptionUtils
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, spark: 'SparkSession')
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  csv(self, path: Union[str, List[str]], schema: Union[pyspark.sql.types.StructType, str, NoneType] = None, sep: Optional[str] = None, encoding: Optional[str] = None, quote: Optional[str] = None, escape: Optional[str] = None, comment: Optional[str] = None, header: Union[bool, str, NoneType] = None, inferSchema: Union

In [0]:
# Read a single CSV file, treating its first line as the column names.
# inferSchema=True asks Spark to derive column types from the data; without it
# every column is loaded as string. The schema recorded under this cell
# (id and salary as integer) is only reproducible with inference enabled.
df = spark.read.csv(
    path='dbfs:/FileStore/data/employee1.csv',
    header=True,
    inferSchema=True,
)
display(df)
df.printSchema()

id,name,gender,salary
1,tony,male,3000
2,bruce,male,4000


root
 |-- id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: integer (nullable = true)



In [0]:
# Same file as before, loaded through the generic reader chain:
# choose the source format, set the header option, then load the path.
# No schema inference is requested here, so columns come back as strings
# (see the printed schema).
df = (
    spark.read
    .format('csv')
    .option(key='header', value=True)
    .load(path='dbfs:/FileStore/data/employee1.csv')
)
display(df)
df.printSchema()

id,name,gender,salary
1,tony,male,3000
2,bruce,male,4000


root
 |-- id: string (nullable = true)
 |-- name: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: string (nullable = true)



In [0]:
# Point the CSV reader at a directory instead of a single file.
# The rows displayed below appear to come from more than one file in that
# directory — presumably every CSV under it is read; verify the folder contents.
df = (
    spark.read
    .option('header', True)
    .csv(path='dbfs:/FileStore/data/')
)
display(df)
df.printSchema()


id,name,gender,salary
3,peter,male,2500
4,steve,male,5000
1,tony,male,3000
2,bruce,male,4000


root
 |-- id: string (nullable = true)
 |-- name: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: string (nullable = true)



In [0]:
# Read several explicitly listed CSV files in one call by passing a list of paths.
# NOTE(review): the second file is under `data1/` while the first is under
# `data/` — confirm the differing directory is intentional.
files_path = [
    'dbfs:/FileStore/data/employee1.csv',
    'dbfs:/FileStore/data1/employee2.csv',
]
df = spark.read.csv(path=files_path, header=True)
display(df)
df.printSchema()

id,name,gender,salary
3,peter,male,2500
4,steve,male,5000
1,tony,male,3000
2,bruce,male,4000


root
 |-- id: string (nullable = true)
 |-- name: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: string (nullable = true)



In [0]:
from pyspark.sql.types import *

# Declare the column types up front instead of letting every column default
# to string: id -> integer, name/gender -> string, salary -> float.
# Each field is left nullable (the default).
schema = StructType([
    StructField("id", IntegerType()),
    StructField("name", StringType()),
    StructField("gender", StringType()),
    StructField("salary", FloatType()),
])

# Apply the explicit schema while reading the CSV directory with a header row.
df = spark.read.csv(
    path='dbfs:/FileStore/data/',
    header=True,
    schema=schema,
)
display(df)
df.printSchema()


id,name,gender,salary
3,peter,male,2500.0
4,steve,male,5000.0
1,tony,male,3000.0
2,bruce,male,4000.0


root
 |-- id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: float (nullable = true)



In [0]:
from pyspark.sql.types import *
# Print the class documentation for StructType (brought into scope by the
# wildcard import from pyspark.sql.types on the line above).
help(StructType)

Help on class StructType in module pyspark.sql.types:

class StructType(DataType)
 |  StructType(fields: Optional[List[pyspark.sql.types.StructField]] = None)
 |  
 |  Struct type, consisting of a list of :class:`StructField`.
 |  
 |  This is the data type representing a :class:`Row`.
 |  
 |  Iterating a :class:`StructType` will iterate over its :class:`StructField`\s.
 |  A contained :class:`StructField` can be accessed by its name or position.
 |  
 |  Examples
 |  --------
 |  >>> from pyspark.sql.types import *
 |  >>> struct1 = StructType([StructField("f1", StringType(), True)])
 |  >>> struct1["f1"]
 |  StructField('f1', StringType(), True)
 |  >>> struct1[0]
 |  StructField('f1', StringType(), True)
 |  
 |  >>> struct1 = StructType([StructField("f1", StringType(), True)])
 |  >>> struct2 = StructType([StructField("f1", StringType(), True)])
 |  >>> struct1 == struct2
 |  True
 |  >>> struct1 = StructType([StructField("f1", CharType(10), True)])
 |  >>> struct2 = StructType([S