### Reading Data from CSV Files

In [0]:
# Load CSV data from the practice volume: the first row of each file is used
# as the header and column types are inferred from the data.
df = (
    spark.read.format('csv')
    .option('header', 'true')
    .option('inferSchema', 'true')
    .load("/Volumes/databricksdata/practice/csvdata")
)

In [0]:
# Render the DataFrame in the notebook output (Databricks display helper).
display(df)


### Reading Data from a Nested JSON File

In [0]:
from pyspark.sql.functions import explode, col

# Read the nested JSON file. multiLine allows a single JSON record to span
# several lines. The JSON reader always infers the schema when none is
# supplied; "inferSchema" is a CSV-only option with no effect here, so it is
# intentionally not set.
df = (
    spark.read.format("json")
    .option("multiLine", "true")
    .load("/Volumes/databricksdata/practice/jsondata/array-of-objects.json")
)

# Explode the "users" array into one row per element, then promote the fields
# of each element struct to top-level columns.
df_flat = df.select(explode(col("users")).alias("user")).select("user.*")

display(df_flat)

### Changing the Default Data Type -- User-Defined Types

In [0]:
# Load the banking CSV file with a header row and inferred column types.
df = (
    spark.read
    .options(header="true", inferSchema="true")
    .csv("/Volumes/databricksdata/practice/csvdata/Comprehensive_Banking_Database.csv")
)

In [0]:
# Override the inferred type of "Customer ID" with string. withColumn
# replaces the existing column of the same name and returns a new DataFrame,
# so df itself is left unchanged.
df2 = df.withColumn(
    "Customer ID",
    df["Customer ID"].astype("string"),
)
display(df2)


In [0]:
# Print the schema tree to confirm that "Customer ID" is now a string column.
df2.printSchema()

### StructType Schema

In [0]:
from pyspark.sql.types import *

# Schema of a single element of the "users" array: three string fields
# (nullable by default).
user_schema = (
    StructType()
    .add("id", StringType())
    .add("name", StringType())
    .add("email", StringType())
)

# Top-level JSON structure: a "credit" string plus a "users" array whose
# elements follow user_schema.
main_schema = StructType(
    fields=[
        StructField(name="credit", dataType=StringType()),
        StructField(name="users", dataType=ArrayType(elementType=user_schema)),
    ]
)

# Read the JSON file using the explicit schema instead of letting Spark infer
# one; multiLine allows a single JSON record to span several lines.
df = (
    spark.read
    .schema(main_schema)
    .option("multiLine", True)
    .json("/Volumes/databricksdata/practice/jsondata/array-of-objects.json")
)

# Data conversion: flatten the "users" array into one row per user

from pyspark.sql.functions import explode, col

# One row per element of "users", with the element's struct fields expanded
# into top-level columns.
df_struct = (
    df.select(explode("users").alias("user"))
    .select(col("user.*"))
)
display(df_struct)

