# > Objetivo: Ler um arquivo JSON, corrigir os tipos de dados e reescrevê-lo em formato Delta

### Imports

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DateType
from pyspark.sql.functions import col

### Definição de variáveis

In [0]:
# Resolve the workspace user folder from the first entry under /Workspace/Users/.
# The entry's `path` attribute is read directly instead of parsing the string
# form of the whole listing; the path has the shape
# 'file:/Workspace/Users/<user>/', so element 3 of the '/'-split is the folder
# name.
# NOTE(review): this takes whichever folder is listed first - on a workspace
# with several users it may not be the current user; confirm.
user = dbutils.fs.ls("file:/Workspace/Users/")[0].path.split('/')[3]
# Landing folder that holds the raw input files.
filespath = f'file:/Workspace/Users/{user}/treinamento_formula1/landing'
# Destination folder for the drivers output.
savepath = 'dbfs:/mnt/formula1/bronze/drivers'

# Nested struct carrying the driver's forename and surname.
name_struct = (
    StructType()
    .add("forename", StringType(), True)
    .add("surname", StringType(), True)
)

# Explicit schema applied when reading the drivers file; every field is nullable.
dfschema = (
    StructType()
    .add("driverId", IntegerType(), True)
    .add("driverRef", StringType(), True)
    .add("number", IntegerType(), True)
    .add("code", StringType(), True)
    .add("name", name_struct, True)
    .add("dob", DateType(), True)
    .add("nationality", StringType(), True)
    .add("url", StringType(), True)
)

### Lendo arquivo

In [0]:
# Read drivers.json with the explicit schema, then promote the nested
# name.forename / name.surname fields to top-level columns and drop the
# original `name` struct.
# No 'header' option is set: that is a CSV reader option and the JSON source
# does not use it.
dataframe = (
    spark.read
    .format('json')
    .schema(dfschema)
    .load(f'{filespath}/drivers.json')
    .selectExpr("*", "name.forename as forename", "name.surname as surname")
    .drop('name')
)

### Salvando o arquivo

In [0]:
# Target location of the Delta output, built once and reused below.
delta_path = f'{savepath}/driversRaw.delta'

# Remove whatever already exists at the target, then write the dataframe there
# in Delta format.
# NOTE(review): deleting the directory also discards any existing Delta
# transaction log/history at that path - confirm the full wipe is intended on
# top of mode('overwrite').
dbutils.fs.rm(delta_path, recurse=True)
(
    dataframe.write
    .format('delta')
    .mode('overwrite')
    .save(delta_path)
)