In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, TimestampType, FloatType, DoubleType
import os

In [0]:
%run ../Utils/DBInstanceUtils

In [0]:
%run ../Utils/ProcessingUtils

In [0]:
%run ../Utils/FormatterUtils

In [0]:
%run ../Utils/FileUtils

In [0]:
def file_to_Df(path, extension, mode="FAILFAST"):
    """Load `path` through the notebook's `spark` reader, retrying as multi-line.

    The first attempt reads with only the parse `mode` option set. If that
    attempt raises anything, a second attempt is made with the `multiLine`
    option switched on as well.

    Args:
        path: Location (or glob) handed to the reader's ``load``.
        extension: Name passed to the reader's ``format`` (e.g. 'json').
        mode: Value for the reader's 'mode' option; defaults to "FAILFAST".

    Returns:
        Whatever ``load`` returns for the first attempt that succeeds.

    Raises:
        Exception: the error from the multi-line attempt when both attempts
            fail (the first error remains attached as its ``__context__``).
    """
    def _new_reader():
        # A fresh reader per attempt, so the retry never inherits state from
        # the failed one.
        return spark.read.format(extension).option('mode', mode)

    try:
        return _new_reader().load(path)
    except Exception:
        # Plain read failed: retry once with multi-line parsing enabled.
        return _new_reader().option('multiLine', True).load(path)

In [0]:
# Handle for the 'treinamento_formula1' instance. DBInstanceUtils is not
# defined in this notebook -- presumably it comes from the %run of
# ../Utils/DBInstanceUtils (confirm there).
instance = DBInstanceUtils('treinamento_formula1')

# Reader format for the raw qualifying files; passed to file_to_Df below.
extension = 'json'

# Column name -> Spark type, handed to Refine(...).enforce_schema().
# Key order matches the original declaration.
schema = dict(
    constructorId=IntegerType(),
    driverId=IntegerType(),
    number=IntegerType(),
    position=IntegerType(),
    q1=StringType(),
    q2=StringType(),
    q3=StringType(),
    qualifyId=IntegerType(),
    raceId=IntegerType(),
)

In [0]:
# Read everything under the instance's qualifying/ folder with the current
# `extension` as the reader format. The path is built inside the try so that
# a failure in get_filepath() is handled the same way as a read failure.
try:
    df = file_to_Df(
        path=f'{instance.get_filepath()}/qualifying/*',
        extension=extension,
    )
except Exception as e:
    # End the notebook run here, returning the error text as the exit value.
    dbutils.notebook.exit(f'Error reading file: \n {e}')

In [0]:
# Pass the frame through Refine with the declared column types. Refine is not
# defined in this notebook -- presumably it comes from one of the %run'd Utils
# notebooks (confirm).
# NOTE(review): `df` is rebound here, so re-running this cell alone feeds the
# already-refined frame back into Refine.
df = Refine(df).enforce_schema(schema).load()

# Render the result for visual inspection.
df.display()

In [0]:
# Output naming for the saved copy.
# NOTE(review): `extension` held 'json' for the read step and is rebound to
# 'delta' here; re-running the read cell after this one would pass 'delta'
# to file_to_Df.
filename = 'qualifyingRaw'
extension = 'delta'
file = f'{filename}.{extension}'  # evaluates to 'qualifyingRaw.delta'

# Record the save location on the instance; it is read back later through
# instance.get_dbfs_savepath().
DBInstanceUtils.set_dbfs_savepath(
    instance,
    'mnt',
    'formula1/bronze/qualifying',
    file,
)

In [0]:
# Write the frame in Delta format to the save path stored on the instance,
# replacing whatever is already there ("overwrite" mode).
(
    df.write
    .format("delta")
    .mode("overwrite")
    .save(instance.get_dbfs_savepath())
)