In [25]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StringType, BooleanType, LongType, DoubleType
import json

In [26]:
# Session shared by every cell below; getOrCreate() hands back an already
# active session instead of starting a second one when the cell is re-run.
spark = (
    SparkSession.builder
    .appName("Lendo arquivos Parquet")
    .getOrCreate()
)

In [27]:
def _get_schema_data(name: str) -> str:
    with open(f"./schemas/{name}.json") as arquivo:
        schema = json.load(arquivo)
    return ",".join(map(lambda p: f"{p['name']}:{p['type']}", schema['fields']))

In [28]:
def _build_schema(schema_arg: str):
    """Build a ``StructType`` from a ``"name:type,name:type"`` schema string.

    Args:
        schema_arg: Comma-separated ``<field>:<type>`` entries. Supported type
            names are ``string``, ``long``/``bigint``, ``double`` and
            ``bool``/``boolean``. An empty string produces an empty schema.

    Returns:
        A ``StructType`` with one nullable field per entry, in input order.

    Raises:
        KeyError: If an entry names a type that is not supported.
        IndexError: If an entry has no ``:`` separator.
    """
    d_types = {
        "string": StringType(),
        "long": LongType(),
        # "bigint" is the name Spark prints for LongType in simple strings/DDL.
        "bigint": LongType(),
        "double": DoubleType(),
        "bool": BooleanType(),
        # Spark's own schema JSON spells the boolean type "boolean".
        "boolean": BooleanType(),
    }

    schema = StructType()

    # "".split(",") yields [""], which has no type part; an empty description
    # simply means "no fields".
    if not schema_arg:
        return schema

    for word in schema_arg.split(","):
        x = word.split(":")
        field_name, type_name = x[0], x[1]
        if type_name not in d_types:
            raise KeyError(
                f"unsupported type {type_name!r} for field {field_name!r}; "
                f"expected one of {sorted(d_types)}"
            )
        schema.add(field_name, d_types[type_name], True)

    return schema

In [29]:
def _get_file(table: str):
    """Read the Parquet dataset at ``./data/silver/<table>`` and print a preview.

    The read uses an explicit schema rebuilt from the table's JSON description
    (``_get_schema_data`` followed by ``_build_schema``).

    Args:
        table: Name shared by the schema file and the dataset directory.

    Returns:
        The result of ``DataFrame.show()``; the rows are printed rather than
        returned, so callers do not receive the DataFrame itself.
    """
    reader = spark.read
    table_schema = _build_schema(_get_schema_data(table))
    frame = reader.schema(table_schema).parquet(f"./data/silver/{table}")
    return frame.show()

In [30]:
# Print a preview of the "monitrip" table read from ./data/silver.
_get_file("monitrip")

+-------------------+----------+--------------------+--------------------+--------------------+--------------------+-----------------+--------------+-------------------+
|mes_emissao_bilhete|mes_viagem| ponto_origem_viagem|ponto_destino_viagem|        tipo_servico|     tipo_gratuidade|media_valor_total|dp_valor_total|quantidade_bilhetes|
+-------------------+----------+--------------------+--------------------+--------------------+--------------------+-----------------+--------------+-------------------+
|            08/2022|   08/2022|         SERRINHA/BA|    MIGUEL CALMON/BA|           Executivo|Tarifa Normal - s...|            58.05|           0.0|                3.0|
|            08/2022|   10/2022|      JOAO PESSOA/PB|         BRASILIA/DF|           Executivo|Tarifa Normal - s...|            638.2|           0.0|                2.0|
|            08/2022|   07/2022|   RIO DE JANEIRO/RJ|              UBA/MG|           Executivo|Tarifa Promociona...|             88.4|         16.14| 