### Importação de configurações e funções

In [0]:
%run Config/storage_config

In [0]:
%run Config/secrets_config

In [0]:

from pyspark.sql.functions import (
    input_file_name,
    regexp_extract,
    current_timestamp,
    lit,
    col,
    lpad
)

### Atribuição de variáveis e path

In [0]:
# Operation-type codes. In this notebook they are lower-cased to name the bronze
# output folders and written as-is into the `tipo_operacao` column.
TIPO_OPERACAO_EXP, TIPO_OPERACAO_IMP = "EXP", "IMP"
# Dataset identifiers for the NCM and auxiliary tables.
# NOTE(review): not referenced by the cells visible here - presumably used elsewhere; confirm.
NCM, TB_AUX = "NCM", "tb_aux"

In [0]:
# Root location of the raw trade-balance files; the value comes from the
# configuration loaded by the %run cells at the top of the notebook.
input_path = str(balanca_comercial_path)

In [0]:
# Bronze-layer destinations: every dataset lives under <bronze_path>/balancacomercial.
_bronze_balanca_root = f"{bronze_path}/balancacomercial"
# EXP / IMP folders are named after the lower-cased operation-type code.
output_path_exp = f"{_bronze_balanca_root}/{TIPO_OPERACAO_EXP.lower()}"
output_path_imp = f"{_bronze_balanca_root}/{TIPO_OPERACAO_IMP.lower()}"
output_path_NCM = f"{_bronze_balanca_root}/ncm"
output_path_tb_aux = f"{_bronze_balanca_root}/tb_aux"


In [0]:
# Inspection only: list what is present at the raw input location (result is not stored).
dbutils.fs.ls(balanca_comercial_path)

In [0]:
# Read the raw export files as CSV. No schema is supplied to the reader.
exp_csv_options = {
    "header": "true",               # first line of each file holds the column names
    "delimiter": ";",               # fields are separated by semicolons
    "recursiveFileLookup": "true",  # also pick up files in nested sub-directories
    "pathGlobFilter": "EXP*",       # keep only files whose name starts with EXP
}
df_raw_exp = spark.read.options(**exp_csv_options).csv(input_path)

In [0]:
# Preview: render at most 10 rows of the raw export data.
display(df_raw_exp.limit(10))

In [0]:
# Read the raw import files as CSV. No schema is supplied to the reader.
imp_csv_options = {
    "header": "true",               # first line of each file holds the column names
    "delimiter": ";",               # fields are separated by semicolons
    "recursiveFileLookup": "true",  # also pick up files in nested sub-directories
    "pathGlobFilter": "IMP*",       # keep only files whose name starts with IMP
}
df_raw_imp = spark.read.options(**imp_csv_options).csv(input_path)

In [0]:
# Preview: render at most 10 rows of the raw import data.
display(df_raw_imp.limit(10))

In [0]:
# Columns added to the raw export rows, applied in this order:
#   file_name     - source file of each row (input_file_name)
#   ano           - CO_ANO cast to int
#   mes           - CO_MES copied unchanged (no cast)
#   tipo_operacao - constant TIPO_OPERACAO_EXP
#   data_ingestao - current_timestamp() of the run
exp_added_columns = [
    ("file_name", input_file_name()),
    ("ano", col("CO_ANO").cast("int")),
    ("mes", col("CO_MES")),
    ("tipo_operacao", lit(TIPO_OPERACAO_EXP)),
    ("data_ingestao", current_timestamp()),
]

df_bronze_exp = df_raw_exp
for exp_col_name, exp_col_expr in exp_added_columns:
    df_bronze_exp = df_bronze_exp.withColumn(exp_col_name, exp_col_expr)


In [0]:
# Preview: render at most 10 rows of the enriched export DataFrame.
display(df_bronze_exp.limit(10))

In [0]:
# Columns added to the raw import rows, applied in this order:
#   file_name     - source file of each row (input_file_name)
#   ano           - CO_ANO cast to int
#   mes           - CO_MES copied unchanged (no cast)
#   tipo_operacao - constant TIPO_OPERACAO_IMP
#   data_ingestao - current_timestamp() of the run
imp_added_columns = [
    ("file_name", input_file_name()),
    ("ano", col("CO_ANO").cast("int")),
    ("mes", col("CO_MES")),
    ("tipo_operacao", lit(TIPO_OPERACAO_IMP)),
    ("data_ingestao", current_timestamp()),
]

df_bronze_imp = df_raw_imp
for imp_col_name, imp_col_expr in imp_added_columns:
    df_bronze_imp = df_bronze_imp.withColumn(imp_col_name, imp_col_expr)


In [0]:
# Preview: render at most 10 rows of the enriched import DataFrame.
display(df_bronze_imp.limit(10))


In [0]:
# Inspection only: show what already exists at the IMP bronze destination.
# This cell runs before the write cells, so on a first run the path may not exist yet;
# that case is reported instead of raising and aborting a "Run All".
try:
    display(dbutils.fs.ls(output_path_imp))
except Exception as ls_imp_error:
    # The missing-path case is recognised by the Java exception class name in the
    # error text; any other failure is re-raised unchanged.
    if "FileNotFoundException" not in str(ls_imp_error):
        raise
    print(f"{output_path_imp} does not exist yet - nothing to list.")

In [0]:
# Inspection only: show what already exists at the EXP bronze destination.
# This cell runs before the write cells, so on a first run the path may not exist yet;
# that case is reported instead of raising and aborting a "Run All".
try:
    display(dbutils.fs.ls(output_path_exp))
except Exception as ls_exp_error:
    # The missing-path case is recognised by the Java exception class name in the
    # error text; any other failure is re-raised unchanged.
    if "FileNotFoundException" not in str(ls_exp_error):
        raise
    print(f"{output_path_exp} does not exist yet - nothing to list.")

In [0]:
# Persist the enriched export rows as a Delta table partitioned by year and month.
# df_bronze_exp is built from a full read of every EXP* file under input_path, so the
# table is replaced on each run ("overwrite", matching the IMP write). With "append",
# every re-execution of the notebook would add a second copy of all rows.
(df_bronze_exp
    .write
    .format("delta")
    .mode("overwrite")
    .partitionBy("ano", "mes")
    .save(output_path_exp)
)


In [0]:
# Persist the enriched import rows as a Delta table at output_path_imp,
# replacing any existing contents and partitioning by year (ano) and month (mes).
imp_delta_writer = df_bronze_imp.write.format("delta")
imp_delta_writer = imp_delta_writer.mode("overwrite").partitionBy("ano", "mes")
imp_delta_writer.save(output_path_imp)