# In [0]:
import json
import pyspark.sql.functions as F
import re
import sys
import traceback
import pandas as pd
from datetime import datetime, date, timedelta
from delta.tables import DeltaTable
from enum import Enum, unique
from pyspark.sql import DataFrame
from pyspark.sql.window import Window
from types import TracebackType
from typing import List, Type, TypedDict
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.functions import row_number, desc



class Framework:
    
    @unique
    class LoadType(str, Enum):
        """Load strategies accepted by ``write_delta_table``.

        ``write_delta_table`` only dispatches APPEND_ALL, UPSERT and
        OVERWRITE_TABLE; any other member makes it raise
        ``NotImplementedError``.
        """

        OVERWRITE_TABLE = "OVERWRITE_TABLE"
        OVERWRITE_PARTITION = "OVERWRITE_PARTITION"  # not dispatched by write_delta_table
        APPEND_ALL = "APPEND_ALL"  
        APPEND_NEW = "APPEND_NEW"  # not dispatched by write_delta_table
        UPSERT = "UPSERT"

    @unique
    class RawFileFormat(str, Enum):
        """File formats of the raw input files.

        The readers in this class lower-case the member and pass it to
        ``spark.read.format``.
        """

        PARQUET = "PARQUET"
        DELTA = "DELTA"
        ORC = "ORC"
        CSV = "CSV"

    @unique
    class RunStatus(str, Enum):
        """Run outcome stored in the ``status`` field of a return object."""

        SUCCEEDED = "SUCCEEDED"
        FAILED = "FAILED"  # reported by exit_with_last_exception
        
    @unique
    class SchemaEvolutionMode(str, Enum):
        """How a write reacts when the incoming schema differs from the table.

        RESCUE_NEW_COLUMNS is declared so that code comparing against it does
        not fail with ``AttributeError``; no writer in this class implements
        it (they raise ``NotImplementedError``).
        """

        # Writers set no extra option, leaving schema enforcement to Delta.
        FAIL_ON_SCHEMA_MISMATCH = "FAIL_ON_SCHEMA_MISMATCH"
        # Writers enable schema merging (mergeSchema / autoMerge).
        ADD_NEW_COLUMNS = "ADD_NEW_COLUMNS"
        # Writers drop incoming columns that the target table does not have.
        IGNORE_NEW_COLUMNS = "IGNORE_NEW_COLUMNS"
        # Writers set the overwriteSchema option; rejected by the upsert writer.
        OVERWRITE_SCHEMA = "OVERWRITE_SCHEMA"
        # Reserved: not implemented by any writer.
        RESCUE_NEW_COLUMNS = "RESCUE_NEW_COLUMNS"


    class ReturnObject(TypedDict):
        """Shape of the dictionary built by ``_build_return_object``.

        ``exit_with_object`` serializes this dictionary to JSON.
        """

        status: str  # the status value passed to _build_return_object
        target_object: str
        num_records_read: int
        num_records_loaded: int
        num_records_errored_out: int  # num_records_read - num_records_loaded
        error_message: str  # truncated to 8000 characters by _build_return_object
        error_details: str  # exit_with_last_exception stores the formatted traceback here

    
    def check_workspace(environment)-> str:
        """Resolve the lakehouse root path for the current workspace.

        Called as a plain function from the class body, so it takes neither
        ``cls`` nor ``self``.

        Args:
            environment: layer folder name used in the fallback DBFS path.

        Returns:
            The storage root mapped to the workspace id read from the Spark
            conf, or ``dbfs:/mnt/lakehouse/{environment}`` when the id is not
            in the mapping.
        """
        # Workspace id of the cluster running this code.
        workspace_id = spark.conf.get("spark.databricks.clusterUsageTags.clusterOwnerOrgId")
        known_roots = {
            'xxxxxxxxx': 'abfss://xxxxxxxx.dfs.core.windows.net/xxxxxxxx',
            'yyyyyyyyy': 'abfss://xxxxxxxx.dfs.core.windows.net/xxxxxxxx',
            'zzzzzzzz': 'abfss://xxxxxxxx.dfs.core.windows.net/xxxxxxxx',
        }
        if workspace_id in known_roots:
            return known_roots[workspace_id]
        # Unmapped workspaces fall back to the DBFS mount instead of raising.
        return f'dbfs:/mnt/lakehouse/{environment}'
    
    # Storage root of each lakehouse layer, resolved once while the class body
    # executes (check_workspace reads the Spark conf at that moment).
    LAKEHOUSE_LANDING_ROOT =  check_workspace('land')
    LAKEHOUSE_BRONZE_ROOT  =  check_workspace('bronze')
    LAKEHOUSE_SILVER_ROOT  =  check_workspace('silver')
    LAKEHOUSE_GOLD_ROOT    =  check_workspace('gold')
    
    @classmethod
    def _build_return_object(
        cls,
        status: RunStatus,
        target_object: str,
        num_records_read: int = 0,
        num_records_loaded: int = 0,
        error_message: str = "",
        error_details: str = "",
    ) -> ReturnObject:
        """Assemble the dictionary that describes the outcome of a run.

        Args:
            status: run outcome stored under ``status``.
            target_object: value stored under ``target_object``.
            num_records_read: number of records read.
            num_records_loaded: number of records loaded.
            error_message: error summary; only the first 8000 characters are kept.
            error_details: stored unchanged under ``error_details``.

        Returns:
            The result dictionary; ``num_records_errored_out`` is the
            difference between records read and records loaded.
        """
        errored_out = num_records_read - num_records_loaded
        truncated_message = error_message[:8000]
        return dict(
            status=status,
            target_object=target_object,
            num_records_read=num_records_read,
            num_records_loaded=num_records_loaded,
            num_records_errored_out=errored_out,
            error_message=truncated_message,
            error_details=error_details,
        )
                
    @classmethod
    def exit_with_object(cls, results: ReturnObject):
        """Serialize ``results`` to JSON and pass it to ``dbutils.notebook.exit``."""
        serialized = json.dumps(results)
        dbutils.notebook.exit(serialized)
    
    
    @classmethod
    def exit_with_last_exception(cls):
        """Exit the notebook with a FAILED result built from the current exception.

        Reads the exception currently being handled from ``sys.exc_info()``,
        builds a return object whose message is ``"<type>: <value>"`` and whose
        details are the formatted traceback, and passes it to
        ``exit_with_object``.

        When no exception is being handled a generic message is reported
        instead of failing on ``None.__name__``.
        """
        exc_type, exc_value, _ = sys.exc_info()
        if exc_type is None:
            # Called outside an ``except`` block: there is nothing to format.
            error_message = "No active exception"
            error_details = ""
        else:
            error_message = f"{exc_type.__name__}: {exc_value}"
            error_details = traceback.format_exc()

        results = cls._build_return_object(
            status=cls.RunStatus.FAILED,
            target_object=None,
            error_message=error_message,
            error_details=error_details,
        )
        cls.exit_with_object(results)
  
  
        
    @classmethod
    def read_landing_zone_dataframe(
        cls,
        file_format: RawFileFormat,
        location: str,
        delimiter: str = ';',
        quote: str = "",
    ) -> DataFrame:
        """Read a landing-zone file into a DataFrame whose columns are all strings.

        Args:
            file_format: source format; its lower-cased value is the Spark format name.
            location: path handed to the Spark reader.
            delimiter: value of the reader's ``delimiter`` option.
            quote: value of the reader's ``quote`` option.

        Returns:
            The loaded DataFrame. For formats other than CSV every non-string
            column is cast to string; CSV results are returned as loaded.
        """
        reader = spark.read.format(file_format.lower())
        reader = (
            reader
            .option("header", True)
            .option("escape", "\"")
            .option("mergeSchema", True)
            .option("delimiter", delimiter)
            .option("quote", quote)
        )
        df = reader.load(location)

        if file_format == cls.RawFileFormat.CSV:
            return df

        # Cast every column that is not already a string.
        for column_name, data_type in df.dtypes:
            if data_type != "string":
                df = df.withColumn(column_name, F.col(column_name).cast("string"))
        return df
            
    @classmethod
    def generate_bronze_table_location(
        cls,
        table_name: str,
    ) -> str:
        """Build the storage path of a bronze table.

        Args:
            table_name: folder name appended to ``LAKEHOUSE_BRONZE_ROOT``.

        Returns:
            ``"{LAKEHOUSE_BRONZE_ROOT}/{table_name}/"``.

        Raises:
            ValueError: if ``table_name`` is ``None``, empty or only whitespace.
        """
        # Only table_name is validated: this method has no schema_name parameter.
        if table_name is None or not str(table_name).strip():
            raise ValueError("Caminho não pode conter brancos ou nulo, verifique !")
        return f"{cls.LAKEHOUSE_BRONZE_ROOT}/{table_name}/"

            
            
    @classmethod
    def generate_silver_table_location(
        cls,
        schema_name: str,
        table_name: str,
    ) -> str:
        """Build the storage path of a silver table.

        Args:
            schema_name: validated as non-empty, but not part of the returned path.
            table_name: folder name appended to ``LAKEHOUSE_SILVER_ROOT``.

        Returns:
            ``"{LAKEHOUSE_SILVER_ROOT}/{table_name}/"``.

        Raises:
            ValueError: if any argument is ``None``, empty or only whitespace.
        """
        for value in (schema_name, table_name):
            if value is None or not str(value).strip():
                raise ValueError("Caminho não pode conter brancos ou nulo, verifique !")
        return f"{cls.LAKEHOUSE_SILVER_ROOT}/{table_name}/"
        
    @classmethod
    def generate_gold_table_location(
        cls,
        schema_name: str,
        table_name: str,
    ) -> str:
        """Build the storage path of a gold table.

        Unlike the bronze/silver variants, a validation failure does not
        propagate: it is reported through ``exit_with_last_exception``.

        Args:
            schema_name: validated as non-empty, but not part of the returned path.
            table_name: folder name appended to ``LAKEHOUSE_GOLD_ROOT``.

        Returns:
            ``"{LAKEHOUSE_GOLD_ROOT}/{table_name}/"``.
        """
        try:
            for value in (schema_name, table_name):
                if value is None or not str(value).strip():
                    raise ValueError("Caminho não pode conter brancos ou nulo, verifique !")
            return f"{cls.LAKEHOUSE_GOLD_ROOT}/{table_name}/"
        except Exception:
            # ``Exception`` rather than a bare ``except`` so SystemExit and
            # KeyboardInterrupt are not turned into a FAILED result.
            cls.exit_with_last_exception()

    @classmethod
    def write_delta_table(
        cls,
        df: DataFrame,
        location: str,
        schema_name: str,
        table_name: str,
        load_type: LoadType,
        key_columns: List[str] = [],
        partition_columns: List[str] = [],
        schema_evolution_mode: SchemaEvolutionMode = SchemaEvolutionMode.ADD_NEW_COLUMNS,
    ) -> None:
        """Write ``df`` to a Delta table and register it in the metastore.

        Args:
            df: data to write.
            location: storage path of the Delta table.
            schema_name: database created if missing and used to register the table.
            table_name: table registered at ``location``.
            load_type: APPEND_ALL, UPSERT or OVERWRITE_TABLE. When ``location``
                is not yet a Delta table any other value is replaced by
                APPEND_ALL.
            key_columns: merge keys, required for UPSERT. Never mutated here.
            partition_columns: partition columns for append/overwrite. Never
                mutated here.
            schema_evolution_mode: forwarded to the writer.

        Returns:
            None. No result object is built; errors propagate as exceptions.

        Raises:
            ValueError: UPSERT requested without key columns.
            NotImplementedError: ``load_type`` has no writer.
        """
        # A merge or overwrite needs an existing table; on the first load fall
        # back to a plain append.
        if load_type != cls.LoadType.APPEND_ALL and not DeltaTable.isDeltaTable(spark, location):
            load_type = cls.LoadType.APPEND_ALL

        # Optimized writes, to avoid producing many small files.
        spark.conf.set("spark.databricks.delta.optimizeWrite.enabled", True)

        if load_type == cls.LoadType.APPEND_ALL:
            cls._write_table_using_append_all(
                df=df,
                location=location,
                partition_columns=partition_columns,
                schema_evolution_mode=schema_evolution_mode,
            )
        elif load_type == cls.LoadType.UPSERT:
            if not key_columns:
                raise ValueError("Nenhuma coluna foi especificada para o upsert")
            cls._write_table_using_upsert(
                df=df,
                location=location,
                key_columns=key_columns,
                schema_evolution_mode=schema_evolution_mode,
            )
        elif load_type == cls.LoadType.OVERWRITE_TABLE:
            cls._write_table_using_overwrite_table(
                df=df,
                location=location,
                partition_columns=partition_columns,
                schema_evolution_mode=schema_evolution_mode,
            )
        else:
            raise NotImplementedError(f"load_type não suportado: {load_type}")

        # Register the database and the table pointing at the written location.
        spark.sql(f"CREATE DATABASE IF NOT EXISTS {schema_name};")
        spark.sql(f"CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} USING DELTA LOCATION '{location}';")
          
                
    @classmethod
    def _write_table_using_overwrite_table(
        cls,
        df: DataFrame,
        location: str,
        partition_columns: List[str] = [],
        schema_evolution_mode: SchemaEvolutionMode = SchemaEvolutionMode.ADD_NEW_COLUMNS,
    ) -> None:
        """Overwrite the Delta table at ``location`` with ``df``.

        Args:
            df: data to write.
            location: storage path of the Delta table.
            partition_columns: columns to partition by; skipped when empty.
            schema_evolution_mode: how schema differences are handled.

        Raises:
            NotImplementedError: ``schema_evolution_mode`` is not one of the
                four handled modes.
        """
        modes = cls.SchemaEvolutionMode
        writer_options = {}

        # Resolve the schema mode BEFORE creating the writer: the writer is
        # bound to the DataFrame it was created from, so columns dropped
        # afterwards would still be written.
        if schema_evolution_mode == modes.FAIL_ON_SCHEMA_MISMATCH:
            pass
        elif schema_evolution_mode == modes.ADD_NEW_COLUMNS:
            writer_options["mergeSchema"] = True
        elif schema_evolution_mode == modes.IGNORE_NEW_COLUMNS:
            if DeltaTable.isDeltaTable(spark, location):
                # DeltaTable exposes its columns only through toDF().
                table_columns = set(DeltaTable.forPath(spark, location).toDF().columns)
                new_df_columns = [name for name in df.columns if name not in table_columns]
                if new_df_columns:
                    df = df.drop(*new_df_columns)
        elif schema_evolution_mode == modes.OVERWRITE_SCHEMA:
            writer_options["overwriteSchema"] = True
        else:
            raise NotImplementedError(
                f"schema_evolution_mode não suportado: {schema_evolution_mode}"
            )

        df_writer = df.write.format("delta").mode("overwrite")
        if partition_columns:
            df_writer = df_writer.partitionBy(partition_columns)
        for option_name, option_value in writer_options.items():
            df_writer = df_writer.option(option_name, option_value)

        df_writer.save(location)
                    
          
    @classmethod
    def _write_table_using_append_all(
        cls,
        df: DataFrame,
        location: str,
        partition_columns: List[str] = [],
        schema_evolution_mode: SchemaEvolutionMode = SchemaEvolutionMode.ADD_NEW_COLUMNS,
    ) -> None:
        """Append every row of ``df`` to the Delta table at ``location``.

        Args:
            df: data to write.
            location: storage path of the Delta table.
            partition_columns: columns to partition by; skipped when empty.
            schema_evolution_mode: how schema differences are handled.

        Raises:
            NotImplementedError: ``schema_evolution_mode`` is not one of the
                four handled modes.
        """
        modes = cls.SchemaEvolutionMode
        writer_options = {}

        # Resolve the schema mode BEFORE creating the writer: the writer is
        # bound to the DataFrame it was created from, so columns dropped
        # afterwards would still be written.
        if schema_evolution_mode == modes.FAIL_ON_SCHEMA_MISMATCH:
            pass
        elif schema_evolution_mode == modes.ADD_NEW_COLUMNS:
            writer_options["mergeSchema"] = True
        elif schema_evolution_mode == modes.IGNORE_NEW_COLUMNS:
            if DeltaTable.isDeltaTable(spark, location):
                # DeltaTable exposes its columns only through toDF().
                table_columns = set(DeltaTable.forPath(spark, location).toDF().columns)
                new_df_columns = [name for name in df.columns if name not in table_columns]
                if new_df_columns:
                    df = df.drop(*new_df_columns)
        elif schema_evolution_mode == modes.OVERWRITE_SCHEMA:
            # NOTE(review): Delta documents overwriteSchema for overwrite
            # writes; confirm it has any effect in append mode.
            writer_options["overwriteSchema"] = True
        else:
            raise NotImplementedError(
                f"schema_evolution_mode não suportado: {schema_evolution_mode}"
            )

        df_writer = df.write.format("delta").mode("append")
        if partition_columns:
            df_writer = df_writer.partitionBy(partition_columns)
        for option_name, option_value in writer_options.items():
            df_writer = df_writer.option(option_name, option_value)

        df_writer.save(location)
    
    @classmethod
    def _write_table_using_upsert(
        cls,
        df: DataFrame,
        location: str,
        key_columns: List[str] = [],
        schema_evolution_mode: SchemaEvolutionMode = SchemaEvolutionMode.ADD_NEW_COLUMNS,
    ) -> None:
        """Merge ``df`` into the Delta table at ``location`` (update or insert).

        Rows are matched on equality of every column in ``key_columns``;
        matched rows are fully updated and unmatched rows are inserted.

        Args:
            df: source data.
            location: storage path of the existing Delta table.
            key_columns: columns forming the merge condition; must not be empty.
            schema_evolution_mode: how schema differences are handled.

        Raises:
            ValueError: no key columns given, or OVERWRITE_SCHEMA requested.
            NotImplementedError: ``schema_evolution_mode`` is not handled.
        """
        if not key_columns:
            # An empty key list would produce an empty merge condition.
            raise ValueError("Nenhuma coluna foi especificada para o upsert")

        modes = cls.SchemaEvolutionMode
        auto_merge_key = "spark.databricks.delta.schema.autoMerge.enabled"
        enable_auto_merge = False

        if schema_evolution_mode == modes.FAIL_ON_SCHEMA_MISMATCH:
            pass
        elif schema_evolution_mode == modes.ADD_NEW_COLUMNS:
            enable_auto_merge = True
        elif schema_evolution_mode == modes.IGNORE_NEW_COLUMNS:
            if DeltaTable.isDeltaTable(spark, location):
                # DeltaTable exposes its columns only through toDF().
                table_columns = set(DeltaTable.forPath(spark, location).toDF().columns)
                new_df_columns = [name for name in df.columns if name not in table_columns]
                if new_df_columns:
                    df = df.drop(*new_df_columns)
        elif schema_evolution_mode == modes.OVERWRITE_SCHEMA:
            raise ValueError("OVERWRITE_SCHEMA não é suportado no UPSERT load type")
        else:
            raise NotImplementedError(
                f"schema_evolution_mode não suportado: {schema_evolution_mode}"
            )

        merge_condition = " AND ".join(
            f"source.`{name}` = target.`{name}`" for name in key_columns
        )
        delta_table = DeltaTable.forPath(spark, location)

        original_auto_merge = None
        if enable_auto_merge:
            # Remember the session value so it can be restored afterwards.
            original_auto_merge = spark.conf.get(auto_merge_key, "false")
            spark.conf.set(auto_merge_key, True)
        try:
            (
                delta_table.alias("target")
                .merge(df.alias("source"), merge_condition)
                .whenMatchedUpdateAll()
                .whenNotMatchedInsertAll()
                .execute()
            )
        finally:
            # Restore the conf only when this call changed it, and do so even
            # when the merge fails.
            if enable_auto_merge:
                spark.conf.set(auto_merge_key, original_auto_merge)
        
    @classmethod     
    def read_metadata(
        cls,
        df,
        file_format: RawFileFormat,
        table: str,
        delimiter: str = ';',
        extra_columns: list = [''],
    ) -> DataFrame:
        """Apply the data dictionary of ``table`` to ``df``.

        The dictionary file is read from
        ``/mnt/bemoldigitalde/Trusted/DATA_DICTIONARY/{table}.csv``. For every
        dictionary row that is not excluded and has a trusted column name the
        method:

        1. replaces the column by its decoded description when the row names
           a decode file;
        2. renames the column from ``TRANSIENT_COLUNA`` to ``TRUSTED_COLUNA``;
        3. casts it to ``TRUSTED_TIPO``.

        Args:
            df: DataFrame whose columns use the transient names.
            file_format: format of the dictionary file (lower-cased for Spark).
            table: dictionary file name without extension.
            delimiter: delimiter of the dictionary file.
            extra_columns: columns kept in the result in addition to the
                dictionary columns. The list is copied, never mutated, and
                empty names are ignored.

        Returns:
            A DataFrame with ``extra_columns`` followed by the trusted columns.
            ``ID_CLIENTE``, when present, is left-padded with zeros to 10
            characters.

        Raises:
            ValueError: a decode mapping has no key columns.
        """
        location = f"/mnt/bemoldigitalde/Trusted/DATA_DICTIONARY/{table}.csv"

        df_metadata = (
            spark.read
            .format(file_format.lower())
            .option("header", True)
            .option("escape", "\"")
            .option("mergeSchema", True)
            .option("delimiter", delimiter)
            .load(location)
        )

        # Only rows that have a trusted name and are not flagged as excluded.
        df_metadata = df_metadata.where("TRUSTED_EXCLUIDO = 'False' and TRUSTED_COLUNA is not null")

        # Collect once: the dictionary is walked several times below and each
        # collect() would otherwise re-run the read.
        metadata_rows = df_metadata.collect()

        # An input column that already carries the trusted name of a DIFFERENT
        # transient column is dropped, together with its dictionary row.
        renamed_targets = {
            row["TRUSTED_COLUNA"]
            for row in metadata_rows
            if row["TRUSTED_COLUNA"] != row["TRANSIENT_COLUNA"]
        }
        columns_dropped = [name for name in df.schema.names if name in renamed_targets]
        if columns_dropped:
            df = df.drop(*columns_dropped)
        dropped_lookup = set(columns_dropped)

        # Copy so that neither the caller's list nor the shared default
        # argument grows from one call to the next.
        columns_to_keep = [name for name in extra_columns if name]

        # Pass 1: decode. All decodes run before any rename because the decode
        # mappings are read against the current (transient) column names.
        for row in metadata_rows:
            transient = row["TRANSIENT_COLUNA"]
            decode_file = row["ARQUIVO_DECODIFICADOR"]
            if transient in dropped_lookup or decode_file is None:
                continue

            decoded_column = 'XX_' + transient

            df_decode = (
                spark.read.format("CSV")
                .option("header", True)
                .option("escape", "\"")
                .option("mergeSchema", True)
                .option("delimiter", "|")
                .load(f'/mnt/bemoldigitalde/Trusted/Arquivos_decodificacao/{decode_file}-DECODE.csv')
                .select("code", "description")
            )

            # Keep a single row per code in case the decode file has duplicates.
            window_spec = Window.partitionBy("code").orderBy(F.col("code").desc())
            df_decode = (
                df_decode
                .withColumn("row_number", F.row_number().over(window_spec))
                .filter('row_number = 1')
                .drop('row_number')
            )

            # NOTE(review): eval() executes whatever expression is stored in
            # the dictionary file. If the cell only ever holds a dict literal,
            # ast.literal_eval is the safe replacement -- confirm and switch.
            mapping = eval(row['MAPEAMENTO_DECODIFICACAO'])
            key_list = list(mapping.keys())
            if not key_list:
                raise ValueError(
                    f"MAPEAMENTO_DECODIFICACAO sem colunas para {transient}"
                )

            # The lookup key is the single mapped column, or all mapped
            # columns joined by '_' (any number of columns).
            if len(key_list) == 1:
                lookup_key = df[key_list[0]]
            else:
                key_parts = []
                for key_name in key_list:
                    if key_parts:
                        key_parts.append(F.lit('_'))
                    key_parts.append(F.col(key_name))
                lookup_key = F.concat(*key_parts)
            df = df.withColumn('key', lookup_key)

            df = df.join(df_decode, (df["key"] == df_decode["code"]), how="left")
            df = df.drop('code').drop('key')
            df = df.withColumnRenamed('description', decoded_column)

        # Pass 2: swap in decoded columns, rename and cast.
        for row in metadata_rows:
            transient = row["TRANSIENT_COLUNA"]
            if transient in dropped_lookup:
                continue

            if row["ARQUIVO_DECODIFICADOR"] is not None:
                # Replace the original column by its decoded counterpart.
                df = df.drop(transient)
                df = df.withColumnRenamed('XX_' + transient, transient)

            column_name = row["TRUSTED_COLUNA"]
            df = df.withColumnRenamed(transient, column_name)
            columns_to_keep.append(column_name)

            column_type = row["TRUSTED_TIPO"]
            if column_type == 'timestamp':
                # Values are parsed as yyyyMMdd and stored as a date.
                df = df.withColumn(
                    column_name,
                    F.to_date(F.unix_timestamp(column_name, "yyyyMMdd").cast(column_type)),
                )
            elif column_type[:7] == 'decimal':
                if column_type[:10] == 'decimal_br':
                    column_type = column_type.replace('_br', '')
                    # Referenced through cls: a bare ``decimal_br`` is not
                    # visible from inside a method.
                    decimal_br_udf = F.udf(cls.decimal_br)
                    df = df.withColumn(column_name, decimal_br_udf(column_name))
                # column_type looks like "decimal(p,s)": take what is between
                # the parentheses.
                type_args = column_type[8:-1].split(',')
                int_part = int(type_args[0])
                decimal_places = int(type_args[1])
                df = df.withColumn(
                    column_name,
                    df[column_name].cast(DecimalType(int_part, decimal_places)),
                )
            else:
                df = df.withColumn(column_name, df[column_name].cast(column_type))

        # Drop every column that is neither in the dictionary nor explicitly kept.
        df = df.select(columns_to_keep)

        # ID_CLIENTE is left-padded with zeros to 10 characters.
        if 'ID_CLIENTE' in df.columns:
            df = df.withColumn('ID_CLIENTE', F.lpad(df['ID_CLIENTE'], 10, '0'))

        return df

        
        
        
        
#     @classmethod     
#     def read_metadata(
#         cls, 
#         df, 
#         file_format: RawFileFormat,
#         table: str,
#         delimiter: str = ';',
#         extra_columns: list = [''],
# ##        partition_key_check: str='', 
# ##        orderBy_key_check: str='',
#     ) -> DataFrame:

#         location   = f"/mnt/bemoldigitalde/Trusted/DATA_DICTIONARY/{table}.csv"
                
#         ###try:
          
#         df_metadata = (
#               spark.read
#               .format(file_format.lower())
#               .option("header", True)
#               .option("escape", "\"")
#               .option("mergeSchema", True)
#               .option("delimiter", delimiter)
#               .load(location)
#         )
        
        
#         # aplica regra somente colunas com o de/para e que podem ser apresentadas 
#         df_metadata = df_metadata.where("TRUSTED_EXCLUIDO = 'False' and TRUSTED_COLUNA is not null")
        
#         # Verifica se existe colunas na Bronze com nome igual ao dicionario de dados que será aplicado na Silver  
#         ##field_list = []
#         Columns_Dropped = []
#         for field in df.schema.names:  
#           for linha in df_metadata.collect():
#               if linha["TRUSTED_COLUNA"] == field and linha["TRUSTED_COLUNA"] != linha["TRANSIENT_COLUNA"]:
#                   #print(field + ' / ' + linha["TRUSTED_COLUNA"] + ' / ' + linha["TRANSIENT_COLUNA"])
#                   Columns_Dropped.append(field) 
#                   df = df.drop(field)        
        
#         # colunas enviadas como parametro para serem mantidas como "DATA_EXTRACAO"
#         Columns_to_keep = extra_columns      

#         for linha in df_metadata.collect():
            
#             if linha["TRANSIENT_COLUNA"] not in Columns_Dropped:
              
#                 ## verifica se coluna partition by ou order by 
#  ##               if linha["TRANSIENT_COLUNA"] == partition_key_check:
#  ##                  partition_key_check = linha["TRUSTED_COLUNA"]
#  ##               if linha["TRANSIENT_COLUNA"] == orderBy_key_check:
#  ##                  orderBy_key_check = linha["TRUSTED_COLUNA"]
                    
#                 # renomeia a coluna
#                 df = df.withColumnRenamed(linha["TRANSIENT_COLUNA"],linha["TRUSTED_COLUNA"])
#                 Columns_to_keep.append(linha["TRUSTED_COLUNA"])  
#                 # altera o type de cada coluna
#                 column_type = linha["TRUSTED_TIPO"]
#                 column_name = linha["TRUSTED_COLUNA"]
#                 # altera o type de cada coluna
#                 #if ( column_type == 'timestamp' ) & ( column_type is not None ) :
#                 #if column_type == 'timestamp': 
#                 #  df = df.withColumn(column_name, when(substring(column_name,3,1) == '/', to_date(unix_timestamp(column_name, "dd/mm/yyyy").cast(column_type)) ).otherwise(to_date(unix_timestamp(column_name, "yyyyMMdd").cast(column_type)))) 
#                 if column_type == 'timestamp':
#                   df = df.withColumn(column_name, to_date(unix_timestamp(column_name, "yyyyMMdd").cast(column_type)))
#                 elif column_type[:7] == 'decimal':
#                   if column_type[:10] == 'decimal_br': 
#                     column_type = column_type.replace('_br','')
#                     decimal_br_udf = udf(decimal_br)
#                     df = df.withColumn(column_name, decimal_br_udf(column_name))
#                   int_part = int(column_type[8:-1].split(',')[0])
#                   decimal_places = int(column_type[8:-1].split(',')[1])
#                   df = df.withColumn(column_name, df[column_name].cast(DecimalType(int_part,decimal_places)))
#                 else:
#                   #print(linha["TRANSIENT_COLUNA"] + ' / ' + linha["TRUSTED_COLUNA"])
#                   df = df.withColumn(column_name, df[column_name].cast(column_type))   

#                 # aplica decode em cada coluna
#                 if  linha["ARQUIVO_DECODIFICADOR"] is not None:
#                     decode_table   =  linha["ARQUIVO_DECODIFICADOR"]
#                     decode_column  =  linha["TRUSTED_COLUNA"]

#                     df_decode = spark.read.format("CSV").option("header", True)\
#                           .option("escape", "\"")\
#                           .option("mergeSchema", True)\
#                           .option("delimiter", "|")\
#                           .load(f'/mnt/bemoldigitalde/Trusted/Arquivos_decodificacao/{decode_table}-DECODE.csv')

#                     # retira duplicidade no arquivo de decode quando assim houver
#                     df_decode.createOrReplaceTempView('tab_decode')  
#                     df_decode = (spark.sql("SELECT * FROM tab_decode"))                          
#                     windowSpec  = Window.partitionBy("code").orderBy(col("code").desc())
#                     df_decode = df_decode.withColumn("row_number",row_number().over(windowSpec)).filter('row_number = 1').drop('row_number')

                    
#                     df = df.join(df_decode,(df[decode_column] == df_decode.code),how="left") 
#                     df = df.drop('code')
#                     df = df.drop(decode_column)
#                     df = df.withColumnRenamed('description',decode_column)
                    
#         # Verifica se coluna de partition ou orderby tiveram alteração no nome             
# ##        if partition_key_check != '':
# ##            windowSpec  = Window.partitionBy(f"{partition_key_check}").orderBy(col(f"{orderBy_key_check}").desc())
# ##            df = df.withColumn("row_number",row_number().over(windowSpec)).filter('row_number = 1').drop('row_number')
# ##        else:
# ##            windowSpec  = Window.partitionBy(f"{partition_key}").orderBy(col(f"{orderBy_key}").desc())
# ##            df = df.withColumn("row_number",row_number().over(windowSpec)).filter('row_number = 1').drop('row_number')
                    
#         # Remove colunas que não existe dicionario de dados                                       
#         df = df.select(Columns_to_keep)
        
#         # insere zeros a esquerda na coluna ID_CLIENTE
#         if 'ID_CLIENTE' in df.columns:
#            df = df.withColumn('ID_CLIENTE', lpad(df['ID_CLIENTE'], 10, '0')) 

# ##        df = df.distinct()     
            
#         return df

#         ###except:

#         ###  print(f"erro de leitura {table}")
        

    ####################################################
    ## codigo importado funções já existentes Bemol   ##
    ####################################################
    
    ### Lista arquivos disponiveis na landing
    @classmethod        
    def folder_read_path(
        cls,
        source_table,
        source_path_landing,
        years: List[str] = [],
    ) -> list:
        """List the landing files found under each year folder.

        Args:
            source_table: not used by this method.
            source_path_landing: landing folder that contains one sub-folder
                per year.
            years: sub-folders to scan; when empty, every entry of
                ``source_path_landing`` is scanned.

        Returns:
            The full path of every file found, in listing order.

        Raises:
            ValueError: a file path does not match the expected naming
                pattern. The path is included in the message.
        """
        if not years:
            # Directory entries end with '/', which is stripped here.
            years = [entry.name[:-1] for entry in dbutils.fs.ls(source_path_landing)]

        # Raw string so that ``\d`` is a regex escape, not a string escape.
        name_pattern = re.compile(r'\d+X*_\d+X*_*\d*.csv$')

        file_list = []
        for year in years:
            for entry in dbutils.fs.ls(source_path_landing + '/' + year):
                if name_pattern.search(entry.path) is None:
                    # Fail with the offending path instead of an opaque
                    # AttributeError on ``None``.
                    raise ValueError(
                        f"Arquivo fora do padrão de nome esperado: {entry.path}"
                    )
                file_list.append(entry.path)
        return file_list
            
    ### Copia os arquivos da coletado para o diretório da processado
    @staticmethod
    def transport_collected_precessed(
        file_list:List[str]):
        """Move each file to the path obtained by replacing 'Coletado' with 'Processado'.

        Args:
            file_list: paths of the files to move.
        """
        for source_path in file_list:
            destination_path = source_path.replace('Coletado', 'Processado')
            dbutils.fs.mv(source_path, destination_path, True)
          
    ### Leitura da tabela Bronze já aplicando deduplicação 
    @staticmethod  
    def read_bronze(
        schema_bronze,
        bronze_table, 
        partition_key, 
        orderBy_key, 
        start_datetime, 
        end_datetime,
        from_to_field, 
    ) -> DataFrame:
        """Read a bronze table for a time window, keeping one row per key.

        Args:
            schema_bronze: database of the bronze table.
            bronze_table: name of the bronze table.
            partition_key: column that identifies a record.
            orderBy_key: column used to rank rows of the same record, descending.
            start_datetime: inclusive lower bound applied to ``from_to_field``.
            end_datetime: inclusive upper bound applied to ``from_to_field``.
            from_to_field: column the time window is applied to.

        Returns:
            For each ``partition_key`` value, the row with the highest
            ``orderBy_key`` inside the window.
        """
        within_window = (
            (col(f"{from_to_field}") >= start_datetime)
            & (col(f"{from_to_field}") <= end_datetime)
        )
        bronze_df = spark.sql(f'''SELECT * FROM {schema_bronze}.{bronze_table}''').where(within_window)

        # Rank rows of each key from newest to oldest and keep the first.
        newest_first = Window.partitionBy(f"{partition_key}").orderBy(col(f"{orderBy_key}").desc())
        ranked_df = bronze_df.withColumn("row_number", row_number().over(newest_first))
        return ranked_df.filter('row_number = 1').drop('row_number')
      
    ### Converts a pt-BR formatted number ("1.234,56") to dot-decimal notation ("1234.56")
    @staticmethod 
    def decimal_br(decimal):
        """Convert a pt-BR formatted number string to dot-decimal notation.

        Every ``.`` is removed and every ``,`` becomes ``.``, so
        ``"1.234,56"`` becomes ``"1234.56"``.

        Args:
            decimal: the number as a string, or ``None``.

        Returns:
            The converted string, or ``None`` when ``decimal`` is ``None``.
        """
        if decimal is None:
            return None
        return decimal.replace('.', '').replace(',', '.')
      

