In [4]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import StructType, StructField, StringType, DateType, FloatType, IntegerType

# Inicialização da sessão do Spark
spark = SparkSession.builder.appName("Exemplo").getOrCreate()

# Schema para simular os dados
schema = StructType([
    StructField("dt_ref", StringType(), True),
    StructField("tp_pes", StringType(), True),
    StructField("V_Sld_Oprc", FloatType(), True),
    StructField("qtd_Concessao", IntegerType(), True),
    StructField("vlr_Concessao", FloatType(), True),
    StructField("taxa_efetiva", FloatType(), True),
    StructField("Cartao", StringType(), True)
])

# Dados simulados
data = [
    ("2023-01-01", "1", 1500.0, 1, 500.0, 0.05, "Em dia"),
    ("2023-01-01", "2", 2000.0, 2, 800.0, 0.03, "Atraso"),
    ("2023-01-02", "1", 1200.0, 1, 600.0, 0.06, "Em dia"),
    ("2023-01-02", "1", 1800.0, 1, 700.0, 0.04, "Atraso")
]

# Criar DataFrame simulado
df = spark.createDataFrame(data, schema)

In [14]:
result = df.groupBy('dt_ref', when(col('tp_pes') == '1', '<pesFisica>').otherwise('<pesJuridica>').alias('Segmento')) \
    .agg(
        expr('date_format(dt_ref, "yyyy-MM-dd")').alias('dt_ref'),
        round(sum(when(col('Cartao') == 'Em dia', col('V_Sld_Oprc')))/1000).alias('V_Sld_Oprc_em_dia'),
        sum(when(col('Cartao') == 'Em dia', col('qtd_Concessao'))).alias('qtd_Concessao_em_dia'),
        round(sum(when(col('Cartao') == 'Em dia', col('vlr_Concessao')))/1000).alias('vlr_Concessao_em_dia'),
        expr('cast(round((sum(CASE WHEN Cartao = "Em dia" AND qtd_Concessao = 1 THEN taxa_efetiva ELSE 0 END) / sum(CASE WHEN Cartao = "Em dia" AND qtd_Concessao = 1 THEN vlr_Concessao ELSE 0 END)) * 100) as int) / 100').alias('taxa_efetiva_em_dia'),
    ).orderBy('Segmento')

# Exibir o resultado
result.show()

+----------+-------------+----------+-----------------+--------------------+--------------------+-------------------+
|    dt_ref|     Segmento|    dt_ref|V_Sld_Oprc_em_dia|qtd_Concessao_em_dia|vlr_Concessao_em_dia|taxa_efetiva_em_dia|
+----------+-------------+----------+-----------------+--------------------+--------------------+-------------------+
|2023-01-02|  <pesFisica>|2023-01-02|              1.0|                   1|                 1.0|                0.0|
|2023-01-01|  <pesFisica>|2023-01-01|              2.0|                   1|                 1.0|                0.0|
|2023-01-01|<pesJuridica>|2023-01-01|             null|                null|                null|               null|
+----------+-------------+----------+-----------------+--------------------+--------------------+-------------------+

