In [None]:
from pyspark.sql import SparkSession, DataFrame
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from pyspark.sql.utils import AnalysisException
from pyspark.sql.types import StructType, StructField, StringType, BinaryType, IntegerType, DoubleType, TimestampType, DateType, LongType
from delta.tables import DeltaTable
from pyspark.sql.utils import AnalysisException
from pyspark.storagelevel import StorageLevel
from typing import Union, Optional
from pyspark.sql.functions import input_file_name

# --- AWS credentials ---
# Read from the standard AWS environment variables so secrets never have to be
# typed into (and saved with) the notebook. Each value falls back to "" when the
# variable is unset, which matches the previous hard-coded placeholders.
import os

accessKeyId = os.environ.get("AWS_ACCESS_KEY_ID", "")
secretAccessKey = os.environ.get("AWS_SECRET_ACCESS_KEY", "")

# --- Spark session ---
def create_spark_session() -> SparkSession:
    """Build (or reuse) a Delta-enabled SparkSession configured for S3A access.

    Spark-level properties (Delta extension and catalog, driver classpath,
    driver/executor memory) are supplied through the session builder, because
    Spark only reads them from its own configuration at start-up. Hadoop/S3A
    properties are applied to the SparkContext's Hadoop configuration.

    The S3A access key and secret key are taken from the module-level
    ``accessKeyId`` and ``secretAccessKey`` variables.

    Returns:
        SparkSession: the session returned by ``getOrCreate()``. If a session
        already exists in this process it is reused as-is.
    """
    spark = (
        SparkSession
        .builder
        .appName("Bronze Zone Streaming")
        .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
        .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
        # These are Spark properties, not Hadoop ones: writing them to the Hadoop
        # configuration after the session exists (as was done before) has no effect.
        .config("spark.driver.extraClassPath", "/usr/local/spark/jars/*")
        .config("spark.driver.memory", "8g")
        .config("spark.executor.memory", "16g")
        .enableHiveSupport()
        .getOrCreate()
    )

    spark.sparkContext.setLogLevel("WARN")

    conf = spark.sparkContext._jsc.hadoopConfiguration()
    # No explicit fs.s3a.aws.credentials.provider is set. The previous code wrote
    # "spark.hadoop.fs.s3a.aws.credentials.provider" straight into the Hadoop
    # configuration, a key S3A never reads (the "spark.hadoop." prefix is only
    # stripped when the property travels through SparkConf). The provider it named,
    # TemporaryAWSCredentialsProvider, also requires fs.s3a.session.token, which is
    # not supplied here. S3A's default provider chain already picks up the
    # access/secret key pair configured below.
    conf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    conf.set("fs.s3a.fast.upload", "true")
    conf.set("fs.s3a.bucket.all.committer.magic.enabled", "true")
    conf.set("fs.s3a.directory.marker.retention", "keep")
    conf.set("fs.s3a.access.key", accessKeyId)
    conf.set("fs.s3a.secret.key", secretAccessKey)

    return spark

# Module-level session object used by the cells that follow.
spark = create_spark_session()

In [4]:
# Root of the bronze layer; the `usuarios` Delta table under it is registered
# as a temp view so it can be queried with Spark SQL.
bronze_path = "s3a://dev-lab-02-us-east-2-bronze/spotify/"
(
    spark.read
    .format("delta")
    .load(bronze_path + "usuarios")
    .createOrReplaceTempView("usuarios")
)

In [15]:
# Number of rows currently visible through the `usuarios` temp view.
spark.sql("""
select count(1) from usuarios
""").show()

+--------+
|count(1)|
+--------+
|     110|
+--------+



In [14]:
# Rows of the `usuarios` view, newest `timestamp` first.
spark.sql("""
select * from usuarios
order by timestamp desc
""").show()

+---+--------------------+--------------------+-------------------+--------------------+
| id|                nome|               email|          timestamp|      origem_arquivo|
+---+--------------------+--------------------+-------------------+--------------------+
| 22|  Dr. Paulette Aubry|virginie60@exampl...|2025-05-06 15:01:20|s3a://dev-lab-02-...|
| 42|       Marvin Staude|siegmar34@example...|2025-05-06 15:01:20|s3a://dev-lab-02-...|
| 23|      Jaroslaw Karge|xschlosser@exampl...|2025-05-06 15:01:20|s3a://dev-lab-02-...|
| 27| Benigno Ocaña López|victoriagomila@ex...|2025-05-06 15:01:20|s3a://dev-lab-02-...|
| 28|  Karl-Heinrich Henk|enginkambs@exampl...|2025-05-06 15:01:20|s3a://dev-lab-02-...|
| 11|  Micaela Jara Peiró| cosme82@example.net|2025-05-06 15:01:20|s3a://dev-lab-02-...|
| 29|   Sylvie Carpentier| hbrunel@example.net|2025-05-06 15:01:20|s3a://dev-lab-02-...|
| 13|        Émilie Baron|charlescapucine@e...|2025-05-06 15:01:20|s3a://dev-lab-02-...|
| 30|   Gabrielly Rib

In [6]:
# Silver-layer fact table, read as a Delta table.
df_fato_streaming = (
    spark.read
    .format("delta")
    .load("s3a://dev-lab-02-us-east-2-silver/fato_streamings/")
)

In [7]:
# Row count of the silver fact table at the time this cell was executed.
df_fato_streaming.count()

550

In [8]:
# Same count, executed again. The recorded output is larger than the previous
# cell's (1550 vs. 550), presumably because more rows were written to the table
# between the two runs (TODO confirm).
df_fato_streaming.count()

1550

In [9]:
# Preview rows of the silver fact table.
df_fato_streaming.show()

+---+----------+--------------------+------------------+--------------------+--------+-------------------+--------------------+
| id|id_usuario|        masked_email|           artista|              musica|flg_feat|          timestamp|      origem_arquivo|
+---+----------+--------------------+------------------+--------------------+--------+-------------------+--------------------+
|  1|        10|a***********@exam...|Sabrina Carpenter |               Taste|   false|2025-05-07 17:20:19|s3a://dev-lab-02-...|
|  2|         9|  f*****@example.org|       Ed Sheeran |        Shape of You|   false|2025-05-07 17:20:19|s3a://dev-lab-02-...|
|  3|         3| s******@example.net|     Blood Orange |     Champagne Coast|   false|2025-05-07 17:20:19|s3a://dev-lab-02-...|
|  4|         1|a****************...|             d4vd |        Here With Me|   false|2025-05-07 17:20:19|s3a://dev-lab-02-...|
|  5|         2|h**********@examp...|       The Weeknd |     Blinding Lights|   false|2025-05-07 17:20:1

In [10]:
# Gold-layer artist ranking, read as a Delta table.
df_top_artistas = (
    spark.read
    .format("delta")
    .load("s3a://dev-lab-02-us-east-2-gold/top_artistas/")
)

In [11]:
# Display the gold top-artists table.
df_top_artistas.show()

+------------------+----------------+--------------------+
|           artista|qtd_de_streaming|    data_atualizacao|
+------------------+----------------+--------------------+
|        Bad Bunny |             126|2025-05-07 19:44:...|
|         Coldplay |             116|2025-05-07 19:44:...|
|        Lady Gaga |             104|2025-05-07 19:44:...|
|     Benson Boone |             103|2025-05-07 19:44:...|
|Sabrina Carpenter |              97|2025-05-07 19:44:...|
|    Billie Eilish |              92|2025-05-07 19:44:...|
|   Arctic Monkeys |              87|2025-05-07 19:44:...|
|       Ed Sheeran |              78|2025-05-07 19:44:...|
|       Tate McRae |              77|2025-05-07 19:44:...|
|      Teddy Swims |              61|2025-05-07 19:44:...|
+------------------+----------------+--------------------+



In [13]:
# Same display, executed again. The recorded output shows higher
# qtd_de_streaming values and a later data_atualizacao than the previous cell,
# presumably because the gold table was refreshed in between (TODO confirm).
df_top_artistas.show()

+------------------+----------------+--------------------+
|           artista|qtd_de_streaming|    data_atualizacao|
+------------------+----------------+--------------------+
|        Bad Bunny |             395|2025-05-07 20:12:...|
|         Coldplay |             370|2025-05-07 20:12:...|
|        Lady Gaga |             325|2025-05-07 20:12:...|
|     Benson Boone |             324|2025-05-07 20:12:...|
|   Arctic Monkeys |             297|2025-05-07 20:12:...|
|Sabrina Carpenter |             288|2025-05-07 20:12:...|
|    Billie Eilish |             280|2025-05-07 20:12:...|
|       Ed Sheeran |             250|2025-05-07 20:12:...|
|       Tate McRae |             224|2025-05-07 20:12:...|
|      Teddy Swims |             208|2025-05-07 20:12:...|
+------------------+----------------+--------------------+

