In [0]:
from pyspark.sql.functions import col

# Root of the ADLS Gen2 container that holds every dataset used by this notebook.
# Kept in one place so that a storage-account or container change is a one-line edit.
storage_root = "abfss://datos@mastertfm002sta.dfs.core.windows.net"

# Delta location read as the streaming source of alerts.
alerts_path = f"{storage_root}/gold/alerts"
# Delta location where the enriched results are appended (also backs tfm.resultados).
results_path = f"{storage_root}/gold/resultados"
# Delta location read per micro-batch for the symbol -> asset_class lookup.
gold_path = f"{storage_root}/gold/activos"
# CSV file (with header) read per micro-batch for the per-symbol pred_score.
estrategias_csv_path = f"{storage_root}/gold/config/estrategias_optimas.csv"
# Structured Streaming checkpoint location of the results stream.
checkpoint_path = f"{storage_root}/checkpoints/resultados_stream"

In [0]:
# Streaming source: the alerts Delta location is read incrementally,
# so each micro-batch carries the alerts that were not processed yet.
delta_stream_reader = spark.readStream.format("delta")
alerts_stream = delta_stream_reader.load(alerts_path)

In [0]:
from pyspark.sql import functions as F

def process_batch(batch_df, batch_id):
    """Enrich one micro-batch of alerts and append it to the results Delta location.

    Used as the ``foreachBatch`` callback of the alerts stream. Each alert is
    inner-joined on ``symbol`` with the gold table (to pick up ``asset_class``)
    and with the strategies CSV (to pick up ``pred_score``); alerts without a
    match in either source are dropped by the inner joins.

    Args:
        batch_df: DataFrame holding the alerts of the current micro-batch.
            It must expose a ``symbol`` column, which is the join key.
        batch_id: Micro-batch identifier; only used in the log messages.

    Returns:
        None. The enriched rows are appended to ``results_path``.
    """
    if batch_df.isEmpty():
        print(f"[STREAM] batch_id={batch_id} vacío")
        return

    # Read gold (Delta): distinct (symbol, asset_class) pairs used as a lookup.
    df_gold = (
        spark.read
            .format("delta")
            .load(gold_path)
            .select("symbol", "asset_class")
            .distinct()
    )

    # Read strategies (CSV). Re-read on every batch, so edits to the file are
    # picked up by the next micro-batch.
    df_estrategias = (
        spark.read
            .option("header", "true")
            .option("inferSchema", "true")
            .csv(estrategias_csv_path)
    )

    # Apply the same logic as the SQL version of this enrichment.
    resultados_df = (
        batch_df.alias("a")
            .join(df_gold.alias("g"), on="symbol", how="inner")
            .join(df_estrategias.alias("e"), on="symbol", how="inner")
            .select(
                F.col("a.*"),
                F.col("g.asset_class"),
                F.col("e.pred_score")
            )
    )

    # The result is consumed by two actions (write + count). Without caching,
    # the count would re-run both source reads and both joins, and could even
    # report a number different from what was written if a source changed in
    # between. Persist once and release the cache no matter what happens.
    resultados_df.persist()
    try:
        # NOTE(review): foreachBatch can re-deliver a batch after a failure and
        # this plain append is not idempotent — confirm duplicates are acceptable.
        resultados_df.write \
            .format("delta") \
            .mode("append") \
            .save(results_path)

        print(f"[STREAM RESULTADOS] batch_id={batch_id} filas={resultados_df.count()}")
    finally:
        resultados_df.unpersist()


In [0]:
# Reset the stream by deleting its checkpoint directory.
# Uses `checkpoint_path` (the same variable passed as checkpointLocation when
# the stream is started) instead of a second hard-coded copy of the URI, so the
# directory that is wiped can never drift from the one the stream actually uses.
# WARNING: without a checkpoint the stream starts over from the beginning of the
# alerts source, and every alert is appended to the results location again.
# Run this cell only when a full reprocess is intended.
dbutils.fs.rm(checkpoint_path, recurse=True)

True

In [0]:
%sql
-- Create the `tfm` database if it does not exist yet, with its location set to
-- the root of the `datos` container of the storage account.
CREATE DATABASE IF NOT EXISTS tfm
LOCATION 'abfss://datos@mastertfm002sta.dfs.core.windows.net/';

In [0]:
# Register the results Delta location as the table tfm.resultados so it can be
# queried by name. No column list is given, so the statement relies on the
# Delta data already stored at `results_path`.
create_resultados_sql = f"""
CREATE TABLE IF NOT EXISTS tfm.resultados
USING DELTA
LOCATION '{results_path}'
"""
spark.sql(create_resultados_sql)

DataFrame[]

In [0]:
# Wire the alerts stream to the per-batch enrichment callback. Progress is
# tracked in `checkpoint_path`, so a restart resumes where the stream stopped.
results_writer = alerts_stream.writeStream.foreachBatch(process_batch)
results_writer = results_writer.outputMode("append")
results_writer = results_writer.option("checkpointLocation", checkpoint_path)
query = results_writer.start()

# Block this cell until the streaming query stops (or the cell is cancelled).
query.awaitTermination()



[STREAM RESULTADOS] batch_id=0 filas=233
[STREAM RESULTADOS] batch_id=1 filas=1
[STREAM RESULTADOS] batch_id=2 filas=1
[STREAM RESULTADOS] batch_id=3 filas=1
[STREAM RESULTADOS] batch_id=4 filas=1
[STREAM RESULTADOS] batch_id=5 filas=1
[STREAM RESULTADOS] batch_id=6 filas=1
[STREAM RESULTADOS] batch_id=7 filas=1
[STREAM RESULTADOS] batch_id=8 filas=1
[STREAM RESULTADOS] batch_id=9 filas=1
[STREAM RESULTADOS] batch_id=10 filas=1
[STREAM RESULTADOS] batch_id=11 filas=1
[STREAM RESULTADOS] batch_id=12 filas=1
[STREAM RESULTADOS] batch_id=13 filas=1
[STREAM RESULTADOS] batch_id=14 filas=1
[STREAM RESULTADOS] batch_id=15 filas=1
[STREAM RESULTADOS] batch_id=16 filas=1
[STREAM RESULTADOS] batch_id=17 filas=1
[STREAM RESULTADOS] batch_id=18 filas=1
[STREAM RESULTADOS] batch_id=19 filas=1
[STREAM RESULTADOS] batch_id=20 filas=1
[STREAM RESULTADOS] batch_id=21 filas=1
[STREAM RESULTADOS] batch_id=22 filas=1
[STREAM RESULTADOS] batch_id=23 filas=1
[STREAM RESULTADOS] batch_id=24 filas=1
[STREAM 

com.databricks.backend.common.rpc.CommandCancelledException
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$5(SequenceExecutionState.scala:132)
	at scala.Option.getOrElse(Option.scala:201)
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3(SequenceExecutionState.scala:132)
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3$adapted(SequenceExecutionState.scala:129)
	at scala.collection.immutable.Range.foreach(Range.scala:190)
	at com.databricks.spark.chauffeur.SequenceExecutionState.cancel(SequenceExecutionState.scala:129)
	at com.databricks.spark.chauffeur.ExecContextState.cancelRunningSequence(ExecContextState.scala:715)
	at com.databricks.spark.chauffeur.ExecContextState.$anonfun$cancel$1(ExecContextState.scala:435)
	at scala.Option.getOrElse(Option.scala:201)
	at com.databricks.spark.chauffeur.ExecContextState.cancel(ExecContextState.scala:435)
	at com.databricks.spark.chauffeur.ExecutionContextManagerV1.can