In [1]:
import os, sys
from pathlib import Path
from pyspark.sql import SparkSession

# === Environment: pin PySpark to this kernel's interpreter (same venv) and
# === force IPv4/localhost. Must be set before the session (and its JVM) starts.
this_python = sys.executable
os.environ.update(
    PYSPARK_PYTHON=this_python,
    PYSPARK_DRIVER_PYTHON=this_python,
    SPARK_LOCAL_IP="127.0.0.1",
)

# === Scratch directory on a path without spaces ===
spark_tmp = r"C:\spark-tmp"
Path(spark_tmp).mkdir(parents=True, exist_ok=True)

# Builder options, applied in the order listed.
_spark_options = (
    ("spark.driver.bindAddress", "127.0.0.1"),
    ("spark.driver.host", "127.0.0.1"),
    ("spark.local.dir", spark_tmp),
    ("spark.sql.execution.arrow.pyspark.enabled", "false"),
    ("spark.network.timeout", "300s"),
)

_builder = SparkSession.builder.appName("DiagnosticoSparkLocal").master("local[*]")
for _opt_name, _opt_value in _spark_options:
    _builder = _builder.config(_opt_name, _opt_value)

# Local-mode session using every available core.
spark = _builder.getOrCreate()

print("✅ Spark iniciado:", spark.version)

def pyver_in_worker(it):
    """Yield a single line describing the Python interpreter that runs it.

    Written to be passed to ``RDD.mapPartitions``: ``it`` is the partition
    iterator required by that call signature, and it is never consumed.

    Yields:
        str: ``"Worker Python: "`` followed by ``sys.version``.
    """
    import sys  # local import keeps the function self-contained
    yield "Worker Python: " + sys.version


# Run every check inside try/finally so the session is stopped even when a
# check raises -- a failing check is exactly what this diagnostic cell is meant
# to surface, and it must not leave the session running in the kernel.
try:
    # --- JVM-only check (does not need a Python worker) ---
    print("Cuenta JVM:", spark.range(10).count())

    # --- Python worker check (RDD with a lambda) ---
    rdd = spark.sparkContext.parallelize([1,2,3,4], 2).map(lambda x: x*2)
    print("RDD collect:", rdd.collect())

    # (Diagnostic) print the Python version seen inside the worker
    print(rdd.mapPartitions(pyver_in_worker).take(1)[0])

    # --- DataFrame check ---
    df = spark.createDataFrame([(1,"hola"), (2,"mundo")], ["id","texto"])
    df.show()
finally:
    spark.stop()

# Reached only when every check above succeeded; an exception propagates
# past this line after the session has been stopped.
print("✅ OK")


✅ Spark iniciado: 3.5.4
Cuenta JVM: 10
RDD collect: [2, 4, 6, 8]
Worker Python: 3.11.0 (main, Oct 24 2022, 18:26:48) [MSC v.1933 64 bit (AMD64)]
+---+-----+
| id|texto|
+---+-----+
|  1| hola|
|  2|mundo|
+---+-----+

✅ OK
