In [1]:
import findspark
findspark.init()
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode
from pyspark.sql.functions import split

# Obtain the notebook's SparkSession (reusing an already-running one if present),
# registered under the application name "HealthCarePrediction".
session_builder = SparkSession.builder.appName("HealthCarePrediction")
spark = session_builder.getOrCreate()

In [3]:
from pyspark.sql.functions import regexp_extract
from functools import partial

# Streaming source: text lines read from a TCP socket on localhost:9999.
# The resulting streaming DataFrame is parsed downstream through its `value`
# column (one raw text line per row).
#
# The chain is wrapped in parentheses rather than using backslash
# continuations: the previous form ended with a dangling `\` after `.load()`,
# which silently glued the statement to whatever line followed it.
inputStream = (
    spark.readStream
    .format("socket")
    .option("host", "localhost")
    .option("port", 9999)
    .load()
)

# Regex describing one comma-separated record:
#   group 1      -> a word-character identifier (may be empty)
#   groups 2..11 -> ten non-negative numbers, each optionally with a decimal part
# Raw strings are used so that `\w`, `\s`, `\d` and `\.` reach the regex engine
# verbatim instead of being (deprecated) invalid Python escape sequences.
_NUMERIC_GROUP = r"(\d+\.?\d*)"
_FIELD_SEPARATOR = r"\s*,\s*"
_RECORD_PATTERN = r"^(\w*)" + (_FIELD_SEPARATOR + _NUMERIC_GROUP) * 10

# `fields(idx=N)` builds a Column extracting capture group N of the record
# pattern from the `value` column.
fields = partial(regexp_extract, str="value", pattern=_RECORD_PATTERN)

# Output schema, in capture-group order (group 1 first).
# Each entry is (column name, Spark type to cast to); None keeps the extracted
# string as-is.
# NOTE(review): the categorical columns are cast to integers, so the incoming
# records are presumably already numerically encoded upstream — confirm.
_record_columns = [
    ("id", None),
    ("gender", "long"),
    ("age", "double"),
    ("hypertension", "long"),
    ("heart_disease", "long"),
    ("ever_married", "long"),
    ("work_type", "long"),
    ("Residence_type", "long"),
    ("avg_glucose_level", "double"),
    ("bmi", "double"),
    ("smoking_status", "long"),
]


def _record_column(group_index, column_name, spark_type):
    """Extract one capture group, cast it if a type is given, and name it."""
    extracted = fields(idx=group_index)
    if spark_type is not None:
        extracted = extracted.cast(spark_type)
    return extracted.alias(column_name)


# Project the raw text stream into typed, named columns.
topic = inputStream.select(
    *[
        _record_column(position, column_name, spark_type)
        for position, (column_name, spark_type) in enumerate(_record_columns, start=1)
    ]
)

In [4]:
# Start the streaming query: parsed rows are appended to an in-memory table
# named "healthCare", which can then be queried with spark.sql.
memory_sink = (
    topic.writeStream
    .queryName("healthCare")
    .outputMode("append")
    .format("memory")
)
query = memory_sink.start()

In [5]:
# Print the rows currently held in the in-memory table fed by the streaming query.
# `DataFrame.show()` prints the table itself and returns None, so it must not be
# wrapped in `display()` — doing so rendered a stray "None" under the table.
spark.sql(f"SELECT * FROM {query.name}").show()

+----+------+----+------------+-------------+------------+---------+--------------+-----------------+----+--------------+
|  id|gender| age|hypertension|heart_disease|ever_married|work_type|Residence_type|avg_glucose_level| bmi|smoking_status|
+----+------+----+------------+-------------+------------+---------+--------------+-----------------+----+--------------+
|9046|     0|0.08|           0|            0|           0|        1|             1|           139.67|14.1|             0|
|9047|     0| 5.0|           0|            0|           0|        1|             1|           139.67|14.1|             0|
+----+------+----+------------+-------------+------------+---------+--------------+-----------------+----+--------------+



None