# Example: Reading from and Writing to Elasticsearch with Spark

Documentation: https://www.elastic.co/guide/en/elasticsearch/hadoop/current/spark.html#spark-python


In [1]:
import pyspark
from pyspark.sql import SparkSession

In [2]:
# Elasticsearch endpoint (host name, HTTP port) that the Spark session is
# pointed at. The port stays a string because it is handed to the Spark
# configuration as-is.
elastic_host, elastic_port = "elasticsearch", "9200"

In [3]:
# Create (or reuse) a local Spark session wired up for Elasticsearch.
#   - spark.jars.packages names the elasticsearch-spark connector artifact
#     that Spark should load for this session.
#   - spark.es.nodes / spark.es.port tell the connector where the cluster is.
spark = (
    SparkSession.builder
    .master("local")
    .appName('jupyter-pyspark')
    .config("spark.jars.packages", "org.elasticsearch:elasticsearch-spark-30_2.12:8.17.0")
    .config("spark.es.nodes", elastic_host)
    .config("spark.es.port", elastic_port)
    .getOrCreate()
)

# Keep the notebook output readable: only log messages at ERROR level.
sc = spark.sparkContext
sc.setLogLevel("ERROR")

In [4]:
# Load the sample stock quotes from the local JSON file into a DataFrame.
# The "multiline" option is enabled so that a JSON document spread over
# several lines can be parsed.
df = (
    spark.read
    .option("multiline", "true")
    .json("/home/jovyan/datasets/json-samples/stocks.json")
)
# Last expression of the cell: render the rows as a pandas table.
df.toPandas()

Unnamed: 0,price,symbol
0,126.82,AAPL
1,3098.12,AMZN
2,251.11,FB
3,1725.05,GOOG
4,128.39,IBM
5,212.55,MSFT
6,78.0,NET
7,497.0,NFLX
8,823.8,TSLA
9,45.11,TWTR


In [6]:
# Push the DataFrame into the Elasticsearch index "stocks" (default type
# _doc) through the "es" data source, using the "Overwrite" save mode.
(
    df.write
    .format("es")
    .mode("Overwrite")
    .save("stocks")
)

In [8]:
# Load the "stocks" index back from Elasticsearch into a new DataFrame.
# NOTE(review): in the recorded output some prices come back with
# single-precision rounding (e.g. 3098.12 -> 3098.120117); presumably the
# index mapped `price` as a 32-bit float -- confirm against the index mapping.
df1 = (
    spark.read
    .format("es")
    .load("stocks")
)
# Last expression of the cell: render the rows as a pandas table.
df1.toPandas()

Unnamed: 0,price,symbol
0,126.82,AAPL
1,3098.120117,AMZN
2,251.110001,FB
3,1725.050049,GOOG
4,128.389999,IBM
5,212.550003,MSFT
6,78.0,NET
7,497.0,NFLX
8,823.799988,TSLA
9,45.110001,TWTR
