# Example: Reading from and Writing to MongoDB with Spark

Documentation: https://docs.mongodb.com/spark-connector/master/python-api/ 


In [1]:
import os

import pyspark
from pyspark.sql import SparkSession

In [2]:
# MONGO CONFIGURATION
# Connection URI handed to the Spark MongoDB connector for reads and writes.
# Credentials should not be hardcoded in a shared notebook: set the MONGO_URI
# environment variable to supply your own. When it is unset, the original demo
# URI is used so the example keeps running unchanged.
mongo_uri = os.environ.get(
    "MONGO_URI",
    "mongodb://admin:mongopw@mongo:27017/demo.feedback?authSource=admin",
)

In [3]:
# Spark init
# Build (or reuse) a local SparkSession. The same Mongo URI is registered for
# both the connector's input and output settings, and the connector artifact
# is requested through spark.jars.packages.
spark = (
    SparkSession.builder
    .master("local")
    .appName("jupyter-pyspark")
    .config("spark.mongodb.input.uri", mongo_uri)
    .config("spark.mongodb.output.uri", mongo_uri)
    .config(
        "spark.jars.packages",
        "org.mongodb.spark:mongo-spark-connector_2.12:3.0.2",
    )
    .getOrCreate()
)

# Keep a handle on the SparkContext and restrict its logging to errors.
sc = spark.sparkContext
sc.setLogLevel("ERROR")

In [4]:
# Load the sample stocks file from local disk. The "multiline" option is
# enabled so a JSON document spanning several lines is parsed as one input.
df = (
    spark.read
    .option("multiline", "true")
    .json("/home/jovyan/datasets/json-samples/stocks.json")
)
df.show()

+-------+------+
|  price|symbol|
+-------+------+
| 126.82|  AAPL|
|3098.12|  AMZN|
| 251.11|    FB|
|1725.05|  GOOG|
| 128.39|   IBM|
| 212.55|  MSFT|
|   78.0|   NET|
|  497.0|  NFLX|
|  823.8|  TSLA|
|  45.11|  TWTR|
+-------+------+



In [5]:
# Write the DataFrame to MongoDB. No schema is declared up front: MongoDB is a
# document database, so the rows are stored as documents as-is.
# "overwrite" mode replaces any existing contents of the target collection;
# the target here is the "stocks" collection in the "example" database.
(
    df.write.format("mongo")
    .mode("overwrite")
    .option("database", "example")
    .option("collection", "stocks")
    .save()
)

In [6]:
# Read the "stocks" collection of the "example" database back from MongoDB
# and display it as a pandas DataFrame.
df1 = (
    spark.read.format("mongo")
    .option("database", "example")
    .option("collection", "stocks")
    .load()
)
df1.toPandas()

Unnamed: 0,_id,price,symbol
0,"(6760de6d1f42543207eb1308,)",126.82,AAPL
1,"(6760de6d1f42543207eb1309,)",3098.12,AMZN
2,"(6760de6d1f42543207eb130a,)",251.11,FB
3,"(6760de6d1f42543207eb130b,)",1725.05,GOOG
4,"(6760de6d1f42543207eb130c,)",128.39,IBM
5,"(6760de6d1f42543207eb130d,)",212.55,MSFT
6,"(6760de6d1f42543207eb130e,)",78.0,NET
7,"(6760de6d1f42543207eb130f,)",497.0,NFLX
8,"(6760de6d1f42543207eb1310,)",823.8,TSLA
9,"(6760de6d1f42543207eb1311,)",45.11,TWTR
