# MongoDB-Spark connector
This notebook demonstrates how to use MongoDB from a Spark application.
MongoDB is installed in the same Rahti namespace as Spark.

In [1]:
# Ask PySpark to pull the Mongo-Spark connector from Maven when the shell starts.
# The value is a Maven package coordinate (group:artifact_scalaVersion:version)
# and is set here, before the Spark session is created in the next cell.
import os

os.environ["PYSPARK_SUBMIT_ARGS"] = " ".join([
    "--packages",
    "org.mongodb.spark:mongo-spark-connector_2.11:2.2.7",
    "pyspark-shell",
])

In [2]:
import os
from urllib.parse import quote_plus

from pyspark.sql import SparkSession

# Connection settings for MongoDB. Each value can be overridden through an
# environment variable so that real credentials never have to be written into
# the notebook; the fallbacks reproduce the original demo values.
mongo_user = os.environ.get("MONGO_USER", "user")
mongo_password = os.environ.get("MONGO_PASSWORD", "password")
mongo_host = os.environ.get("MONGO_HOST", "mongodb")
mongo_port = os.environ.get("MONGO_PORT", "27017")
mongo_namespace = os.environ.get("MONGO_NAMESPACE", "sparkdb.dwarfs")

# The URI format is:
#   mongodb://<username>:<password>@<service host name or IP>:<port>/<database_name>.<table_name>
# User name and password are percent-encoded because characters such as
# ':', '/', '@' or '%' would otherwise break the connection string.
# The database should be created before connecting, but the table will be
# created if it does not exist.
mongo_uri = "mongodb://{}:{}@{}:{}/{}".format(
    quote_plus(mongo_user),
    quote_plus(mongo_password),
    mongo_host,
    mongo_port,
    mongo_namespace,
)

# The same URI is used for reading (input) and writing (output).
spark = (
    SparkSession.builder
    .appName("MongoDB-connector")
    .config("spark.mongodb.input.uri", mongo_uri)
    .config("spark.mongodb.output.uri", mongo_uri)
    .getOrCreate()
)

In [3]:
# Build a small example DataFrame with one (name, age) row per dwarf.
# Bombur has no age (None), which Spark displays as null.
dwarfs = spark.createDataFrame(
    [
        ("Thorin", 195),
        ("Balin", 178),
        ("Kili", 77),
        ("Dwalin", 169),
        ("Oin", 167),
        ("Gloin", 158),
        ("Fili", 82),
        ("Bombur", None),
    ],
    schema=["name", "age"],
)

In [4]:
# Print the DataFrame as a text table to verify its columns and rows
# (up to 20 rows, long cell values truncated - the defaults, spelled out).
dwarfs.show(n=20, truncate=True)

+------+----+
|  name| age|
+------+----+
|Thorin| 195|
| Balin| 178|
|  Kili|  77|
|Dwalin| 169|
|   Oin| 167|
| Gloin| 158|
|  Fili|  82|
|Bombur|null|
+------+----+



In [5]:
# Write the DataFrame to MongoDB using the "mongo" data source in append mode.
# NOTE: append adds to whatever is already stored, so re-running this cell
# inserts the same rows again.
dwarfs.write.save(format="mongo", mode="append")

In [6]:
# Load the data back from MongoDB into a new DataFrame via the "mongo" data source.
df = spark.read.load(format="mongo")

In [7]:
# Check the content. Notice that Mongo has created its own unique id (_id column).
# Up to 20 rows are printed and long cell values are truncated (the defaults, spelled out).
df.show(n=20, truncate=True)

+--------------------+----+------+
|                 _id| age|  name|
+--------------------+----+------+
|[5d9ee215d401160a...| 167|   Oin|
|[5d9ee215d401160a...| 158| Gloin|
|[5d9ee215d401160a...|  82|  Fili|
|[5d9ee215d401160a...|null|Bombur|
|[5d9ee215d401160a...| 195|Thorin|
|[5d9ee215d401160a...| 178| Balin|
|[5d9ee215d401160a...|  77|  Kili|
|[5d9ee215d401160a...| 169|Dwalin|
+--------------------+----+------+

