<a href="https://colab.research.google.com/github/stephenfrein/csc8491/blob/main/MongoClusterExamples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# install a Python library that interacts with MongoDB
!pip install pymongo


Collecting pymongo
  Downloading pymongo-4.13.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.13.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dnspython-2.7.0-py3-none-any.whl (313 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.6/313.6 kB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dnspython, pymongo
Successfully installed dnspython-2.7.0 pymongo-4.13.2


In [2]:
# set up your credentials - same as your Oracle credentials
# prompt for them instead of typing them into the cell, so the password is
# never stored in the notebook file (or pushed to GitHub along with it)
from getpass import getpass

username = input('Username: ').strip()
# getpass does not echo what is typed
password = getpass('Password: ')

In [4]:
# test your connection - if it fails, check username/password
from urllib.parse import quote_plus
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
# percent-escape the credentials before placing them in the URI: a raw '@',
# ':', '/' or '%' in the username/password would otherwise corrupt the
# connection string (enter credentials as plain text, not pre-escaped)
uri = ("mongodb+srv://" + quote_plus(username) + ":" + quote_plus(password)
       + "@cluster0.yzyehcz.mongodb.net")
# create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))
# send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    # show the reason (bad credentials, network, ...) without a full traceback
    print(e)

Pinged your deployment. You successfully connected to MongoDB!


In [9]:
# OBSERVE THE PERFORMANCE IMPACTS OF DIFFERENT WRITE CONCERNS

import pandas as pd
import time
from pymongo import WriteConcern

# transactions in a CSV file in an AWS S3 bucket
transactions_url = 'https://csc8491.s3.amazonaws.com/mongo_transactions_24.csv'
# name of collection
trans_collection = 'transactions'

# you have a database that matches your username
db = client[username]
collection = db[trans_collection]

# get rid of collection if it already exists, so every run starts empty
try:
    collection.drop()
except Exception as e:
    # report the failure and carry on; note that a bare `except e:` would
    # itself fail with a NameError instead of handling the error
    print(e)

# function to load our collection from the S3 file
# do it in a loop to better see performance impacts
def load_collection(collection, filename, limit=500):
    """Insert the first `limit` rows of a CSV file into a collection, one at a time.

    Rows are deliberately written with one `insert_one` call each (rather than
    a single bulk insert) so that the per-write cost of the collection's write
    concern shows up clearly in the total run time.

    Args:
        collection: object exposing `insert_one(document)`, e.g. a pymongo
            collection.
        filename: path, URL, or file-like object accepted by `pd.read_csv`;
            the first line is treated as the header row.
        limit: maximum number of rows to insert (default 500); pass None to
            insert every row in the file.
    """
    df = pd.read_csv(filename, header=0)
    if limit is not None:
        df = df.iloc[:limit]  # keep only the first `limit` records
    for _, row in df.iterrows():
        # each row becomes one document keyed by the CSV column names
        collection.insert_one(row.to_dict())

# reference to the collection for loading
# manipulate write concern to affect speed
# (keep exactly one of the trans_load assignments below uncommented)

# default gives majority - 2 nodes in this 3-node cluster (primary + 1 secondary)
trans_load = db.get_collection(trans_collection)
# setting write concern to 0 means no write acknowledgement needed - super fast
# trans_load = db.get_collection(trans_collection, write_concern=WriteConcern(w=0))
# setting write concern to 1 means just the primary has to acknowledge
# trans_load = db.get_collection(trans_collection, write_concern=WriteConcern(w=1))

# start the clock - perf_counter() is a monotonic, high-resolution timer, so
# the measured interval is not affected by system clock adjustments
start = time.perf_counter()
# load the data
load_collection(trans_load, transactions_url)
# stop the clock
end = time.perf_counter()
print("Run took: " + str(end - start) + ' seconds')

Run took: 93.4885311126709 seconds
