# Lab01 - Introduction

In this tutorial, we will explore the following tasks:
- Installing Python libraries on Google Colab
- Connecting Google Colab to Atlas MongoDB
- Utilizing CRUD methods (Create, Read, Update, and Delete)
- Employing the find method

## Step 1: Setup packages

We will need to install (i) the `pymongo` python library to create a new mongodb database and collection to store the data, and (ii) the `pandas` library as to help us to interact with a sample dataset easily.


In [31]:
!pip install -q pymongo
!pip install pandas



## Step 2: Import packages, connect to mongo, and download the data (that will be imported)

In [32]:
# Import necessary packages
import pandas as pd
from pymongo import MongoClient, ASCENDING
from bson.objectid import ObjectId

In [33]:
# TODO: copy paste your connection string (see slide 22, and REMEMBER to put your admin password in the string)
connection_string = "mongodb+srv://nicolaspolycarpou:1234@cluster0.sdjdgyg.mongodb.net/?retryWrites=true&w=majority"
database = "Lab01"
collection = "weather"

In [34]:
# We create a MongoClient instance
client = MongoClient(connection_string)

In [35]:
# From that instance, we need to get the connection for the desired database
db = client[database]

In [36]:
# Let's see how the MongoClient object looks like
db

Database(MongoClient(host=['ac-peuvyqn-shard-00-00.sdjdgyg.mongodb.net:27017', 'ac-peuvyqn-shard-00-02.sdjdgyg.mongodb.net:27017', 'ac-peuvyqn-shard-00-01.sdjdgyg.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, retrywrites=True, w='majority', authsource='admin', replicaset='atlas-hydn3w-shard-0', tls=True), 'Lab01')

In [37]:
# Now, let's load a sample JSON dataset (from a github repository - see slide 29) into a pandas DataFrame
# For this case, we have used the weather sample dataset
url = "https://raw.githubusercontent.com/neelabalan/mongodb-sample-dataset/main/sample_weatherdata/data.json"
df = pd.read_json(url, lines=True)

In [38]:
# Convert DataFrame to a list of dictionaries (one dictionary per document)
df = df.drop(columns=["_id"]) # we will drop the _ids, so that mongo will create new ones
data = df.to_dict(orient='records')

In [39]:
# Let's explore how the object looks like (by printing the list's first object)
data[0]

{'st': 'x+47600-047900',
 'ts': {'$date': {'$numberLong': '447339600000'}},
 'position': {'type': 'Point',
  'coordinates': [{'$numberDouble': '-47.9'}, {'$numberDouble': '47.6'}]},
 'elevation': {'$numberInt': '9999'},
 'callLetters': 'VCSZ',
 'qualityControlProcess': 'V020',
 'dataSource': 4,
 'type': 'FM-13',
 'airTemperature': {'value': {'$numberDouble': '-3.1'}, 'quality': '1'},
 'dewPoint': {'value': {'$numberDouble': '999.9'}, 'quality': '9'},
 'pressure': {'value': {'$numberDouble': '1015.3'}, 'quality': '1'},
 'wind': {'direction': {'angle': {'$numberInt': '999'}, 'quality': '9'},
  'type': '9',
  'speed': {'rate': {'$numberDouble': '999.9'}, 'quality': '9'}},
 'visibility': {'distance': {'value': {'$numberInt': '999999'},
   'quality': '9'},
  'variability': {'value': 'N', 'quality': '9'}},
 'skyCondition': {'ceilingHeight': {'value': {'$numberInt': '99999'},
   'quality': '9',
   'determination': '9'},
  'cavok': 'N'},
 'sections': ['AG1'],
 'precipitationEstimatedObservatio

## Step 3: CRUD Methods
- Insert data into MongoDB
- Count the number of documents in the collection
- Retrieve the last inserted document
- Delete the last inserted document.
- Update the first inserted document.

In [40]:
# The insert_many method is used to insert the data into the specified collection.
db[collection].insert_many(data)

InsertManyResult([ObjectId('65c233da71a269129b3d3685'), ObjectId('65c233da71a269129b3d3686'), ObjectId('65c233da71a269129b3d3687'), ObjectId('65c233da71a269129b3d3688'), ObjectId('65c233da71a269129b3d3689'), ObjectId('65c233da71a269129b3d368a'), ObjectId('65c233da71a269129b3d368b'), ObjectId('65c233da71a269129b3d368c'), ObjectId('65c233da71a269129b3d368d'), ObjectId('65c233da71a269129b3d368e'), ObjectId('65c233da71a269129b3d368f'), ObjectId('65c233da71a269129b3d3690'), ObjectId('65c233da71a269129b3d3691'), ObjectId('65c233da71a269129b3d3692'), ObjectId('65c233da71a269129b3d3693'), ObjectId('65c233da71a269129b3d3694'), ObjectId('65c233da71a269129b3d3695'), ObjectId('65c233da71a269129b3d3696'), ObjectId('65c233da71a269129b3d3697'), ObjectId('65c233da71a269129b3d3698'), ObjectId('65c233da71a269129b3d3699'), ObjectId('65c233da71a269129b3d369a'), ObjectId('65c233da71a269129b3d369b'), ObjectId('65c233da71a269129b3d369c'), ObjectId('65c233da71a269129b3d369d'), ObjectId('65c233da71a269129b3d36

In [41]:
# Count documents
db[collection].count_documents({})

19999

In [43]:
# Get the last inserted document based on the _id field in descending order
last_inserted_document = db[collection].find_one(sort=[('_id', -1)])
last_inserted_document

{'_id': ObjectId('65c233da71a269129b3d5d94'),
 'st': 'x+36700+122800',
 'ts': {'$date': {'$numberLong': '447940800000'}},
 'position': {'type': 'Point',
  'coordinates': [{'$numberDouble': '122.8'}, {'$numberDouble': '36.7'}]},
 'elevation': {'$numberInt': '9999'},
 'callLetters': 'BPGU',
 'qualityControlProcess': 'V020',
 'dataSource': 4,
 'type': 'FM-13',
 'airTemperature': {'value': {'$numberDouble': '1.9'}, 'quality': '1'},
 'dewPoint': {'value': {'$numberDouble': '999.9'}, 'quality': '9'},
 'pressure': {'value': {'$numberDouble': '1021.4'}, 'quality': '1'},
 'wind': {'direction': {'angle': {'$numberInt': '40'}, 'quality': '1'},
  'type': 'N',
  'speed': {'rate': {'$numberInt': '8'}, 'quality': '1'}},
 'visibility': {'distance': {'value': {'$numberInt': '20000'}, 'quality': '1'},
  'variability': {'value': 'N', 'quality': '9'}},
 'skyCondition': {'ceilingHeight': {'value': {'$numberInt': '1500'},
   'quality': '1',
   'determination': 'C'},
  'cavok': 'N'},
 'sections': ['AG1', 'AY

In [44]:
# Delete a specific document (in this case scenario, the last document that was inserted)
db[collection].delete_one({"_id": ObjectId(last_inserted_document["_id"])})

DeleteResult({'n': 1, 'electionId': ObjectId('7fffffff0000000000000368'), 'opTime': {'ts': Timestamp(1707226123, 69), 't': 872}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1707226123, 1106), 'signature': {'hash': b'\xd8\x13\x12\xef\xb1\x1c\x8e\xda\x03\x91\x97\x857\xc29._={\xdd', 'keyId': 7279305758049566721}}, 'operationTime': Timestamp(1707226123, 69)}, acknowledged=True)

In [45]:
# TODO: Check that it is indeed deleted
db[collection].find_one(last_inserted_document['_id'])

In [46]:
# To insert a new document
db[collection].insert_one(last_inserted_document)

InsertOneResult(ObjectId('65c233da71a269129b3d5d94'), acknowledged=True)

In [47]:
# Let's try to update the first inserted document. So, firstly we need to get the id to know which one we want to update.
# TODO: get the first inserted document's id:
first_inserted_document = db[collection].find_one(sort=[('_id', ASCENDING)])
update_operation = {"$set": {"type": 'FM-8'}}
db[collection].update_one({"_id": ObjectId(first_inserted_document["_id"])}, update_operation)
# We print the initial document
first_inserted_document

{'_id': ObjectId('65c22f3771a269129b3d0f74'),
 'st': 'x+47600-047900',
 'ts': {'$date': {'$numberLong': '447339600000'}},
 'position': {'type': 'Point',
  'coordinates': [{'$numberDouble': '-47.9'}, {'$numberDouble': '47.6'}]},
 'elevation': {'$numberInt': '9999'},
 'callLetters': 'VCSZ',
 'qualityControlProcess': 'V020',
 'dataSource': 4,
 'type': 'FM-8',
 'airTemperature': {'value': {'$numberDouble': '-3.1'}, 'quality': '1'},
 'dewPoint': {'value': {'$numberDouble': '999.9'}, 'quality': '9'},
 'pressure': {'value': {'$numberDouble': '1015.3'}, 'quality': '1'},
 'wind': {'direction': {'angle': {'$numberInt': '999'}, 'quality': '9'},
  'type': '9',
  'speed': {'rate': {'$numberDouble': '999.9'}, 'quality': '9'}},
 'visibility': {'distance': {'value': {'$numberInt': '999999'},
   'quality': '9'},
  'variability': {'value': 'N', 'quality': '9'}},
 'skyCondition': {'ceilingHeight': {'value': {'$numberInt': '99999'},
   'quality': '9',
   'determination': '9'},
  'cavok': 'N'},
 'sections'

In [48]:
# TODO: Check that it indeed has been updated, retrieve the updated document
db[collection].find_one(first_inserted_document['_id'])

{'_id': ObjectId('65c22f3771a269129b3d0f74'),
 'st': 'x+47600-047900',
 'ts': {'$date': {'$numberLong': '447339600000'}},
 'position': {'type': 'Point',
  'coordinates': [{'$numberDouble': '-47.9'}, {'$numberDouble': '47.6'}]},
 'elevation': {'$numberInt': '9999'},
 'callLetters': 'VCSZ',
 'qualityControlProcess': 'V020',
 'dataSource': 4,
 'type': 'FM-8',
 'airTemperature': {'value': {'$numberDouble': '-3.1'}, 'quality': '1'},
 'dewPoint': {'value': {'$numberDouble': '999.9'}, 'quality': '9'},
 'pressure': {'value': {'$numberDouble': '1015.3'}, 'quality': '1'},
 'wind': {'direction': {'angle': {'$numberInt': '999'}, 'quality': '9'},
  'type': '9',
  'speed': {'rate': {'$numberDouble': '999.9'}, 'quality': '9'}},
 'visibility': {'distance': {'value': {'$numberInt': '999999'},
   'quality': '9'},
  'variability': {'value': 'N', 'quality': '9'}},
 'skyCondition': {'ceilingHeight': {'value': {'$numberInt': '99999'},
   'quality': '9',
   'determination': '9'},
  'cavok': 'N'},
 'sections'

In [49]:
# Query, we want to find all documents that have recorded the as timestamp the number "447940800000"
timestamp_condition = {"ts.$date.$numberLong": "447339600000"}
result = db[collection].find(timestamp_condition)

# Print the matched documents
for document in result:
    print(document)

{'_id': ObjectId('65c22f3771a269129b3d0f74'), 'st': 'x+47600-047900', 'ts': {'$date': {'$numberLong': '447339600000'}}, 'position': {'type': 'Point', 'coordinates': [{'$numberDouble': '-47.9'}, {'$numberDouble': '47.6'}]}, 'elevation': {'$numberInt': '9999'}, 'callLetters': 'VCSZ', 'qualityControlProcess': 'V020', 'dataSource': 4, 'type': 'FM-8', 'airTemperature': {'value': {'$numberDouble': '-3.1'}, 'quality': '1'}, 'dewPoint': {'value': {'$numberDouble': '999.9'}, 'quality': '9'}, 'pressure': {'value': {'$numberDouble': '1015.3'}, 'quality': '1'}, 'wind': {'direction': {'angle': {'$numberInt': '999'}, 'quality': '9'}, 'type': '9', 'speed': {'rate': {'$numberDouble': '999.9'}, 'quality': '9'}}, 'visibility': {'distance': {'value': {'$numberInt': '999999'}, 'quality': '9'}, 'variability': {'value': 'N', 'quality': '9'}}, 'skyCondition': {'ceilingHeight': {'value': {'$numberInt': '99999'}, 'quality': '9', 'determination': '9'}, 'cavok': 'N'}, 'sections': ['AG1'], 'precipitationEstimated

In [50]:
# Query, we want to find all documents that has as coordinates a numberDouble equal to -10.1
coordinate_condition = {"position.coordinates.$numberDouble": "-10.1"}
result = db[collection].find(coordinate_condition)

# Print the matched documents
for document in result:
    print(document)

{'_id': ObjectId('65c22f3771a269129b3d1668'), 'st': 'x-00200-010100', 'ts': {'$date': {'$numberLong': '447379200000'}}, 'position': {'type': 'Point', 'coordinates': [{'$numberDouble': '-10.1'}, {'$numberDouble': '-0.2'}]}, 'elevation': {'$numberInt': '9999'}, 'callLetters': 'GOVZ', 'qualityControlProcess': 'V020', 'dataSource': 4, 'type': 'FM-13', 'airTemperature': {'value': {'$numberInt': '28'}, 'quality': '1'}, 'dewPoint': {'value': {'$numberDouble': '25.1'}, 'quality': '1'}, 'pressure': {'value': {'$numberDouble': '1012.5'}, 'quality': '1'}, 'wind': {'direction': {'angle': {'$numberInt': '170'}, 'quality': '1'}, 'type': 'N', 'speed': {'rate': {'$numberDouble': '2.1'}, 'quality': '1'}}, 'visibility': {'distance': {'value': {'$numberInt': '20000'}, 'quality': '1'}, 'variability': {'value': 'N', 'quality': '9'}}, 'skyCondition': {'ceilingHeight': {'value': {'$numberInt': '22000'}, 'quality': '1', 'determination': 'C'}, 'cavok': 'N'}, 'sections': ['AG1', 'AY1', 'GF1', 'MD1', 'MW1', 'SA1

In [51]:
# Now, let's combine the last two queries (get the document(s) that have the specific timestamp AND that coordinate)
result = db[collection].find({"$and": [timestamp_condition, coordinate_condition]})

# Print the matched documents
for document in result:
    print(document)

{'_id': ObjectId('65c22f3771a269129b3d11c5'), 'st': 'x+55400-010100', 'ts': {'$date': {'$numberLong': '447339600000'}}, 'position': {'type': 'Point', 'coordinates': [{'$numberDouble': '-10.1'}, {'$numberDouble': '55.4'}]}, 'elevation': {'$numberInt': '9999'}, 'callLetters': 'GWUK', 'qualityControlProcess': 'V020', 'dataSource': 4, 'type': 'FM-13', 'airTemperature': {'value': {'$numberDouble': '10.4'}, 'quality': '1'}, 'dewPoint': {'value': {'$numberDouble': '8.1'}, 'quality': '1'}, 'pressure': {'value': {'$numberDouble': '1033.4'}, 'quality': '1'}, 'wind': {'direction': {'angle': {'$numberInt': '260'}, 'quality': '1'}, 'type': 'N', 'speed': {'rate': {'$numberDouble': '7.7'}, 'quality': '1'}}, 'visibility': {'distance': {'value': {'$numberInt': '10000'}, 'quality': '1'}, 'variability': {'value': 'N', 'quality': '9'}}, 'skyCondition': {'ceilingHeight': {'value': {'$numberInt': '1050'}, 'quality': '1', 'determination': 'C'}, 'cavok': 'N'}, 'sections': ['AG1', 'AY1', 'GA1', 'GA2', 'GA3', '

In [52]:
# TODO: Get the document(s) that have the specific timestamp OR that coordinate)
result = db[collection].find({"$or": [timestamp_condition, coordinate_condition]})

# Print the matched documents
for document in result:
    print(document)

{'_id': ObjectId('65c22f3771a269129b3d0f74'), 'st': 'x+47600-047900', 'ts': {'$date': {'$numberLong': '447339600000'}}, 'position': {'type': 'Point', 'coordinates': [{'$numberDouble': '-47.9'}, {'$numberDouble': '47.6'}]}, 'elevation': {'$numberInt': '9999'}, 'callLetters': 'VCSZ', 'qualityControlProcess': 'V020', 'dataSource': 4, 'type': 'FM-8', 'airTemperature': {'value': {'$numberDouble': '-3.1'}, 'quality': '1'}, 'dewPoint': {'value': {'$numberDouble': '999.9'}, 'quality': '9'}, 'pressure': {'value': {'$numberDouble': '1015.3'}, 'quality': '1'}, 'wind': {'direction': {'angle': {'$numberInt': '999'}, 'quality': '9'}, 'type': '9', 'speed': {'rate': {'$numberDouble': '999.9'}, 'quality': '9'}}, 'visibility': {'distance': {'value': {'$numberInt': '999999'}, 'quality': '9'}, 'variability': {'value': 'N', 'quality': '9'}}, 'skyCondition': {'ceilingHeight': {'value': {'$numberInt': '99999'}, 'quality': '9', 'determination': '9'}, 'cavok': 'N'}, 'sections': ['AG1'], 'precipitationEstimated

In [53]:
# TODO: Create a new database and a new collection named (both of them) "test".
# Insert data the same data as before and go check them through the Atlas Mongo Dashboard
test_db = client['test']
test_db['test'].insert_many(data)

InsertManyResult([ObjectId('65c233da71a269129b3d3685'), ObjectId('65c233da71a269129b3d3686'), ObjectId('65c233da71a269129b3d3687'), ObjectId('65c233da71a269129b3d3688'), ObjectId('65c233da71a269129b3d3689'), ObjectId('65c233da71a269129b3d368a'), ObjectId('65c233da71a269129b3d368b'), ObjectId('65c233da71a269129b3d368c'), ObjectId('65c233da71a269129b3d368d'), ObjectId('65c233da71a269129b3d368e'), ObjectId('65c233da71a269129b3d368f'), ObjectId('65c233da71a269129b3d3690'), ObjectId('65c233da71a269129b3d3691'), ObjectId('65c233da71a269129b3d3692'), ObjectId('65c233da71a269129b3d3693'), ObjectId('65c233da71a269129b3d3694'), ObjectId('65c233da71a269129b3d3695'), ObjectId('65c233da71a269129b3d3696'), ObjectId('65c233da71a269129b3d3697'), ObjectId('65c233da71a269129b3d3698'), ObjectId('65c233da71a269129b3d3699'), ObjectId('65c233da71a269129b3d369a'), ObjectId('65c233da71a269129b3d369b'), ObjectId('65c233da71a269129b3d369c'), ObjectId('65c233da71a269129b3d369d'), ObjectId('65c233da71a269129b3d36

In [54]:
# TODO: Delete the collection test using the .drop() method
# note: if the database has only one collection, then both the database and connection are deleted
test_db['test'].drop()