## Enforce a schema on a MongoDB Atlas collection

### Working on a new collection

We can set up an `enforce_schema()` function and call it inside `insert_document()` so that every document is checked before it is inserted into the collection.

In [None]:
from pymongo import MongoClient
from pymongo.errors import WriteError

# Connect to MongoDB
# Replace the placeholder connection string and the database/collection names
# with your own values before running this cell.
client = MongoClient('<mongodb_connection_string>')
db = client['your_database']
collection = db['your_collection']

# Define the schema
# Maps each required field name to the Python type its value must have.
schema = {
    'name': str,
    'age': int,
    'email': str
}

# Function to enforce the schema
def enforce_schema(document):
    """Validate ``document`` against the module-level ``schema`` mapping.

    Every field named in ``schema`` must be present in ``document`` and hold
    a value of the declared type. Fields that are not listed in ``schema``
    are not checked.

    Args:
        document: Mapping of field names to values that is about to be
            inserted.

    Raises:
        WriteError: If a schema field is missing from ``document`` or its
            value does not have the declared type.
    """
    for field, data_type in schema.items():
        if field not in document:
            raise WriteError(f"Invalid schema for field '{field}'")

        value = document[field]
        accepted = data_type if isinstance(data_type, tuple) else (data_type,)
        # bool is a subclass of int, so isinstance(True, int) is True. A
        # boolean must not satisfy an int field unless bool is also accepted.
        bool_passing_as_int = (
            isinstance(value, bool) and int in accepted and bool not in accepted
        )
        if bool_passing_as_int or not isinstance(value, data_type):
            raise WriteError(f"Invalid schema for field '{field}'")

# Function to insert a document with enforced schema
def insert_document(document):
    """Validate ``document`` with ``enforce_schema`` and insert it into ``collection``.

    Validation runs first, so an exception raised by ``enforce_schema``
    propagates to the caller and the insert is skipped. A confirmation
    message is printed after the insert call returns.
    """
    # Validate before writing so a rejected document never reaches the insert.
    enforce_schema(document)
    collection.insert_one(document)
    print("Document inserted successfully.")

# Example usage
# document1 satisfies the schema: every field is present with the declared type.
document1 = {
    'name': 'John Doe',
    'age': 30,
    'email': 'john.doe@example.com'
}
insert_document(document1)

# document2 violates the schema: 'age' is a str, but the schema declares int.
document2 = {
    'name': 'Jane Smith',
    'age': '25',  # Invalid age type (str instead of int)
    'email': 'jane.smith@example.com'
}

# enforce_schema raises WriteError for document2 before the insert is
# attempted; catch it and print the error message.
try:
    insert_document(document2)
except WriteError as e:
    print(e)

### Working on an existing collection

- Iterate over the documents in the collection and validate each one against the schema.
- Collect the invalid documents in a list.
- In the final step, print each invalid document (its `_id` field identifies it in the collection).

In [None]:
from pymongo import MongoClient

# Connect to MongoDB
# Replace the placeholder connection string and the database/collection names
# with your own values before running this cell.
client = MongoClient('<mongodb_connection_string>')
db = client['your_database']
collection = db['your_collection']

# Define the schema
# Maps each required field name to the Python type its value must have.
schema = {
    'name': str,
    'age': int,
    'email': str
}

# Function to enforce the schema for an existing collection
def _field_is_valid(document, field, data_type):
    """Return True if ``document[field]`` exists and matches ``data_type``."""
    if field not in document:
        return False

    value = document[field]
    accepted = data_type if isinstance(data_type, tuple) else (data_type,)
    # bool is a subclass of int, so isinstance(True, int) is True. A boolean
    # must not satisfy an int field unless bool is also accepted.
    if isinstance(value, bool) and int in accepted and bool not in accepted:
        return False
    return isinstance(value, data_type)


def enforce_collection_schema():
    """Find the documents in ``collection`` that violate ``schema``.

    Iterates over every document returned by ``collection.find()`` and checks
    each field listed in the module-level ``schema`` mapping. A document is
    invalid if any schema field is missing or holds a value of the wrong
    type. Nothing in the collection is modified.

    Returns:
        list: The invalid documents, in the order the cursor returned them.
    """
    return [
        document
        for document in collection.find()
        if not all(
            _field_is_valid(document, field, data_type)
            for field, data_type in schema.items()
        )
    ]

# Usage
# Scan the collection and collect the documents that fail schema validation.
invalid_docs = enforce_collection_schema()

# Print the invalid documents
# Each document is printed in full, one per line.
for doc in invalid_docs:
    print(f"Invalid document: {doc}")