# This tutorial is sourced from the official PyMongo documentation: [Bulk Write Operations](https://pymongo.readthedocs.io/en/stable/examples/bulk.html).

In [1]:
from dotenv import dotenv_values
from pymongo import MongoClient
import pymongo

In [2]:
# Read the settings stored in the local `.env` file into `config`.
config = dotenv_values(".env")

In [3]:
# Create the client from the value stored under the ATLAS_URI key of `config`.
client = MongoClient(config['ATLAS_URI'])

In [39]:
# Handle to the `bulk_example` database that the following cells operate on.
db = client.bulk_example

# Bulk Insert

A batch of documents can be inserted by passing a list to the `insert_many()` method. PyMongo will automatically split the batch into smaller sub-batches based on the maximum message size accepted by MongoDB, supporting very large bulk insert operations.

In [40]:
# Build the 10,000 sample documents first, then insert them with a single call
# and display the generated ids.
documents = [{"i": n} for n in range(10000)]
db.test.insert_many(documents).inserted_ids

[ObjectId('6594f939f5fe21ec53ce5141'),
 ObjectId('6594f939f5fe21ec53ce5142'),
 ObjectId('6594f939f5fe21ec53ce5143'),
 ObjectId('6594f939f5fe21ec53ce5144'),
 ObjectId('6594f939f5fe21ec53ce5145'),
 ObjectId('6594f939f5fe21ec53ce5146'),
 ObjectId('6594f939f5fe21ec53ce5147'),
 ObjectId('6594f939f5fe21ec53ce5148'),
 ObjectId('6594f939f5fe21ec53ce5149'),
 ObjectId('6594f939f5fe21ec53ce514a'),
 ObjectId('6594f939f5fe21ec53ce514b'),
 ObjectId('6594f939f5fe21ec53ce514c'),
 ObjectId('6594f939f5fe21ec53ce514d'),
 ObjectId('6594f939f5fe21ec53ce514e'),
 ObjectId('6594f939f5fe21ec53ce514f'),
 ObjectId('6594f939f5fe21ec53ce5150'),
 ObjectId('6594f939f5fe21ec53ce5151'),
 ObjectId('6594f939f5fe21ec53ce5152'),
 ObjectId('6594f939f5fe21ec53ce5153'),
 ObjectId('6594f939f5fe21ec53ce5154'),
 ObjectId('6594f939f5fe21ec53ce5155'),
 ObjectId('6594f939f5fe21ec53ce5156'),
 ObjectId('6594f939f5fe21ec53ce5157'),
 ObjectId('6594f939f5fe21ec53ce5158'),
 ObjectId('6594f939f5fe21ec53ce5159'),
 ObjectId('6594f939f5fe21

In [41]:
# Count the documents in `test` using an empty filter.
db.test.count_documents({})

10000

# Mixed Bulk Write Operations
PyMongo also supports executing mixed bulk write operations. A batch of insert, update, and remove operations can be executed together using the bulk write operation API.

## Ordered Bulk Write Operations
Ordered bulk write operations are batched and sent to the server in the order provided for serial execution. The return value is an instance of `BulkWriteResult` describing the type and count of operations performed.

In [43]:
from pprint import pprint
from pymongo import InsertOne, DeleteMany, ReplaceOne, UpdateOne

In [44]:
# The operations are listed in the order they are meant to run.
operations = [
    # Remove all documents from the previous example.
    DeleteMany({}),
    InsertOne({"_id": 1}),
    InsertOne({"_id": 2}),
    InsertOne({"_id": 3}),
    UpdateOne({"_id": 1}, {"$set": {"foo": "bar"}}),
    # Upsert = update + insert: insert when no document matches the filter,
    # otherwise update the matching document.
    UpdateOne({"_id": 4}, {"$inc": {"j": 1}}, upsert=True),
    ReplaceOne({"j": 1}, {"j": 2}),
]
result = db.test.bulk_write(operations)

In [45]:
# Pretty-print the raw result dictionary of the bulk write stored in `result`.
pprint(result.bulk_api_result)

{'nInserted': 3,
 'nMatched': 2,
 'nModified': 2,
 'nRemoved': 10000,
 'nUpserted': 1,
 'upserted': [{'_id': 4, 'index': 5}],
 'writeConcernErrors': [],
 'writeErrors': []}


The first write failure that occurs (e.g. duplicate key error) aborts the remaining operations, and PyMongo raises `BulkWriteError`. The `details` attribute of the exception instance provides the execution results up until the failure occurred and details about the failure - including the operation that caused the failure.

In [46]:
from pymongo import DeleteOne
from pymongo.errors import BulkWriteError

In [47]:
# Two operations: a replace, followed by an insert that reuses an existing _id.
requests = [
    ReplaceOne({"j": 2}, {"i": 5}),
    # Violates the unique key constraint on _id.
    InsertOne({"_id": 4}),
]

# Catch the bulk failure and show its details instead of a traceback.
try:
    db.test.bulk_write(requests)
except BulkWriteError as error:
    pprint(error.details)

{'nInserted': 0,
 'nMatched': 1,
 'nModified': 1,
 'nRemoved': 0,
 'nUpserted': 0,
 'upserted': [],
 'writeConcernErrors': [],
 'writeErrors': [{'code': 11000,
                  'errmsg': 'E11000 duplicate key error collection: '
                            'bulk_example.test index: _id_ dup key: { _id: 4 }',
                  'index': 1,
                  'keyPattern': {'_id': 1},
                  'keyValue': {'_id': 4},
                  'op': {'_id': 4}}]}


## Unordered Bulk Write Operations
Unordered bulk write operations are batched and sent to the server in __arbitrary order__ where they may be executed in parallel. Any errors that occur are reported after all operations are attempted.

In the next example the first and third operations fail due to the unique constraint on _id. Since we are doing unordered execution the second and fourth operations succeed.

In [50]:
# Mix of operations: the two inserts reuse existing _id values and are expected
# to fail, while the delete and the replace are expected to go through.
requests = [
    # Error expected: duplicate _id.
    InsertOne({"_id": 1}),
    DeleteOne({"_id": 2}),
    # Error expected: duplicate _id.
    InsertOne({"_id": 3}),
    ReplaceOne({"_id": 4}, {"i": 1}),
]

# ordered=False requests unordered execution of the batch.
try:
    db.test.bulk_write(requests, ordered=False)
except BulkWriteError as error:
    pprint(error.details)

{'nInserted': 0,
 'nMatched': 1,
 'nModified': 1,
 'nRemoved': 1,
 'nUpserted': 0,
 'upserted': [],
 'writeConcernErrors': [],
 'writeErrors': [{'code': 11000,
                  'errmsg': 'E11000 duplicate key error collection: '
                            'bulk_example.test index: _id_ dup key: { _id: 1 }',
                  'index': 0,
                  'keyPattern': {'_id': 1},
                  'keyValue': {'_id': 1},
                  'op': {'_id': 1}},
                 {'code': 11000,
                  'errmsg': 'E11000 duplicate key error collection: '
                            'bulk_example.test index: _id_ dup key: { _id: 3 }',
                  'index': 2,
                  'keyPattern': {'_id': 1},
                  'keyValue': {'_id': 3},
                  'op': {'_id': 3}}]}


In [51]:
# Print every document currently stored in the `test` collection.
for document in db.test.find():
    print(document)

{'_id': 1, 'foo': 'bar'}
{'_id': 3}
{'_id': 4, 'i': 1}


# Write Concern

Bulk operations are executed with the `write_concern` of the collection they are executed against. Write concern errors (e.g. wtimeout) will be reported after all operations are attempted, regardless of execution order.


write_concern은 MongoDB에 쓰기 작업을 수행할 때 데이터의 일관성과 지속성을 관리하기 위한 설정입니다. 이 설정은 다양한 파라미터들을 가지고 있으며, 주요한 몇 가지는 다음과 같습니다:

1. w (Write Concern): 쓰기 작업이 성공으로 인정되기 위해 몇 개의 노드로부터 확인(acknowledgment)을 받아야 하는지 결정합니다. 값으로는 정수나 "majority"와 같은 문자열이 올 수 있습니다. 예를 들어, w=1은 최소한 한 노드에 쓰기가 성공해야 함을 의미하며, w=majority는 대다수의 노드에 쓰기가 성공해야 함을 의미합니다.  


2. j (Journaling): 쓰기 작업이 MongoDB의 journal에 기록된 후에 성공으로 간주할지 여부를 지정합니다. j=true로 설정하면 journal에 기록된 후에야 성공으로 간주하며, j=false로 설정하면 journal 기록을 기다리지 않고 성공으로 간주합니다.  


3. wtimeout (Write Timeout): 쓰기 작업이 요청한 write concern을 충족할 때까지 기다리는 최대 시간을 설정합니다. 밀리초(ms) 단위로 값을 설정하며, 해당 시간이 초과되면 오류가 발생합니다.  


`write_concern` 설정을 통해 사용자는 데이터의 일관성과 안정성을 조절하고, 시스템의 성능과 가용성을 조절할 수 있습니다.

In [52]:
from pymongo import WriteConcern

# Collection handle with its own write concern: w=3 asks for acknowledgement
# from three members, and wtimeout=1 limits the wait to 1 millisecond.
strict_concern = WriteConcern(w=3, wtimeout=1)
coll = db.get_collection("test", write_concern=strict_concern)

# Four simple inserts, executed through the handle created above.
inserts = [InsertOne({"a": n}) for n in range(4)]

try:
    coll.bulk_write(inserts)
except BulkWriteError as error:
    pprint(error.details)
    

In [58]:
# Print every document visible through the `coll` handle.
for document in coll.find():
    print(document)

{'_id': 1, 'foo': 'bar'}
{'_id': 3}
{'_id': 4, 'i': 1}
{'_id': ObjectId('65950159f5fe21ec53ce7851'), 'a': 0}
{'_id': ObjectId('65950159f5fe21ec53ce7852'), 'a': 1}
{'_id': ObjectId('65950159f5fe21ec53ce7853'), 'a': 2}
{'_id': ObjectId('65950159f5fe21ec53ce7854'), 'a': 3}
