In [1]:
%pip install --upgrade pip 
%pip install pymongo
%pip install faker

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
from uuid import uuid4
from datetime import datetime
from contextlib import closing
from time import time
from random import choice as random_choice

from pymongo import MongoClient

from faker import Faker

In [3]:
# Connection settings for the MongoDB instance used by the benchmark.
# NOTE(review): credentials are hardcoded in the notebook; that is tolerable
# only for a throwaway local benchmark database — prefer environment variables
# for anything shared.
USERNAME = 'bench_user'
PASS = 'bench_pass'
HOST = 'localhost'
PORT = 27019
DB = 'bench_db'


def get_mongo_client(use_auth=False):
    """Create a MongoClient for the benchmark server.

    Args:
        use_auth: when True, pass USERNAME / PASS to the client; when False
            (the default) connect without credentials.

    Returns:
        A new MongoClient configured with the 'standard' UUID representation.
        The caller is responsible for closing it.
    """
    client_options = {
        'host': HOST,
        'port': PORT,
        'uuidRepresentation': 'standard',
    }
    if use_auth:
        client_options['username'] = USERNAME
        client_options['password'] = PASS
    return MongoClient(**client_options)


In [4]:
# create review collection
# JSON schema MongoDB applies to every document written to 'review';
# 'text' is typed but not listed as required.
review_schema = {
    "bsonType": "object",
    "required": ["user_id", "movie_id", "score", "review_dt"],
    "properties": {
        "user_id": {"bsonType": "binData"},
        "movie_id": {"bsonType": "binData"},
        "score": {"bsonType": "int"},
        "text": {"bsonType": "string"},
        "review_dt": {"bsonType": "date"},
    },
}

with closing(get_mongo_client()) as client:
    database = client[DB]

    # Drop any previous copy so the cell can be re-run from scratch.
    database.drop_collection('review')
    print(database.list_collection_names())

    review_collection = database.create_collection(
        'review',
        check_exists=False,
        validator={'$jsonSchema': review_schema},
    )
    print(database.list_collection_names())

    # Show the index list before and after adding the movie_id index.
    print(review_collection.index_information())
    review_collection.create_index("movie_id")
    print(review_collection.index_information())


[]
['review']
{'_id_': {'v': 2, 'key': [('_id', 1)]}}
{'_id_': {'v': 2, 'key': [('_id', 1)]}, 'movie_id_1': {'v': 2, 'key': [('movie_id', 1)]}}


In [5]:
# create bookmark collection
# JSON schema MongoDB applies to every document written to 'bookmark'.
bookmark_schema = {
    "bsonType": "object",
    "required": ["user_id", "movie_id"],
    "properties": {
        "user_id": {"bsonType": "binData"},
        "movie_id": {"bsonType": "binData"},
    },
}

with closing(get_mongo_client()) as client:
    database = client[DB]

    # Drop any previous copy so the cell can be re-run from scratch.
    database.drop_collection('bookmark')
    print(database.list_collection_names())

    bookmark_collection = database.create_collection(
        'bookmark',
        check_exists=False,
        validator={'$jsonSchema': bookmark_schema},
    )
    print(database.list_collection_names())

    # Show the index list before and after adding the user_id index.
    print(bookmark_collection.index_information())
    bookmark_collection.create_index("user_id")
    print(bookmark_collection.index_information())

['review']
['bookmark', 'review']
{'_id_': {'v': 2, 'key': [('_id', 1)]}}
{'_id_': {'v': 2, 'key': [('_id', 1)]}, 'user_id_1': {'v': 2, 'key': [('user_id', 1)]}}


In [6]:
# create review_rating collection
# JSON schema MongoDB applies to every document written to 'review_rating'.
review_rating_schema = {
    "bsonType": "object",
    "required": ["user_id", "review_id", "score"],
    "properties": {
        "user_id": {"bsonType": "binData"},
        "review_id": {"bsonType": "binData"},
        "score": {"bsonType": "int"},
    },
}

with closing(get_mongo_client()) as client:
    database = client[DB]

    # Drop any previous copy so the cell can be re-run from scratch.
    database.drop_collection('review_rating')
    print(database.list_collection_names())

    # The validator pairs the JSON schema with a query condition that limits
    # score to -1 or 1.
    review_rating_collection = database.create_collection(
        'review_rating',
        check_exists=False,
        validator={
            '$jsonSchema': review_rating_schema,
            'score': {'$in': [-1, 1]},
        },
    )
    print(database.list_collection_names())

    # Show the index list before and after adding the review_id index.
    print(review_rating_collection.index_information())
    review_rating_collection.create_index("review_id")
    print(review_rating_collection.index_information())

['bookmark', 'review']
['review_rating', 'bookmark', 'review']
{'_id_': {'v': 2, 'key': [('_id', 1)]}}
{'_id_': {'v': 2, 'key': [('_id', 1)]}, 'review_id_1': {'v': 2, 'key': [('review_id', 1)]}}


In [7]:
# Sizes of the synthetic data set.
USERS_COUNT = 500
MOVIES_COUNT = 1000

# Shared Faker instance for generating fake field values.
fake: Faker = Faker()

# Pools of random identifiers, one per benchmark user / movie.
USERS_IDS = [uuid4() for _user_index in range(USERS_COUNT)]
MOVIES_IDS = [uuid4() for _movie_index in range(MOVIES_COUNT)]

In [8]:
# insert review
# Benchmark: insert one review per (user, movie) pair, one insert_one call per
# document, and report the throughput.
with closing(get_mongo_client()) as client:
    collection = client[DB]['review']

    start_time: float = time()
    for user_id in USERS_IDS:
        for movie_id in MOVIES_IDS:
            new_review = {
                # Store the ids being iterated over (previously fresh uuid4()
                # values were written and the loop variables were ignored), so
                # the data really is "every user reviews every movie".
                'user_id': user_id,
                'movie_id': movie_id,
                'score': fake.random_int(min=0, max=10),
                'text': fake.text(),
                'review_dt': fake.date_time_between(start_date="-1y", end_date="now")
            }

            collection.insert_one(new_review)

    insertion_time: float = time() - start_time
    insertion_speed: float = round(USERS_COUNT * MOVIES_COUNT / insertion_time, 2)

    print(f'Insert {USERS_COUNT * MOVIES_COUNT} elements.\nSpeed: {insertion_speed:,} records/sec')

Insert 500000 elements.
Speed: 288.8 records/sec


In [9]:
# insert bookmark
# Benchmark: insert one bookmark per (user, movie) pair, one insert_one call
# per document, and report the throughput.
with closing(get_mongo_client()) as client:
    collection = client[DB]['bookmark']

    start_time: float = time()
    for user_id in USERS_IDS:
        for movie_id in MOVIES_IDS:
            new_bookmark = {
                # Store the ids being iterated over (previously fresh uuid4()
                # values were written and the loop variables were ignored).
                'user_id': user_id,
                'movie_id': movie_id
            }
            collection.insert_one(new_bookmark)

    insertion_time: float = time() - start_time
    insertion_speed: float = round(USERS_COUNT * MOVIES_COUNT / insertion_time, 2)

    print(f'Insert {USERS_COUNT * MOVIES_COUNT} bookmarks.\nInsertion speed: {insertion_speed:,} records/sec')

Insert 500000 elements.
Insertion speed: 313.44 records/sec


In [10]:
# insert review rating
# Benchmark: insert one rating per (user, review) pair for a bounded number of
# users and reviews, one insert_one call per document, and report throughput.
with closing(get_mongo_client()) as client:
    collection = client[DB]['review_rating']

    reviews_count_for_bench = 1000
    users_count_for_bench = 500

    # Only the ids of the first N reviews are needed, so project on _id and
    # limit on the server instead of loading every review document. The key is
    # '_id'; the previous lookup of 'id' always produced None.
    query_result = client[DB]['review'].find({}, {'_id': 1}).limit(reviews_count_for_bench)
    all_reviews_ids = [review['_id'] for review in query_result]
    bench_users_ids = USERS_IDS[:users_count_for_bench]

    start_time: float = time()
    for user_id in bench_users_ids:
        for _review_object_id in all_reviews_ids:
            # NOTE(review): the review's own _id is not stored because the
            # review_rating schema requires binData for review_id, while
            # reviews inserted without an explicit _id presumably get an
            # ObjectId. A random UUID is written instead — confirm whether
            # ratings should reference real reviews and adjust the schema.
            new_review_rating = {
                'user_id': user_id,
                'review_id': uuid4(),
                'score': random_choice([-1, 1])
            }
            collection.insert_one(new_review_rating)

    insertion_time: float = time() - start_time
    # Report what was actually iterated, in case fewer reviews or users exist
    # than the configured benchmark sizes.
    inserted_count = len(bench_users_ids) * len(all_reviews_ids)
    insertion_speed: float = round(inserted_count / insertion_time, 2)

    print(f'Insert {inserted_count} elements.\nSpeed: {insertion_speed:,} records/sec')

Insert 500000 elements.
Speed: 300.94 records/sec


In [22]:
# select review
# Benchmark: read the whole 'review' collection into a list and report the
# read throughput.
with closing(get_mongo_client()) as client:
    collection = client[DB]['review']

    # Only the cursor drain is timed.
    start_time = time()
    all_reviews = list(collection.find({}))
    selection_time = time() - start_time

    reviews_len = len(all_reviews)
    selection_speed = reviews_len / selection_time

    print(
        f'Select {reviews_len} reviews.\n'
        f'Speed: {selection_speed} records/sec'
    )

Select 500000 reviews.
Speed: 64390.01864319769 records/sec


In [23]:
# select bookmarks
# Benchmark: read the whole 'bookmark' collection into a list and report the
# read throughput.
with closing(get_mongo_client()) as client:
    collection = client[DB]['bookmark']

    # Only the cursor drain is timed.
    start_time = time()
    all_bookmarks = list(collection.find({}))
    selection_time = time() - start_time

    bookmarks_len = len(all_bookmarks)
    selection_speed = bookmarks_len / selection_time

    print(
        f'Select {bookmarks_len} bookmarks.\n'
        f'Speed: {selection_speed} records/sec'
    )

Select 500000 bookmarks.
Speed: 92491.15355585288 records/sec


In [24]:
# select review ratings
# Benchmark: read the whole 'review_rating' collection into a list and report
# the read throughput, rounded to two decimals.
with closing(get_mongo_client()) as client:
    collection = client[DB]['review_rating']

    # Only the cursor drain is timed.
    start_time: float = time()
    all_review_ratings = list(collection.find({}))
    selection_time: float = time() - start_time

    reviews_ratings_len = len(all_review_ratings)
    selection_speed: float = round(reviews_ratings_len / selection_time, 2)

    print(
        f'Select {reviews_ratings_len} reviews ratings.\n'
        f'Speed: {selection_speed} records/sec'
    )

Select 500000 reviews ratings.
Speed: 86625.94 records/sec


In [25]:
# update review
# Benchmark: rewrite score, text and date of every review, one update_one call
# per document, and report the throughput.
with closing(get_mongo_client()) as client:
    collection = client[DB]['review']

    # Loading the documents happens before the clock starts; only the
    # update_one calls are timed.
    all_reviews = list(collection.find({}))
    reviews_len = len(all_reviews)

    start_time: float = time()
    for review in all_reviews:
        # Named review_filter rather than `filter`: cell variables are
        # notebook globals, so `filter` would shadow the builtin in every
        # later cell.
        review_filter = {
            'movie_id': review.get('movie_id'),
            'user_id': review.get('user_id'),
        }
        new_values = {
            'score': fake.random_int(min=0, max=10),
            'text': fake.text(),
            'review_dt': fake.date_time_between(start_date="-1y", end_date="now")
        }

        collection.update_one(review_filter, {'$set': new_values})

    updation_time: float = float(time() - start_time)
    print(updation_time)
    updation_speed: float = reviews_len / updation_time

    print(f'Update {reviews_len} reviews.\nUpdation speed: {updation_speed} records/sec')