In [None]:
%pip install --upgrade pip 
%pip install pymongo
%pip install faker

In [None]:
from uuid import uuid4
from datetime import datetime
from contextlib import closing
from time import time
from random import choice as random_choice

from pymongo import MongoClient

from faker import Faker

In [None]:
USERNAME = 'bench_user'
PASS = 'bench_pass'
HOST = 'localhost'
PORT = 27019
DB = 'bench_db'

def get_mongo_client():
    return MongoClient(host=HOST,
                       port=PORT,
                       username=USERNAME,
                       password=PASS,
                       uuidRepresentation='standard')


In [None]:
# create review collection
with closing(get_mongo_client()) as client:
   database = client[DB]

   database.drop_collection('review')

   print(database.list_collection_names())
   review_schema = {
         "bsonType": "object",
         "required": ["user_id", "movie_id", "score", "review_dt"],
         "properties": {
            "user_id": {
               "bsonType": "binData"
            },
            "movie_id": {
               "bsonType": "binData"
            },
            "score": {
               "bsonType": "int"
            },
            "text": {
               "bsonType": "string"
            },
            "review_dt": {
               "bsonType": "date"
            }
         }
      }

   review_collection = database.create_collection(
      'review',
      check_exists=False,
      validator= {
         '$jsonSchema': review_schema
      })

   print(database.list_collection_names())

   print(review_collection.index_information())
   review_collection.create_index("movie_id")
   print(review_collection.index_information())


In [None]:
# create bookmark collection
with closing(get_mongo_client()) as client:
   database = client[DB]

   database.drop_collection('bookmark')

   print(database.list_collection_names())
   bookmark_schema = {
         "bsonType": "object",
         "required": ["user_id", "movie_id"],
         "properties": {
            "user_id": {
               "bsonType": "binData"
            },
            "movie_id": {
               "bsonType": "binData"
            }
         }
      }

   bookmark_collection = database.create_collection(
      'bookmark',
      check_exists=False,
      validator= {
         '$jsonSchema': bookmark_schema
      })

   print(database.list_collection_names())

   print(bookmark_collection.index_information())
   bookmark_collection.create_index("user_id")
   print(bookmark_collection.index_information())

In [None]:
# create review_rating collection
with closing(get_mongo_client()) as client:
    database = client[DB]

    database.drop_collection('review_rating')

    print(database.list_collection_names())
    review_rating_schema = {
        "bsonType": "object",
        "required": ["user_id", "review_id", "score"],
        "properties": {
            "user_id": {
                "bsonType": "binData"
            },
            "review_id": {
                "bsonType": "binData"
            },
            "score": {
                "bsonType": "int"
            }
        }
    }

    review_rating_collection = database.create_collection(
        'review_rating',
        check_exists=False,
        validator= {
            '$jsonSchema': review_rating_schema,
            'score': {
                '$in': [-1, 1]
            }
        }
    )

    print(database.list_collection_names())

    print(review_rating_collection.index_information())
    review_rating_collection.create_index("review_id")
    print(review_rating_collection.index_information())

In [185]:
fake: Faker = Faker()

USERS_COUNT = 500
USERS_IDS = [uuid4() for _ in range(USERS_COUNT)]

MOVIES_COUNT = 1000
MOVIES_IDS = [uuid4() for _ in range(MOVIES_COUNT)]

In [165]:
# insert review
with closing(get_mongo_client()) as client:
    collection = client[DB]['review']

    start_time: float = time()
    for user_id in USERS_IDS:
        for movie_id in MOVIES_IDS:
            new_review = {
                'user_id': uuid4(),
                'movie_id': uuid4(),
                'score': fake.random_int(min=0, max=10),
                'text': fake.text(),
                'review_dt': fake.date_time_between(start_date="-1y", end_date="now")
            }

            collection.insert_one(new_review)

    insertion_time: float = time() - start_time
    insertion_speed: float = round(USERS_COUNT * MOVIES_COUNT / insertion_time, 2)

    print(f'Insert {USERS_COUNT * MOVIES_COUNT} elements.\nSpeed: {insertion_speed:,} records/sec')

Insert 50000 elements.
Speed: 674.79 records/sec


In [157]:
# insert bookmark
with closing(get_mongo_client()) as client:
    collection = client[DB]['bookmark']

    start_time: float = time()
    for user_id in USERS_IDS:
        for movie_id in MOVIES_IDS:
            new_bookmark = {
                'user_id': uuid4(),
                'movie_id': uuid4()
            }
            collection.insert_one(new_bookmark)

    insertion_time: float = time() - start_time
    insertion_speed: float = round(USERS_COUNT * MOVIES_COUNT / insertion_time, 2)

    print(f'Insert {USERS_COUNT * MOVIES_COUNT} elements.\nSpeed: {insertion_speed:,} records/sec')

Insertion speed: 893.8 records/sec


In [187]:
# insert review rating
with closing(get_mongo_client()) as client:
    collection = client[DB]['review_rating']

    reviews_count_for_bench = 100
    users_count_for_bench = 500

    query_result = client[DB]['review'].find({})
    all_reviews_ids = [review.get('id') for review in query_result][:reviews_count_for_bench]

    start_time: float = time()
    for user_id in USERS_IDS[:users_count_for_bench]:
        for review_id in all_reviews_ids:
            score = random_choice([-1, 1])
            new_review_rating = {
                'user_id': uuid4(),
                'review_id': uuid4(),
                'score': score
            }
            collection.insert_one(new_review_rating)

    insertion_time: float = time() - start_time
    insertion_speed: float = round((
        reviews_count_for_bench * users_count_for_bench) / insertion_time, 2)

    print(f'Insert {reviews_count_for_bench * users_count_for_bench} elements.\nSpeed: {insertion_speed:,} records/sec')

Insert 50000 elements.
Speed: 652.38 records/sec


In [142]:
# select review
with closing(get_mongo_client()) as client:
    collection = client[DB]['review']
    start_time = time()
    
    all_reviews = collection.find({})

    selection_time = float(time() - start_time)

    reviews_len = len(list(all_reviews))
    selection_speed = reviews_len / selection_time

    print(f'Select {reviews_len} elements.\nSpeed: {selection_speed} records/sec')

25000
Selecting speed: 25687800.09799118 records/sec


In [159]:
# select bookmarks
with closing(get_mongo_client()) as client:
    collection = client[DB]['bookmark']
    start_time = time()
    all_bookmarks = collection.find({})

    selection_time = float(time() - start_time)
    print(insertion_time)

    bookmarks_len = len(list(all_bookmarks))
    print(bookmarks_len)

    selection_speed = bookmarks_len / selection_time

    print(f'Select {bookmarks_len} elements.\nSpeed: {selection_speed} records/sec')

0.0009984970092773438
75000
Selecting speed: 75112893.98280802 records/sec


In [193]:
# select review ratings
with closing(get_mongo_client()) as client:
    collection = client[DB]['review_rating']
    start_time: float = time()

    all_review_ratings = collection.find({})

    selection_time: float = float(time() - start_time)
    print(selection_time)

    reviews_ratings_len = len(list(all_review_ratings))
    selection_speed: float = round(reviews_ratings_len / selection_time , 2)

    print(f'Select {reviews_ratings_len} elements.\nSpeed: {selection_speed} records/sec')

0.0


ZeroDivisionError: float division by zero

In [175]:
# update review
with closing(get_mongo_client()) as client:
    collection = client[DB]['review']

    all_reviews = list(collection.find({}))
    reviews_len = len(all_reviews)

    print(all_reviews[0])

    start_time: float = time()
    for review in all_reviews:
        filter = {
            'movie_id': review.get('movie_id'),
            'user_id': review.get('user_id'),
        }
        new_values = {
            'score': fake.random_int(min=0, max=10),
            'text': fake.text(),
            'review_dt': fake.date_time_between(start_date="-1y", end_date="now")
        }

        collection.update_one(filter, {'$set': new_values})

    updation_time: float = float(time() - start_time)
    print(updation_time)
    updation_speed: float = reviews_len / updation_time

    print(f'Update {reviews_len} elements.\nSpeed: {updation_speed} records/sec')

{'_id': ObjectId('661540f54b124f2fc508d78d'), 'user_id': UUID('9b5ddd09-466a-4c22-b869-b8650d421c2f'), 'movie_id': UUID('49db14d9-0ad6-44dc-91bd-27286018b13c'), 'score': 10, 'text': 'No maybe happy nothing toward environmental. Agreement certainly quality yet according cost car remain.\nItself try friend debate break. Eat total nice history avoid meet.', 'review_dt': datetime.datetime(2024, 2, 8, 23, 49, 2)}
141.50101113319397
Update 75000 elements.
Speed: 530.031548180267 records/sec
