In [None]:
%pip install --upgrade pip 
%pip install pymongo
%pip install faker

In [80]:
from enum import Enum
from typing import List
from uuid import UUID, uuid4
from datetime import datetime
from contextlib import closing
from time import time
from random import choice as random_choice

from pymongo import MongoClient

from faker import Faker

In [81]:
USERNAME = 'bench_user'
PASS = 'bench_pass'
HOST = 'localhost'
PORT = 27018
DB = 'bench_db'

def get_mongo_client():
    return MongoClient(host=HOST,
                       port=PORT,
                       username=USERNAME,
                       password=PASS,
                       uuidRepresentation='standard')


In [92]:
# create review collection
with closing(get_mongo_client()) as client:
   database = client[DB]

   database.drop_collection('review')

   print(database.list_collection_names())
   review_schema = {
         "bsonType": "object",
         "required": [ "id", "user_id", "movie_id", "score", "review_dt"],
         "properties": {
            "id": {
               "bsonType": "binData",
            },
            "user_id": {
               "bsonType": "binData"
            },
            "movie_id": {
               "bsonType": "binData"
            },
            "score": {
               "bsonType": "int"
            },
            "text": {
               "bsonType": "string"
            },
            "review_dt": {
               "bsonType": "date"
            }
         }
      }

   review_collection = database.create_collection(
      'review',
      check_exists=False,
      validator= {
         '$jsonSchema': review_schema
      })

   print(database.list_collection_names())

['review_rating', 'bookmark']
['review_rating', 'bookmark', 'review']


In [95]:
# create bookmark collection
with closing(get_mongo_client()) as client:
   database = client[DB]

   database.drop_collection('bookmark')

   print(database.list_collection_names())
   review_schema = {
         "bsonType": "object",
         "required": [ "id", "user_id", "movie_id"],
         "properties": {
            "id": {
               "bsonType": "binData"
            },
            "user_id": {
               "bsonType": "binData"
            },
            "movie_id": {
               "bsonType": "binData"
            }
         }
      }

   review_collection = database.create_collection(
      'bookmark',
      check_exists=False,
      validator= {
         '$jsonSchema': review_schema
      })

   print(database.list_collection_names())

['review_rating', 'review']
['bookmark', 'review_rating', 'review']


In [96]:
# create review_rating collection
with closing(get_mongo_client()) as client:
   database = client[DB]

   database.drop_collection('review_rating')

   print(database.list_collection_names())
   review_schema = {
         "bsonType": "object",
         "required": [ "id", "user_id", "review_id", "score"],
         "properties": {
            "id": {
               "bsonType": "binData"
            },
            "user_id": {
               "bsonType": "binData"
            },
            "review_id": {
               "bsonType": "binData"
            },
            "score": {
               "bsonType": "int"
            }
         }
      }

   review_collection = database.create_collection(
      'review_rating',
      check_exists=False,
      validator= {
         '$jsonSchema': review_schema,
         'score': {
               '$in': [-1, 1]
         }
      })

   print(database.list_collection_names())

['bookmark', 'review']
['bookmark', 'review_rating', 'review']


In [89]:
fake: Faker = Faker()

USERS_COUNT = 50
USERS_IDS = [uuid4() for _ in range(USERS_COUNT)]

MOVIES_COUNT = 500
MOVIES_IDS = [uuid4() for _ in range(MOVIES_COUNT)]

REVIEWS_COUNT = USERS_COUNT * MOVIES_COUNT

In [93]:
# insert review
with closing(get_mongo_client()) as client:
    collection = client[DB]['review']

    start_time: float = time()
    for user_id in USERS_IDS:
        for movie_id in MOVIES_IDS:
            new_review = {
                'id': uuid4(),
                'user_id': uuid4(),
                'movie_id': uuid4(),
                'score': fake.random_int(min=0, max=10),
                'text': fake.text(),
                'review_dt': fake.date_time_between(start_date="-1y", end_date="now")
            }

            collection.insert_one(new_review)

    insertion_time: float = time() - start_time
    insertion_speed: float = round(REVIEWS_COUNT / insertion_time, 2)

    print('Insertion speed: {:,} records/sec'.format(insertion_speed))

Insertion speed: 989.93 records/sec


In [98]:
# insert bookmark
with closing(get_mongo_client()) as client:
    collection = client[DB]['bookmark']

    start_time: float = time()
    for user_id in USERS_IDS:
        for movie_id in MOVIES_IDS:
            new_bookmark = {
                'id': uuid4(),
                'user_id': uuid4(),
                'movie_id': uuid4()
            }
            collection.insert_one(new_bookmark)

    insertion_time: float = time() - start_time
    insertion_speed: float = round(REVIEWS_COUNT / insertion_time, 2)

    print('Insertion speed: {:,} records/sec'.format(insertion_speed))

Insertion speed: 1,205.91 records/sec


In [105]:
# insert review rating
with closing(get_mongo_client()) as client:
    collection = client[DB]['review_rating']

    query_result = client[DB]['review'].find({})
    all_reviews_ids = [review.get('id') for review in query_result][:200]

    start_time: float = time()
    for user_id in USERS_IDS[:200]:
        for review_id in all_reviews_ids:
            score = random_choice([-1, 1])
            new_review_rating = {
                'id': uuid4(),
                'user_id': uuid4(),
                'review_id': uuid4(),
                'score': score
            }
            collection.insert_one(new_review_rating)

    insertion_time: float = time() - start_time
    insertion_speed: float = round((REVIEWS_COUNT / 4) / insertion_time, 2)

    print('Insertion speed: {:,} records/sec'.format(insertion_speed))

Insertion speed: 715.14 records/sec


In [118]:
# select review
with closing(get_mongo_client()) as client:
    collection = client[DB]['review']
    start_time: float = time()
    all_reviews = collection.find({})

    insertion_time: float = time() - start_time
    insertion_speed: float = round(REVIEWS_COUNT / insertion_time , 2)

    print('Selecting speed: {:,} records/sec'.format(insertion_speed))

Selecting speed: 24,972,040.96 records/sec


In [120]:
# select bookmarks
with closing(get_mongo_client()) as client:
    collection = client[DB]['bookmark']
    start_time: float = time()
    all_bookmarks = collection.find({})

    insertion_time: float = time() - start_time
    insertion_speed: float = round(REVIEWS_COUNT / insertion_time , 2)

    print('Selecting speed: {:,} records/sec'.format(insertion_speed))

Selecting speed: 76,594,302.41 records/sec


In [122]:
# select review ratings
with closing(get_mongo_client()) as client:
    collection = client[DB]['review_rating']
    start_time: float = time()
    all_review_ratings = collection.find({})

    insertion_time: float = time() - start_time
    insertion_speed: float = round(REVIEWS_COUNT / insertion_time , 2)

    print('Selecting speed: {:,} records/sec'.format(insertion_speed))

Selecting speed: 24,918,631.18 records/sec


In [None]:
# ДОБАВИТЬ UPDATE reviews