In [None]:
""" 
Исследование базы данных для хранения пользовательского контента
Объект: `MongoDB`.

Данные, которые вам необходимо сохранить:
- лайки пользователей к каждому фильму;
- рецензии к фильмам;
- закладки пользователя (отложенные на потом фильмы).

Требования к скорости обработки данных = 200 мс.

Измеряемые параметры:
 - время на запись данных
 - время на чтение данных

Вывод: данное хранилище удовлетворяет заявленным требованиям.
"""


In [1]:
import sys

!{sys.executable} -m pip install pymongo

Collecting pymongo
  Downloading pymongo-4.3.3-cp38-cp38-macosx_10_9_x86_64.whl (381 kB)
[K     |████████████████████████████████| 381 kB 830 kB/s eta 0:00:01
[?25hCollecting dnspython<3.0.0,>=1.16.0
  Using cached dnspython-2.2.1-py3-none-any.whl (269 kB)
Installing collected packages: dnspython, pymongo
Successfully installed dnspython-2.2.1 pymongo-4.3.3
You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.8/bin/python3.8 -m pip install --upgrade pip' command.[0m


In [48]:
from pymongo import MongoClient

# client = MongoClient('localhost:27017', username='superuser', password='123qwe')
client = MongoClient('localhost:27017')
database = client['movies']
reviews = database['reviews']
likes = database['likes']
bookmarks = database['bookmarks']

reviews.drop()
likes.drop()
bookmarks.drop()

In [28]:
def insert(collection, objects) -> int:
    for i in objects:
        result = collection.insert_many(i, ordered=False, bypass_document_validation=True)
#     return result.inserted_ids

In [49]:
import uuid
from bson import ObjectId

user_id = str(uuid.uuid4())

users = []
def generate_reviews():
    user_id = str(uuid.uuid4())
    users.append(user_id)
    for i in range(1000, 10000, 1000):
        yield [{
        '_id': ObjectId(),
        'user_id': user_id,
        'movie_id': str(uuid.uuid4()),
        'value': uuid.uuid4().hex
    } for i in range(i)]
    

In [50]:
import time

start = time.time()
insert(reviews, generate_reviews())
duration = time.time() - start
print(duration / len(objects))

0.0015286316871643067


In [51]:
start = time.time()
result = list(reviews.find({'user_id': users[0]}))
duration = time.time() - start
print(duration, len(result), duration / len(result))

0.35126495361328125 45000 7.805887858072916e-06


In [52]:
from random import randrange

user_id = str(uuid.uuid4())

users = []
def generate_movie_likes(border: int):
    user_id = str(uuid.uuid4())
    users.append(user_id)
    for i in range(1000, border, 1000):
        yield [{
        '_id': ObjectId(),
        'user_id': user_id,
        'movie_id': str(uuid.uuid4()),
        'value': randrange(11)
    } for i in range(i)]

In [53]:
border = 10_000
start = time.time()
insert(likes, generate_movie_likes(border))
duration = time.time() - start
print(duration, duration / border)

1.2999422550201416 0.00012999422550201417


In [None]:
start = time.time()
result = list(likes.find({'user_id': users[0]}))
duration = time.time() - start
print(duration, len(result), duration / len(result))

In [59]:
movies = []

def generate_user_bookmarks(border: int):
    movie_id = str(uuid.uuid4())
    movies.append(movie_id)
    for i in range(1000, border, 1000):
        yield [{
        '_id': ObjectId(),
        'movie_id': movie_id,
        'user_id': str(uuid.uuid4())
    } for i in range(i)]

In [60]:
border = 10_000
start = time.time()
insert(bookmarks, generate_user_bookmarks(border))
duration = time.time() - start
print(duration, duration / border)

1.1298818588256836 0.00011298818588256835


In [61]:
start = time.time()
result = list(bookmarks.find({'movie_id': movies[0]}))
duration = time.time() - start
print(duration, len(result), duration / len(result))

0.39101195335388184 45000 8.689154518975153e-06
