In [118]:
from pymongo import MongoClient
from mimesis import Person
from uuid import uuid4
from collections import Counter
import random
from clickhouse_driver import Client

clickhouse_client = Client(host='clickhouse')
client = MongoClient('mongo', 27017)
db = client['docker']


In [33]:
import time
import statistics

class profile_code():
    def __init__(self):
        self.start = time.time()
        self.all_durations = []
        
    def setup_start_time(self):
        self.start = time.time()

    def checkpoint(self):
        end_time = time.time()
        self.all_durations.append(end_time - self.start)
        self.start = end_time

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        print(f"Median - {statistics.median(self.all_durations)}")
        avg = sum(self.all_durations) / len(self.all_durations)
        print(f"Average - {avg}")
        print(f"Summary - {sum(self.all_durations)}")
        
person = Person()
movies_rand = [str(uuid4()) for x in range(1,200)]
user_rand = [random.randint(1,500) for x in range(1,500)]
movies_rand = [f'{str(uuid4())}'] + movies_rand

def gen_person_data():
    return [{'uuid': str(uuid4()),
                'first_name': person.name(), 
                'age': person.age(),
                 'email': person.email(),
                 'political': person.political_views(),
                 'phone': person.telephone(),
                 'username': person.username()
                 } for x in range(10000)]


def gen_movie_likes():
    return [{'rating': random.randint(0,1),
             'user_id': random.choice(user_rand),
             'movie_uuid': random.choice(movies_rand),
             'created_at': random.randint(633801229, 1675180434),
             'updated_at': random.randint(633801229, 1675180434)} for x in range(10000)]


def gen_bookmarks():
    return [{'user_id': random.choice(user_rand),
             'movie_uuid': random.choice(movies_rand),
             'created_at': random.randint(633801229, 1675180434),
             'updated_at': random.randint(633801229, 1675180434)} for x in range(10000)]

In [130]:
clickhouse_client.execute("""
CREATE TABLE if not exists test (
id UUID,
first_name VARCHAR,
age INT,
email VARCHAR,
political VARCHAR,
phone VARCHAR,
username VARCHAR
) Engine=MergeTree() ORDER BY id;
""")

clickhouse_client.execute("""
CREATE TABLE if not exists movie_likes (
uuid UUID,
rating TINYINT,
user_id BIGINT,
movie_uud UUID,
created_at DateTime,
updated_at DateTime
) Engine=MergeTree() ORDER BY uuid;
""")

clickhouse_client.execute("""
CREATE TABLE if not exists bookmarks (
uuid UUID,
user_id BIGINT,
movie_uuid UUID,
created_at DateTime,
updated_at DateTime
) Engine=MergeTree() ORDER BY uuid;
""")

[]

In [36]:
newcol = db['test']
test_data = gen_person_data()

In [37]:
%%time
with profile_code() as profiler:
    for i in range(10):
        profiler.setup_start_time()
        newcol.insert_many(test_data)
        profiler.checkpoint()
        newcol.delete_many({})

Median - 0.08888423442840576
Average - 0.0970771074295044
Summary - 0.970771074295044
CPU times: user 673 ms, sys: 17.7 ms, total: 690 ms
Wall time: 1.49 s


In [38]:
%%time
with profile_code() as profiler:
    for i in range(10):
        
        for each in test_data:
            profiler.setup_start_time()
            newcol.insert_one(each)
            profiler.checkpoint()
        
        newcol.delete_many({})

Median - 0.00012540817260742188
Average - 0.00013875122785568237
Summary - 13.875122785568237
CPU times: user 8.72 s, sys: 833 ms, total: 9.55 s
Wall time: 14.5 s


In [52]:
newcol = db['movie_likes']
movie_likes_dataset = gen_movie_likes()

In [53]:
%%time
with profile_code() as profiler:
    for i in range(10):
        profiler.setup_start_time()
        newcol.insert_many(movie_likes_dataset)
        profiler.checkpoint()
        newcol.delete_many({})

Median - 0.07180523872375488
Average - 0.07452123165130616
Summary - 0.7452123165130615
CPU times: user 488 ms, sys: 3.22 ms, total: 491 ms
Wall time: 1.31 s


In [54]:
%%time
with profile_code() as profiler:
    for i in range(10):
        
        for each in movie_likes_dataset:
            profiler.setup_start_time()
            newcol.insert_one(each)
            profiler.checkpoint()
        
        newcol.delete_many({})

Median - 0.00012493133544921875
Average - 0.0001369956684112549
Summary - 13.699566841125488
CPU times: user 8.47 s, sys: 889 ms, total: 9.36 s
Wall time: 14.3 s


In [55]:
newcol = db['bookmarks']
bookmarks_dataset = gen_bookmarks()

In [56]:
%%time
with profile_code() as profiler:
    for i in range(10):
        profiler.setup_start_time()
        newcol.insert_many(bookmarks_dataset)
        profiler.checkpoint()
        newcol.delete_many({})

Median - 0.07331383228302002
Average - 0.08227715492248536
Summary - 0.8227715492248535
CPU times: user 565 ms, sys: 0 ns, total: 565 ms
Wall time: 1.41 s


In [57]:
%%time
with profile_code() as profiler:
    for i in range(10):
        
        for each in bookmarks_dataset:
            profiler.setup_start_time()
            newcol.insert_one(each)
            profiler.checkpoint()
        
        newcol.delete_many({})

Median - 0.000125885009765625
Average - 0.00013957533359527588
Summary - 13.957533359527588
CPU times: user 8.7 s, sys: 837 ms, total: 9.54 s
Wall time: 14.6 s


In [58]:
%%time
with profile_code() as profiler:
    for i in range(10):
        profiler.setup_start_time()
        for row in test_data:
            clickhouse_client.execute(f"""INSERT INTO test (id, first_name, age, email, political, phone, username) 
            VALUES ('{row['uuid']}', '{row['first_name']}', {row['age']}, '{row['email']}', 
            '{row['political']}', '{row['phone']}', '{row['username']}');""")
        profiler.checkpoint()
        clickhouse_client.execute("""TRUNCATE TABLE test;""")

Median - 15.11976158618927
Average - 14.170273280143737
Summary - 141.70273280143738
CPU times: user 15.8 s, sys: 6.69 s, total: 22.5 s
Wall time: 2min 22s


In [125]:
movie_likes_dataset = gen_movie_likes()

In [131]:
%%time
with profile_code() as profiler:
    for i in range(10):
        profiler.setup_start_time()
        for row in movie_likes_dataset:
            clickhouse_client.execute(f"""INSERT INTO movie_likes 
            VALUES ('{str(uuid4())}', '{row['rating']}', '{row['user_id']}', '{row['movie_uuid']}', 
            '{row['created_at']}', '{row['updated_at']}');""")
        profiler.checkpoint()
        clickhouse_client.execute("""TRUNCATE TABLE movie_likes;""")

Median - 12.906895160675049
Average - 12.990327596664429
Summary - 129.9032759666443
CPU times: user 14.9 s, sys: 5.9 s, total: 20.8 s
Wall time: 2min 10s


In [132]:
%%time
with profile_code() as profiler:
    for i in range(10):
        profiler.setup_start_time()
        for row in movie_likes_dataset:
            clickhouse_client.execute(f"""INSERT INTO bookmarks 
            VALUES ('{str(uuid4())}', '{row['user_id']}', '{row['movie_uuid']}', '{row['created_at']}', 
            '{row['updated_at']}');""")
        profiler.checkpoint()
        clickhouse_client.execute("""TRUNCATE TABLE bookmarks;""")

Median - 13.215920686721802
Average - 12.974126863479615
Summary - 129.74126863479614
CPU times: user 13.2 s, sys: 5.08 s, total: 18.3 s
Wall time: 2min 10s


In [112]:
movies_col = db['movie_likes']
movies_col.insert_many(gen_movie_likes())
bookmarks_col = db['bookmarks']
bookmarks_col.insert_many(gen_bookmarks())

<pymongo.results.InsertManyResult at 0x7fd07fead6f0>

In [75]:
%%time
with profile_code() as profiler:
    for i in range(10):
        profiler.setup_start_time()
        user = movies_col.find_one().get('user_id')
        mydoc = movies_col.find({"user_id":user})
        result = list(map(lambda x: x, mydoc))
        profiler.checkpoint()

Median - 0.042432308197021484
Average - 0.04280529022216797
Summary - 0.4280529022216797
CPU times: user 14.7 ms, sys: 1.11 ms, total: 15.8 ms
Wall time: 428 ms


In [110]:
%%time
with profile_code() as profiler:
    for i in range(10):
        profiler.setup_start_time()
        movie = movies_col.find_one().get('movie_uuid')
        mydoc = movies_col.find({'movie_uuid': movie})
        result = list(map(lambda x: x, mydoc))
        counter = Counter(x['rating'] for x in result)
        likes = counter.get(1)
        dislikes = counter.get(0)
        profiler.checkpoint()

Median - 0.01088404655456543
Average - 0.011122894287109376
Summary - 0.11122894287109375
CPU times: user 8.36 ms, sys: 14 µs, total: 8.37 ms
Wall time: 111 ms


In [113]:
%%time
with profile_code() as profiler:
    for i in range(10):
        profiler.setup_start_time()
        user = bookmarks_col.find_one().get('user_id')
        mydoc = bookmarks_col.find({'user_id':user})
        result = list(map(lambda x: x, mydoc))
        profiler.checkpoint()

[{'_id': ObjectId('63d96e603cbdc31f65a05833'), 'user_id': 209, 'movie_uuid': 'd2d41831-244a-40fd-8ce7-3cd34c903640', 'created_at': 1293762908, 'updated_at': 1152562735}, {'_id': ObjectId('63d96e603cbdc31f65a05a00'), 'user_id': 209, 'movie_uuid': 'a1fcd54b-22a9-432c-814e-92b5b6408aca', 'created_at': 1443389030, 'updated_at': 1189643254}, {'_id': ObjectId('63d96e603cbdc31f65a05bf1'), 'user_id': 209, 'movie_uuid': '8d10dbee-cca8-473a-8021-36b28fe2b353', 'created_at': 1466469432, 'updated_at': 947901957}, {'_id': ObjectId('63d96e603cbdc31f65a05c00'), 'user_id': 209, 'movie_uuid': 'c3f78df4-f37a-4da9-bf1c-4ad5a6873b5f', 'created_at': 1485302029, 'updated_at': 724480831}, {'_id': ObjectId('63d96e603cbdc31f65a05c0e'), 'user_id': 209, 'movie_uuid': '962282f3-3844-4a2e-bac6-32ccbb2488db', 'created_at': 1075482450, 'updated_at': 1165280152}, {'_id': ObjectId('63d96e603cbdc31f65a05cdb'), 'user_id': 209, 'movie_uuid': '9d386219-55b9-46af-8240-7c69ba0280fc', 'created_at': 804053327, 'updated_at': 7

In [115]:
%%time
with profile_code() as profiler:
    for i in range(10):
        profiler.setup_start_time()
        movie = movies_col.find_one().get('movie_uuid')
        mydoc = movies_col.find({'movie_uuid': movie})
        result = list(map(lambda x: x, mydoc))
        counter = Counter(x['rating'] for x in result)
        likes = counter.get(1)
        dislikes = counter.get(0)
        average_rating = (1 * likes) / (likes + dislikes)
        profiler.checkpoint()

32 23 0.5818181818181818
32 23 0.5818181818181818
32 23 0.5818181818181818
32 23 0.5818181818181818
32 23 0.5818181818181818
32 23 0.5818181818181818
32 23 0.5818181818181818
32 23 0.5818181818181818
32 23 0.5818181818181818
32 23 0.5818181818181818
Median - 0.017008423805236816
Average - 0.017628860473632813
Summary - 0.17628860473632812
CPU times: user 8.96 ms, sys: 821 µs, total: 9.78 ms
Wall time: 176 ms
