### Устанавливаем и запускаем Vertica

In [1]:
pip install vertica-python

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os

In [3]:
os.system('docker run -d -p 5433:5433 jbfavre/vertica:latest')

0

### Подключаемся к БД

In [4]:
import vertica_python

In [5]:
connection_info = {
    'host': 'localhost',
    'port': 5433,
    'user': 'dbadmin',
    'password': '',
    'database': 'docker',
    'autocommit': True,
} 

### Проводим Миграции

In [17]:
with vertica_python.connect(**connection_info) as connection:
    cursor = connection.cursor()
    cursor.execute("""CREATE TABLE movie_view (
    id IDENTITY, user_uuid VARCHAR(256), movie_uuid VARCHAR(256), movie_progress VARCHAR(256), movie_length VARCHAR(256), event_time VARCHAR(256));
""")


### Заполняем БД на 1 000 000 записей

In [8]:
import time
import pandas
from pydantic import parse_obj_as, BaseModel
import datetime
from typing import List
import random
import uuid

In [9]:
movie_uuids = [str(uuid.uuid4()) for _ in range(1000)]
user_uuids = [str(uuid.uuid4()) for _ in range(2000)]

In [10]:
length = random.randint(600, 90000)
movies_gen = ((
    random.choice(user_uuids), random.choice(movie_uuids), str(random.randint(0, length)), str(length), str(datetime.datetime.now())) for _ in range(10_000_000)
)


In [18]:
%%time
step = 10000
count = 1000000 // step
with vertica_python.connect(**connection_info) as connection:
    cursor = connection.cursor()
    for _ in range(count):
        cursor.executemany("""
        INSERT INTO movie_view (user_uuid, movie_uuid, movie_progress, movie_length, event_time)
        VALUES (?, ?, ?, ?, ?)
        """,
        [next(movies_gen) for i in range(step)],
        use_prepared_statements=True) 


CPU times: total: 1min 42s
Wall time: 1h 32min 53s


### Чтение данных из таблицы

In [19]:
%%time
with vertica_python.connect(**connection_info) as connection2:
    cursor2 = connection2.cursor()
    cursor2.execute('SELECT COUNT(*) from movie_view')
    print(cursor2.fetchall())

[[1000000]]
CPU times: total: 0 ns
Wall time: 24.2 ms


In [22]:
%%time
with vertica_python.connect(**connection_info) as connection2:
    cursor2 = connection2.cursor()
    cursor2.execute('SELECT COUNT (DISTINCT movie_uuid) FROM movie_view')
    print(cursor2.fetchall())

[[1000]]
CPU times: total: 15.6 ms
Wall time: 162 ms


In [23]:
%%time
with vertica_python.connect(**connection_info) as connection2:
    cursor2 = connection2.cursor()
    cursor2.execute('SELECT COUNT (DISTINCT user_uuid) FROM movie_view')
    print(cursor2.fetchall())

[[2000]]
CPU times: total: 0 ns
Wall time: 151 ms


In [24]:
%%time
with vertica_python.connect(**connection_info) as connection2:
    cursor2 = connection2.cursor()
    cursor2.execute('SELECT COUNT (DISTINCT movie_uuid) FROM movie_view WHERE movie_length = movie_progress')
    print(cursor2.fetchall())

[[176]]
CPU times: total: 0 ns
Wall time: 45.7 ms
