# Производительность модели семейства столбцов на примере работы с MonetDB

- MonetDB: <https://en.wikipedia.org/wiki/MonetDB>
- pymonetdb: <http://pymonetdb.readthedocs.io/en/1.1.1/>

|  reg_id | reg_name |   year  | group_id | group_name | category | gender |  value  |
|:-------:|:--------:|:-------:|:--------:|:----------:|:--------:|:------:|:-------:|
| integer |  string  | integer |  integer |   string   |  string  | string | integer |

In [1]:
import sys
import time

import pymonetdb

sys.path.append('../config/')
from config import config

In [2]:
# Read the MonetDB connection settings and open a connection plus a cursor.
params = config(section='monetdb')
connect_kwargs = {
    key: params[key]
    for key in ("database", "hostname", "username", "password")
}
conn = pymonetdb.connect(**connect_kwargs)
cur = conn.cursor()
# Cursor batch size used by this notebook.
cur.arraysize = 100

In [3]:
# Benchmark queries against the "crimestatsocial" table, listed in the order
# of the numbered sections of this notebook (commands[0] is section 1, etc.).
commands = (
    # 1. Every row of the table.
    """
    SELECT *
    FROM "crimestatsocial";""",
    # 2. Rows for the year 2016 only.
    """
    SELECT *
    FROM "crimestatsocial"
    WHERE "year"=2016;""",
    # 3. Row count per group for 2016. Only the grouping column and the
    #    aggregate are selected: `SELECT *` cannot be combined with GROUP BY
    #    here, because the remaining columns are neither grouped nor
    #    aggregated. The recorded output of section 3 is (group_id, count).
    """
    SELECT "group_id", COUNT("group_id")
    FROM "crimestatsocial"
    WHERE "year"=2016
    GROUP BY "group_id";""",
    # 4. Row count per year.
    """
    SELECT "year", COUNT("year")
    FROM "crimestatsocial"
    GROUP BY "year";""",
    # 5. One result row per "group_id"; the DISTINCT count is evaluated
    #    inside each group, so the number of groups is the number of rows.
    """
    SELECT "group_id", COUNT(DISTINCT "group_id")
    FROM "crimestatsocial"
    GROUP BY "group_id";"""
)

## 1. Собираются исходные кортежи

In [4]:
# Run the unfiltered query and pull the complete result set.
cur.execute(commands[0])
all_rows = cur.fetchall()
# Report how many rows came back, then show the first one as a sample.
print(len(all_rows))
print(all_rows[0])

27384
(1100, 'Российская Федерация', 2016, 14002, 'по возрасту', '14-15', 'Мужчины', 13573)


In [5]:
# Time the unfiltered query. Only cur.execute() is inside the timed
# statement; no fetch call is part of the measurement.
%timeit cur.execute(commands[0])

19.9 ms ± 1.43 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## 2. Собираются исходные кортежи за 2016 год

In [6]:
# Run the query restricted to 2016 and pull the complete result set.
cur.execute(commands[1])
rows_2016 = cur.fetchall()
# Report how many rows came back, then show the first one as a sample.
print(len(rows_2016))
print(rows_2016[0])

3948
(1100, 'Российская Федерация', 2016, 14002, 'по возрасту', '14-15', 'Мужчины', 13573)


In [7]:
# Time the 2016-only query. Only cur.execute() is inside the timed
# statement; no fetch call is part of the measurement.
%timeit cur.execute(commands[1])

21.5 ms ± 1.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## 3. Подсчет исходных кортежей за 2016 год по группам

In [8]:
# Run the per-group count for 2016 and show the fetched rows as cell output.
cur.execute(commands[2])
group_counts_2016 = cur.fetchall()
group_counts_2016

[(14002, 1128), (14006, 1692), (14005, 752), (14007, 376)]

In [9]:
# Time the per-group count for 2016. Only cur.execute() is inside the timed
# statement; no fetch call is part of the measurement.
%timeit cur.execute(commands[2])

5.73 ms ± 308 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## 4. Подсчет исходных кортежей по годам

In [10]:
# Run the per-year count and show the fetched rows as cell output.
cur.execute(commands[3])
year_counts = cur.fetchall()
year_counts

[(2016, 3948),
 (2015, 3990),
 (2014, 3990),
 (2013, 3864),
 (2012, 3864),
 (2011, 3864),
 (2010, 3864)]

In [11]:
# Time the per-year count. Only cur.execute() is inside the timed
# statement; no fetch call is part of the measurement.
%timeit cur.execute(commands[3])

6.04 ms ± 161 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## 5. Подсчет уникальных групп

In [12]:
# Only execute() is called here, so the value displayed by this cell is the
# return value of cur.execute(), not fetched rows.
# NOTE(review): presumably that value is the number of result rows (one per
# group_id) -- confirm against the pymonetdb Cursor.execute documentation.
cur.execute(commands[4])

4

In [13]:
# Time the distinct-group query. Only cur.execute() is inside the timed
# statement; no fetch call is part of the measurement.
%timeit cur.execute(commands[4])

8.04 ms ± 454 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [14]:
# Release the cursor first, then the connection it was created from.
for handle in (cur, conn):
    handle.close()