In [None]:
# Imports, constants and basic structures
import csv, json
from functools import reduce
from distutils import util
from math import ceil

# Capacity of a single server: RAM/SSD/HDD as byte counts, CPU as a core count
server = {'ram': 64 * 1024**3, 'cpu':16, 'ssd':256 * 1024**3, 'hdd': 256 * 1024**3}

# Should be input by a user, but using a constant for now (with defaults for checkpoint configs)
rps = 5000
checkpoint_count = 2
checkpoint_interval = 3600

# Storage capacity of a single storage Instance (in Gb)
single_storage_size = 30

# box.cfg and other Tarantool constants
slab_alloc_minimal = 16
slab_alloc_granularity = 8
index_init_insert = 48 * 1024   # initial cost of index creation
tuple_size = 10                 # since 2.10
large_tuple_size = 14           # legacy and large tuples
tuple_str_header = 5            # msgpack str overhead

# Sizes of the field types in msgpack and the index kinds each type supports.
# 'indexes' must always be a tuple: consumers take its first element as the index kind.
types = {'boolean':{'size': 1, 'indexes':('tree', 'hash')},
         'integer': {'size': 9, 'indexes':('tree', 'hash')},
         'unsigned':{'size': 9, 'indexes':('tree', 'hash', 'bitset')},
         'double':{'size': 9, 'indexes':('tree', 'hash')},
         'number':{'size': 9, 'indexes':('tree', 'hash')},
         'decimal':{'size': 8, 'indexes':('tree', 'hash')},
         'string':{'size': 5, 'indexes':('tree', 'hash', 'bitset')},
         'varbinary':{'size': 5, 'indexes':('tree', 'hash', 'bitset')},
         'uuid':{'size': 16, 'indexes':('tree', 'hash')},
         # trailing comma is required: ('rtree') is just the string 'rtree', not a tuple
         'array':{'size': 5, 'indexes':('rtree',)},
         'map':{'size': 5, 'indexes':('tree', 'hash')}}

# Base key sizes per index kind.
# NOTE(review): no key size is defined for 'rtree' or 'bitset', so looking those kinds up
# here raises KeyError - confirm the intended sizes before indexing array fields.
indexes = {'tree':{'key': 20},
            'hash':{'key': 16}}

# Object for storing basic data on Field
class Field:
    """Size and index metadata for a single field of a space.

    ``opts`` is a mapping (for example one ``csv.DictReader`` row) with keys:

    * ``name``    - field name
    * ``type``    - a key of the module-level ``types`` table
    * ``indexed`` - boolean-like string understood by ``util.strtobool``
    * ``strlen``  - payload length; only read for the variable-length types
      (string, varbinary, map, array)

    Attributes:
        name: field name.
        type: field type name.
        len: field size; for variable-length types this is ``strlen`` plus the
            type's base size from ``types``, otherwise just the base size.
        indexed: 1 or 0, as returned by ``util.strtobool``.
        indexes: index kinds available for the type, or ``()`` when the field
            is not indexed.

    Raises:
        KeyError: if a required key is missing or the type is unknown.
        ValueError: if ``indexed`` is not a recognised boolean string or
            ``strlen`` is not an integer.
    """

    def __init__(self, opts):
        self.name = opts['name']
        self.type = opts['type']
        type_info = types[opts['type']]

        if self.type in ("string", "varbinary", "map", "array"):
            self.len = int(opts['strlen']) + int(type_info['size'])
        else:
            self.len = type_info['size']

        # NOTE(review): distutils was removed in Python 3.12 - util.strtobool needs a replacement there.
        self.indexed = util.strtobool(opts['indexed'])

        # Decide on the parsed flag, not on the raw string: bool("false") is True,
        # which used to hand an index list to every field with a non-empty 'indexed' value.
        self.indexes = type_info['indexes'] if self.indexed else ()

    def __str__(self):
        """Return the dict-style text form with the field name and its indexed flag."""
        return str({"Name": self.name, "Is indexed": self.indexed})


def roundup_by(x, multiple):
    """Round ``x`` up to the nearest multiple of ``multiple``.

    Both arguments are converted with ``int()`` first. A value that is already
    aligned is returned unchanged, and ``multiple == 0`` means "no alignment",
    so ``x`` is returned as is.

    Args:
        x: value to align (anything ``int()`` accepts).
        multiple: alignment step (anything ``int()`` accepts).

    Returns:
        int: the smallest multiple of ``multiple`` that is >= ``x``, or ``x``
        itself when ``multiple`` is 0.
    """
    x = int(x)
    multiple = int(multiple)
    # `or`, not `and`: the zero check must short-circuit before the modulo (no ZeroDivisionError),
    # and an already aligned value must not be bumped by a whole extra step.
    if multiple == 0 or x % multiple == 0:
        return x
    return x + multiple - x % multiple

In [None]:
# 'Space' class for sizing calculation (sharded by default)
class Space:
    """A space (table) description used for memory sizing.

    Every space is created with one indexed numeric ``bucketid`` field; further
    fields are attached with :meth:`add_field`.

    Attributes:
        name: space name.
        fields: list of ``Field`` objects, starting with ``bucketid``.
        rows: expected number of rows (``int``).
        sharded: 1 or 0, as returned by ``util.strtobool``.
        row_item_size: size of one tuple (sum of field sizes plus the tuple
            header); 0 until :meth:`calculate_size` has run.
        row_size: total size of the space - aligned tuples for all rows plus
            index keys; 0 until :meth:`calculate_size` has run.
    """

    def __init__(self, name, rows, sharded):
        """Create a space.

        Args:
            name: space name.
            rows: expected row count (anything ``int()`` accepts).
            sharded: boolean-like string understood by ``util.strtobool``.
        """
        self.name = name
        self.fields = [Field({"name":"bucketid", "type":"number", "indexed": "true"})]
        self.rows = int(rows)
        self.sharded = util.strtobool(sharded)
        self.row_size = 0
        self.row_item_size = 0

    def add_field(self, opts):
        """Attach one field object, or every field object from a list."""
        if isinstance(opts, list):
            self.fields.extend(opts)
        else:
            self.fields.append(opts)

    def __str__(self):
        """Return the dict-style text form: name, row count, sharded flag and fields."""
        return str({"Name": self.name, "Rows count:": self.rows,
                    "Is sharded": ('false', 'true')[self.sharded],
                    "Fields": [str(field) for field in self.fields]})

    # Method for calculating space size (data + keys) (Counts only tree indexes)
    def calculate_size(self):
        """Compute the total size of the space and store the intermediate results.

        Updates ``row_item_size`` (one unaligned tuple) and ``row_size`` (the
        whole space) and returns ``row_size``.

        Raises:
            KeyError: if an indexed field's first index kind has no entry in
                the module-level ``indexes`` table.
        """
        # Sum the sizes of all fields and add the tuple header
        self.row_item_size = sum(field.len for field in self.fields) + tuple_size

        # Offsets for indexed fields: 4 bytes for each indexed field beyond the first.
        # The original computed this product but discarded it, so only the bare count was added.
        indexed_fields = [field for field in self.fields if field.indexed]
        tuple_offsets = max(len(indexed_fields) - 1, 0) * 4

        # Align one tuple by slab_alloc_minimal and slab_alloc_granularity, then scale to all rows
        aligned_tuple = roundup_by(
            roundup_by(self.row_item_size + tuple_offsets, slab_alloc_minimal),
            slab_alloc_granularity)
        tuple_arena = aligned_tuple * self.rows

        # Key size: each index stores one key per row, so the per-row key cost is scaled
        # by the row count just like the tuple arena (it used to be counted for one row only).
        keys_per_row = sum(indexes[field.indexes[0]]['key'] for field in indexed_fields)
        # NOTE(review): index_init_insert is added once per space, as before - confirm
        # whether it should instead be charged once per index.
        keys_size = keys_per_row * self.rows + index_init_insert

        self.row_size = tuple_arena + keys_size

        return self.row_size

In [None]:
# Import and parse sample data from box.csv.
# Each CSV row describes one field; rows sharing the same 'space' value are grouped into one Space.
box = {}

# newline='' is what the csv module expects, so quoted values with embedded newlines parse correctly
with open('box.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    sample_data = list(reader)

for field in sample_data:
    space_name = field['space']
    if space_name not in box:
        # First row of a space also carries its row count ('size') and 'sharded' flag
        box[space_name] = Space(space_name, field['size'], field['sharded'])
    box[space_name].add_field(Field(field))

print("Imported a box with following spaces:")
# row_size is still 0 here: it is only filled in once Space.calculate_size() has been called
for space in box: print(box[space], box[space].row_size)

In [None]:
# Calculate sizing in Gb (Calculates: RAM)
# Fill the box.csv and play this cell
# calculate_size() has to run first: it also refreshes Space.row_item_size, which the WAL estimate below reads.
# sum() (unlike reduce() without an initial value) also copes with an empty box.
total_ram_size_gb = sum(space.calculate_size() for space in box.values()) / 1024**3

# HDD stores (total_ram_size_gb * checkpoint_count) Gb for checkpoints and
# the sum of (Space.row_item_size * rps * checkpoint_interval) over all spaces for WAL
wal_size_gb = sum(space.row_item_size / (1024**3) * rps * checkpoint_interval for space in box.values())
total_hdd_size_gb = (total_ram_size_gb * checkpoint_count) + wal_size_gb

# Calculates CPU for cluster
# NOTE(review): presumably 2 cores per storage instance plus 1 core per 5000 rps - confirm these factors
total_cpu = ceil(total_ram_size_gb / single_storage_size) * 2 + ceil(float(rps) / 5000)

print("The Total Cluster Size in RAM is:", total_ram_size_gb)
print("The Total HDD needed:", total_hdd_size_gb)
print("The Total CPU needed for the cluster:", total_cpu)

In [None]:
# Okay, let's try to parse Tarantool Avro
# The schema file is read as JSON; the encoding is pinned to UTF-8 so the result
# does not depend on the platform's default text encoding.
with open("tdg_model.avsc", encoding="utf-8") as avro_model:
    model = json.load(avro_model)
