In [47]:
import uuid
from math import ceil
from pprint import pprint
from random import gauss
from statistics import stdev

class Worker:
    """A processing node that devices are scheduled onto.

    Attributes:
        identity: Caller-supplied identifier, or a generated UUID1 hex string
            when none is given.
        devices: Set of devices currently assigned to this worker.
        databases: Set reserved for database assignments (not used by the
            code visible in this file).
        load_index: Accumulated load of the assigned devices; the scheduling
            functions add each device's ``load_index`` to it.

    Workers order by ``load_index`` (``<`` only), so ``sorted(workers)[0]``
    is the least loaded worker.
    """

    def __init__(self, identity=None):
        # Compare against None explicitly: a plain ``identity or ...`` would
        # silently replace valid falsy identities such as 0 or ''.
        self.identity = uuid.uuid1().hex if identity is None else identity
        self.devices = set()
        self.databases = set()
        self.load_index = 0

    def __repr__(self):
        return 'Worker(ID: {identity} Load index: {load_index}, Devices: {devices})'.format(
            identity=self.identity,
            load_index=self.load_index,
            devices=self.devices,
        )

    def __lt__(self, other):
        """Order workers by their current load."""
        return self.load_index < other.load_index

    def __contains__(self, device):
        """Return True when ``device`` is assigned to this worker."""
        return device in self.devices
        
class Device:
    """A data source whose processing cost is expressed as a load index.

    Identity and ordering are deliberately based on different fields:
    equality and hashing use ``id_`` only, so the same device measured with a
    new load still matches its earlier instance inside a set, while ``<``
    compares ``load_index`` so devices can be sorted by load.
    """

    def __init__(self, id_, load_index=0):
        self.id_ = id_
        self.load_index = load_index
        # TODO: reprocessing will be a memory hog; need a mechanism for
        # dealing with it effectively. Some out-of-order data is OK, since
        # the scheduler will just reassign properly.
        self.reprocessing = False

    def __repr__(self):
        return 'ID: {id_}, Load index: {load_index}'.format(
            id_=self.id_, load_index=self.load_index)

    def __lt__(self, other):
        """Order devices by load."""
        return self.load_index < other.load_index

    def __eq__(self, other):
        """Devices are equal when their ids match, regardless of load."""
        if not isinstance(other, Device):
            # Let Python fall back to its default comparison instead of
            # raising AttributeError on objects without ``id_``.
            return NotImplemented
        return self.id_ == other.id_

    def __hash__(self):
        return hash(self.id_)

    
class Database:
    """A named database together with the devices and load attached to it.

    Attributes:
        name: Database name.
        devices: Set of devices associated with this database.
        load_index: Load attributed to this database; starts at 0.
    """

    def __init__(self, name):
        self.name = name
        self.devices = set()
        self.load_index = 0

    def __repr__(self):
        # The original format string printed the "Load Index" label without
        # its value; include the actual load.
        return 'DB(Name: {name}, Load Index: {load_index})'.format(
            name=self.name, load_index=self.load_index)
    
# Scheduler tuning knobs.
max_no_db_per_worker = 3
worker_num = 2
device_num = 10
max_dev = 0.05  # tolerated relative deviation from the per-worker target

# Rounding precision: number of digits in device_num, grown by two thirds
# and rounded up.
decimal_points = len(str(device_num))
decimal_points = ceil(decimal_points + (decimal_points * 2 / 3))

# Target share of the total load per worker, and the upper bound a worker may
# reach once the tolerated deviation is added.
load_per_worker = round(1 / worker_num, decimal_points)
deviation_per_worker_load = load_per_worker * (1 + max_dev)
# Mean load of a single device when the load is spread evenly.
device_load_dev = round(1 / device_num, decimal_points)
    
def random_device_load():
    """Return a random non-negative device load, rounded to 4 decimals.

    The value is drawn from a Gaussian with mean ``device_load_dev`` and
    standard deviation ``max_dev``; negative draws are folded to positive.
    """
    sampled = gauss(device_load_dev, max_dev)
    return abs(round(sampled, 4))
    
# devices = [Device(i, random_device_load()) for i in range(device_num)]
# Worker identities are 1-based: 1 .. worker_num.
workers = [Worker(worker_id) for worker_id in range(1, worker_num + 1)]

def sort_devices_per_worker(devices, workers):
    """Greedily spread ``devices`` over ``workers``, heaviest device first.

    Each device goes to whichever worker currently has the lowest
    ``load_index``; that worker's ``devices`` set and ``load_index`` are
    updated in place.

    Returns:
        The same ``workers`` object that was passed in.
    """
    heaviest_first = sorted(devices, reverse=True)
    for device in heaviest_first:
        # Re-rank on every step: the previous assignment changed the loads.
        ranked = sorted(workers)
        lightest = ranked[0]
        lightest.devices.add(device)
        lightest.load_index += device.load_index
    return workers

def sort_devices(devices, workers, load_per_worker, deviation_per_worker_load):
    """Rebalance ``devices`` across ``workers``, keeping existing assignments
    where the load allows.

    Phase 1 (sticky pass): workers are visited from most to least loaded.
    Each worker's load is reset to 0 and the devices are scanned heaviest
    first; a device the worker already owns is kept as long as the worker is
    below ``load_per_worker`` and adding it would not push the worker over
    ``deviation_per_worker_load``. The scan for that worker stops at the first
    device that would exceed the limit. The worker's ``devices`` set is
    replaced by the kept devices (the instances taken from ``devices``, so
    they carry the new load values).

    Phase 2 (greedy pass): every device not kept in phase 1 is given,
    heaviest first, to the currently least loaded worker.

    Args:
        devices: Iterable of devices to schedule. It is not modified.
        workers: Sequence of workers; mutated in place.
        load_per_worker: Target load per worker.
        deviation_per_worker_load: Maximum load a worker may reach while
            retaining devices in phase 1.

    Returns:
        The same ``workers`` object that was passed in.
    """
    # Work on a private copy: the in-place ``-=`` below used to empty the
    # caller's set and failed outright for non-set iterables.
    unassigned = set(devices)

    for worker in sorted(workers, reverse=True):
        kept = set()
        worker.load_index = 0
        for device in sorted(unassigned, reverse=True):
            if worker.load_index >= load_per_worker:
                break
            if device in worker:
                if (device.load_index + worker.load_index) > deviation_per_worker_load:
                    break
                kept.add(device)
                worker.load_index += device.load_index
        unassigned -= kept
        worker.devices = kept

    # TODO: This part needs to be smarter, not just random.
    # Heavy workers (like a device or two with heavy load) shouldn't get small
    # load devices.
    # One worker should always be elected as the 'reprocessing' worker, which
    # would just deal with devices that are in the reprocess state.

    # The existing coord service works great with this! Victory!
    for device in sorted(unassigned, reverse=True):
        worker = sorted(workers)[0]
        worker.devices.add(device)
        worker.load_index += device.load_index

    return workers
        
# First batch of device load measurements.
devices_1 = {
    Device(1, 0.2),
    Device(2, 0.2),
    Device(3, 0.14),
    Device(4, 0.30),
    Device(5, 0.16),
}

# Second batch: the same device ids with different loads.
devices_2 = {
    Device(1, 0.2),
    Device(2, 0.1),
    Device(3, 0.4),
    Device(4, 0.25),
    Device(5, 0.1),
}

# Schedule the first batch and show the resulting assignment.
workers = sort_devices(devices_1, workers, load_per_worker, deviation_per_worker_load)
for worker in workers:
    print(worker, '\n')

print('+'*114, '\n')

# Re-schedule with the second batch; workers keep devices they already own
# where the load limits allow.
workers = sort_devices(devices_2, workers, load_per_worker, deviation_per_worker_load)
for worker in workers:
    print(worker, '\n')




Worker(ID: 1 Load index: 0.45999999999999996, Devices: {ID: 4, Load index: 0.3, ID: 5, Load index: 0.16}) 

Worker(ID: 2 Load index: 0.54, Devices: {ID: 1, Load index: 0.2, ID: 2, Load index: 0.2, ID: 3, Load index: 0.14}) 

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 

Worker(ID: 1 Load index: 0.55, Devices: {ID: 1, Load index: 0.2, ID: 4, Load index: 0.25, ID: 5, Load index: 0.1}) 

Worker(ID: 2 Load index: 0.5, Devices: {ID: 2, Load index: 0.1, ID: 3, Load index: 0.4}) 



In [46]:
import random
import time

from collections import defaultdict, Counter
from statistics import stdev

# Fixed seed so the simulated message stream is reproducible.
random.seed(2)

# Per-device counters: 'count' (messages seen) and 'calc_time' (total time).
_statistics = defaultdict(Counter)
_device_num = 10
_devices = list(range(_device_num))

# Mean and spread of the simulated per-message processing time.
st_val = 1 / _device_num
gauss_var = st_val * 2

# Rounding precision: number of digits in _device_num, grown by two thirds
# and rounded up.
decimal_points = len(str(_device_num))
decimal_points = ceil(decimal_points + (decimal_points * 2 / 3))
           

def load_simulator():
    """Simulate one message from a random device and record its cost.

    Picks a device from ``_devices``, draws a processing time from a Gaussian
    (mean ``st_val``, standard deviation ``gauss_var``; absolute value,
    rounded to 5 decimals) and adds it to that device's counters in
    ``_statistics``.

    Returns:
        The simulated processing time.
    """
    picked = random.choice(_devices)
    elapsed = abs(round(gauss(st_val, gauss_var), 5))
    stats = _statistics[picked]
    stats['count'] += 1
    stats['calc_time'] += elapsed
    return elapsed
    
# Run the load simulation for a fixed wall-clock interval. The monotonic
# clock is used so the measurement is unaffected by system clock changes.
start_time = time.monotonic()

# Number of simulated messages processed during the interval.
total_messages = 0
int_time = 5  # interval time in seconds

# Each iteration simulates one message and then sleeps for its simulated
# processing time, so the loop finishes shortly after int_time has elapsed.
while (time.monotonic() - start_time) < int_time:
    total_messages += 1
    sleep_for = load_simulator()
    time.sleep(sleep_for)
    
def _load_index(int_time, system_msgs, device_calc, device_msgs):
    """Return a device's load index as a weighted blend of two shares.

    The index combines the device's share of the measurement interval spent
    calculating (weight 0.7) with its share of all messages (weight 0.3),
    normalised by the sum of the weights.

    Args:
        int_time: Length of the measurement interval in seconds.
        system_msgs: Total number of messages processed by the system.
        device_calc: Total calculation time spent on this device.
        device_msgs: Number of messages received from this device.

    Returns:
        The weighted load index as a float.

    Raises:
        ZeroDivisionError: If ``int_time`` or ``system_msgs`` is 0.
    """
    calc_time_index = .7
    count_index = 0.3
    # Use the ``system_msgs`` argument; the previous version ignored it and
    # read the module-level ``total_messages`` instead.
    return (device_calc*calc_time_index/int_time + device_msgs*count_index/system_msgs)/(calc_time_index + count_index)


def run_analytics():
    """Print per-device statistics and load indexes for the simulation run.

    Reads the module-level ``int_time``, ``total_messages`` and
    ``_statistics``. Prints one summary line, one line per device (average
    processing time, message count, load index) and finally the sum of all
    load indexes.

    Raises:
        ZeroDivisionError: If ``total_messages`` is 0.
    """
    average_proc_time = int_time/total_messages
    total_load_index = 0
    print('Total msgs: {} Average proc time: {}s'.format(total_messages, average_proc_time))
    for device, value in _statistics.items():
        count = value['count']
        calc_time = value['calc_time']
        # A device with no recorded messages has no meaningful average;
        # report 0.0 instead of dividing by zero.
        proc_time = calc_time/count if count else 0.0
        # Alternative metric tried earlier: calc_time/int_time (time share only).
        load_index = _load_index(int_time, total_messages, calc_time, count)
        total_load_index += load_index

        print('Device(id: {}, proc_time: {}, count: {}, load_index: {})'.format(
                device, proc_time, count, load_index))
    print(total_load_index)
    
# Print the per-device report for the statistics collected by the simulation loop.
run_analytics()

Total msgs: 23 Average proc time: 0.21739130434782608s
Device(id: 0, proc_time: 0.2169, count: 3, load_index: 0.1302284347826087)
Device(id: 2, proc_time: 0.18765333333333334, count: 3, load_index: 0.11794483478260868)
Device(id: 4, proc_time: 0.09863, count: 1, load_index: 0.026851678260869562)
Device(id: 5, proc_time: 0.12353333333333333, count: 3, load_index: 0.09101443478260869)
Device(id: 6, proc_time: 0.38697, count: 4, load_index: 0.2688771130434782)
Device(id: 7, proc_time: 0.02162, count: 1, load_index: 0.016070278260869566)
Device(id: 8, proc_time: 0.23747, count: 6, load_index: 0.27773566956521734)
Device(id: 9, proc_time: 0.163265, count: 2, load_index: 0.07180115652173913)
1.0005235999999997


In [32]:
# just calc_time
Total msgs: 23 Average proc time: 0.21739130434782608s
Device(id: 0, proc_time: 0.2169, count: 3, load_index: 0.13014)
Device(id: 2, proc_time: 0.18765333333333334, count: 3, load_index: 0.112592)
Device(id: 4, proc_time: 0.09863, count: 1, load_index: 0.019726)
Device(id: 5, proc_time: 0.12353333333333333, count: 3, load_index: 0.07411999999999999)
Device(id: 6, proc_time: 0.38697, count: 4, load_index: 0.30957599999999996)
Device(id: 7, proc_time: 0.02162, count: 1, load_index: 0.004324)
Device(id: 8, proc_time: 0.23747, count: 6, load_index: 0.284964)
Device(id: 9, proc_time: 0.163265, count: 2, load_index: 0.065306)
1.000748


# .6 .4 calc_time, count
Total msgs: 23 Average proc time: 0.21739130434782608s
Device(id: 0, proc_time: 0.2169, count: 3, load_index: 0.13025791304347828)
Device(id: 2, proc_time: 0.18765333333333334, count: 3, load_index: 0.11972911304347827)
Device(id: 4, proc_time: 0.09863, count: 1, load_index: 0.029226904347826085)
Device(id: 5, proc_time: 0.12353333333333333, count: 3, load_index: 0.09664591304347828)
Device(id: 6, proc_time: 0.38697, count: 4, load_index: 0.25531081739130435)
Device(id: 7, proc_time: 0.02162, count: 1, load_index: 0.019985704347826087)
Device(id: 8, proc_time: 0.23747, count: 6, load_index: 0.27532622608695656)
Device(id: 9, proc_time: 0.163265, count: 2, load_index: 0.07396620869565218)
1.0004488



# .7 .3 calc_time, count
Total msgs: 23 Average proc time: 0.21739130434782608s
Device(id: 0, proc_time: 0.2169, count: 3, load_index: 0.1302284347826087)
Device(id: 2, proc_time: 0.18765333333333334, count: 3, load_index: 0.11794483478260868)
Device(id: 4, proc_time: 0.09863, count: 1, load_index: 0.026851678260869562)
Device(id: 5, proc_time: 0.12353333333333333, count: 3, load_index: 0.09101443478260869)
Device(id: 6, proc_time: 0.38697, count: 4, load_index: 0.2688771130434782)
Device(id: 7, proc_time: 0.02162, count: 1, load_index: 0.016070278260869566)
Device(id: 8, proc_time: 0.23747, count: 6, load_index: 0.27773566956521734)
Device(id: 9, proc_time: 0.163265, count: 2, load_index: 0.07180115652173913)
1.0005235999999997

0.0634093637454982
0.010276110444177672


In [1]:
# Block-character art kept as a multi-line string; it starts and ends with a
# newline because of the placement of the triple quotes.
unicorn = """
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼██████▓▓▓▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓█▓███▓▓██▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓██┼▓██▓┼███▓┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼███████▓██▓▓███▓┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼███████▓┼▓█▓▓███┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼██┼┼┼▓█▓██┼▓██▓█┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼█▓▓██████┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼██┼███▓▓███┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓█┼▓███┼▓██▓▓┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓█▓▓███▓┼███▓┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼▓▓██┼┼┼┼┼▓██▓████┼┼███▓┼┼┼┼┼┼┼┼┼▓┼┼┼
┼┼┼┼██████┼┼██┼┼┼▓███▓████▓┼┼┼┼┼┼┼┼█▓┼┼┼
┼┼┼┼██┼▓▓██▓██████████████▓┼┼┼┼┼┼┼██┼██┼
┼┼┼██┼┼▓▓▓┼▓████████▓████▓┼┼┼┼┼┼┼┼█▓██┼┼
┼┼██┼▓█▓█▓██████████┼▓▓┼▓█┼┼┼┼┼┼┼▓███┼┼┼
┼┼█▓▓███████████████▓┼█┼┼┼┼┼┼┼┼┼┼┼████▓┼
┼┼█▓┼█▓▓▓▓▓█████████████┼┼┼┼┼┼┼┼┼┼████▓┼
┼┼██┼▓┼┼┼████████████████▓┼┼┼┼┼┼┼┼▓█▓█▓┼
┼┼▓┼▓▓▓┼┼┼┼┼██▓▓███████████▓┼┼┼┼┼┼▓█┼██┼
┼┼┼┼▓██┼┼┼┼┼┼▓█┼▓██████▓█████▓┼┼┼┼┼█┼██┼
┼┼┼┼┼▓██▓┼┼┼┼┼┼█▓▓████┼┼███████┼┼┼▓█┼█▓┼
┼┼┼┼┼┼███┼┼┼┼┼┼┼┼▓▓███▓████████▓┼▓█┼██┼┼
┼┼┼┼┼┼┼▓┼┼┼┼┼┼┼┼┼┼▓┼▓██▓▓█████████▓██┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼██┼██┼┼█████▓▓████┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼███▓▓┼██████┼▓████┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓██▓┼▓██████┼█┼██▓┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓██┼▓██████▓┼▓█┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼▓█▓▓┼┼┼┼┼┼▓┼██████┼┼┼█┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼▓███████▓┼▓█████▓┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓▓█████┼█▓┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓┼▓██▓┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓███┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓█┼┼┼┼┼┼┼┼┼┼┼┼┼┼
"""

# Write the art to standard output.
print(unicorn)


┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼██████▓▓▓▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓█▓███▓▓██▓┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼▓▓██┼▓██▓┼███▓┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼███████▓██▓▓███▓┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼███████▓┼▓█▓▓███┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼██┼┼┼▓█▓██┼▓██▓█┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼█▓▓██████┼┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼██┼███▓▓███┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓█┼▓███┼▓██▓▓┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼┼┼┼┼┼┼┼┼┼┼▓█▓▓███▓┼███▓┼┼┼┼┼┼┼┼┼┼┼┼┼
┼┼┼┼▓▓██┼┼┼┼┼▓██▓████┼┼███▓┼┼┼┼┼┼┼┼┼▓┼┼┼
┼┼┼┼██████┼┼██┼┼┼▓███▓████▓┼┼┼┼┼┼┼┼█▓┼┼┼
┼┼┼┼██┼▓▓██▓██████████████▓┼┼┼┼┼┼┼██┼██┼
┼┼┼██┼┼▓▓▓┼▓████████▓████▓┼┼┼┼┼┼┼┼█▓██┼┼
┼┼██┼▓█▓█▓██████████┼▓▓┼▓█┼┼┼┼┼┼┼▓███┼┼┼
┼┼█▓▓███████████████▓┼█┼┼┼┼┼┼┼┼┼┼┼████▓┼
┼┼█▓┼█▓▓▓▓▓█████████████┼┼┼┼┼┼┼┼┼┼████▓┼
┼┼██┼▓┼┼┼████████████████▓┼┼┼┼┼┼┼┼▓█▓█▓┼
┼┼▓┼▓▓▓┼┼┼┼┼██▓▓█