In [20]:
import yaml
import pymongo
from urllib.parse import quote_plus as quote
import hashlib
import itertools
import random

In [21]:
# Connect to our MongoDB collections.

# Credentials come from a local YAML file; the keys read below are
# 'username', 'password', 'host' and 'database'.
with open('../env/credsw.yaml', 'r') as file:
    creds_dict = yaml.safe_load(file)

# Both the username and the password must be percent-escaped before they are
# embedded in a MongoDB URI; otherwise reserved characters such as '@', ':'
# or '/' inside either value break URI parsing.
url = 'mongodb://{user}:{pw}@{hosts}/?{rs}&authSource={auth_src}&{am}&tls=true&tlsCAFile={cert_file}'.format(
    user=quote(str(creds_dict['username'])),
    pw=quote(creds_dict['password']),
    hosts=creds_dict['host'],
    rs='replicaSet=rs01',
    auth_src=creds_dict['database'],
    am='authMechanism=DEFAULT',
    cert_file='../env/root.crt'
    )

# `dbs` is the MongoClient; `db` is the database named in the credentials file.
dbs = pymongo.MongoClient(url)

db = dbs[creds_dict['database']]

# Target collection for the augmented items and source collection of unique items.
augmented_collection = db['augmented_dataset']
unique_collection = db['unique_dataset']

In [22]:
# Pull the data out of the collection.

# Stage 1: emit one document per element of the `boxes` array.
unwind_stage = {"$unwind": "$boxes"}

# Stage 2: keep only the fields needed downstream, under shorter names.
project_stage = {
    "$project": {
        "size": "$boxes.size_scale",
        "stacking": "$boxes.stacking",
        "turnover": "$boxes.turnover",
        "loading_size": "$loading_size_scale",
        "filling_space_percent": "$filling_space_percent",
    }
}

# Stage 3: regroup by the original document id, collecting the per-box
# fields back into a `boxes` array.
group_stage = {
    "$group": {
        "_id": "$_id",
        "loading_size": {"$first": "$loading_size"},
        "filling_space_percent": {"$first": "$filling_space_percent"},
        "boxes": {
            "$push": {
                "size": "$size",
                "stacking": "$stacking",
                "turnover": "$turnover",
            }
        },
    }
}

pipeline = [unwind_stage, project_stage, group_stage]
result = unique_collection.aggregate(pipeline)
# Materialize the cursor so the documents can be indexed and re-iterated.
dataset = list(result)

In [23]:
# Number of boxes in every loaded item, in dataset order.
x = [len(entry['boxes']) for entry in dataset]
# Position of the first item with exactly 4 boxes; used by the test cell below.
# list.index raises ValueError if no such item exists.
test_item = x.index(4)
print(max(x), min(x))

2849 1


In [24]:
def as_is_hash(box):
    """Return a SHA-256 hex digest identifying a box in its current orientation.

    The digest is taken over the string formed from the box's height, width
    and length (read from ``box['size']``) followed by its ``stacking`` and
    ``turnover`` values, each prefixed with a one-letter tag.

    Example input: ``dataset[0]['boxes'][i]``.
    """
    dims = box['size']
    fingerprint = ''.join((
        'h', str(dims['height']),
        'w', str(dims['width']),
        'l', str(dims['length']),
        's', str(box['stacking']),
        't', str(box['turnover']),
    ))
    return hashlib.sha256(fingerprint.encode()).hexdigest()

In [25]:
# Box rotation helper.
def rotate_box(box, max_rotations=6):
    """Return copies of ``box``, one per distinct orientation.

    If ``box['turnover']`` is truthy, every permutation of
    (width, height, length) is an allowed orientation. Otherwise only the
    original orientation and the one with width and length swapped are used.

    Identical orientations (for example when width equals length) are
    emitted only once, so callers do not multiply work on duplicates.

    Args:
        box: dict with a ``size`` dict (``width``, ``height``, ``length``)
            and a ``turnover`` flag; other keys are copied through unchanged.
        max_rotations: upper bound on the number of orientations returned.
            When more distinct orientations exist, a random sample of this
            size is taken. The default of 6 is the maximum possible for
            three dimensions, so by default nothing is dropped.

    Returns:
        A list of shallow copies of ``box``, each with ``size`` replaced by
        the rotated dimensions and ``hash`` set via ``as_is_hash``.
    """
    dims = box['size']
    base = (dims['width'], dims['height'], dims['length'])
    if box['turnover']:
        candidates = itertools.permutations(base, 3)
    else:
        candidates = (base, tuple(reversed(base)))

    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # result order is deterministic (unlike iterating over a set).
    orientations = list(dict.fromkeys(candidates))
    if len(orientations) > max_rotations:
        orientations = random.sample(orientations, max_rotations)

    box_rotated = []
    for width, height, length in orientations:
        new_box = dict(box)
        new_box['size'] = {'width': width, 'height': height, 'length': length}
        new_box['hash'] = as_is_hash(new_box)
        box_rotated.append(new_box)
    return box_rotated

def rotate_boxes(boxes):
    """Return every combination of per-box rotations for ``boxes``.

    Each combination is a list with one rotated copy of every input box, in
    the same order as ``boxes``. Combinations are ordered so that the first
    box varies slowest.

    The result is always a list. The number of combinations is the product
    of the per-box rotation counts, so it grows quickly with ``len(boxes)``.

    Args:
        boxes: sequence of box dicts accepted by ``rotate_box``.

    Returns:
        A list of lists of rotated box dicts; an empty list when ``boxes``
        is empty.
    """
    if not boxes:
        # Nothing to rotate: no combinations rather than an IndexError.
        return []
    # A single product call replaces per-box recursion, so long box lists
    # cannot hit the interpreter's recursion limit.
    rotations_per_box = [rotate_box(box) for box in boxes]
    return [list(combo) for combo in itertools.product(*rotations_per_box)]

In [26]:
def augment_item(item):
    """Build the augmented variants of ``item`` from its box rotations.

    For every combination returned by ``rotate_boxes(item['boxes'])`` a new
    item is created whose ``_id`` is the SHA-256 hex digest of the source
    ``_id``, an underscore, and the comma-joined hashes of the rotated boxes.
    Combinations that yield an ``_id`` already produced are skipped.

    Args:
        item: dict with ``_id`` (a string, since it is concatenated with
            ``'_'``), ``loading_size``, ``filling_space_percent`` and
            ``boxes``.

    Returns:
        A list of dicts with keys ``_id``, ``hash`` (the source item's
        ``_id``), ``loading_size``, ``filling_space_percent`` and ``boxes``,
        in first-seen order.
    """
    new_items = []
    seen_ids = set()  # O(1) duplicate check instead of rescanning new_items
    for boxes in rotate_boxes(item['boxes']):
        digest_input = item['_id'] + '_' + ','.join(box['hash'] for box in boxes)
        new_id = hashlib.sha256(digest_input.encode()).hexdigest()
        if new_id in seen_ids:
            continue
        seen_ids.add(new_id)
        new_items.append(dict(
            _id=new_id,
            hash=item['_id'],
            loading_size=item['loading_size'],
            filling_space_percent=item['filling_space_percent'],
            boxes=boxes,
        ))
    return new_items

In [27]:
# Smoke test on the item selected earlier (the first one with exactly 4 boxes):
# print its box count and the number of rotation combinations generated for it.
sample_item = dataset[test_item]
test = augment_item(sample_item)
print(len(sample_item['boxes']))
boxes_combinations = rotate_boxes(sample_item['boxes'])
print(len(list(boxes_combinations)))


4
16


In [28]:
# Items with fewer boxes than this are augmented by rotating their boxes;
# items at or above it are stored unchanged. Raise with care: the number of
# rotation combinations is the product of the per-box rotation counts.
MAX_BOXES_FOR_ROTATION = 4

i, n = 0, 0  # i: source items processed, n: total documents upserted
for unique_item in dataset:
    i += 1
    if len(unique_item['boxes']) < MAX_BOXES_FOR_ROTATION:
        items_to_store = augment_item(unique_item)
    else:
        # Stored as is; its group hash is simply its own id.
        items_to_store = [dict(unique_item, hash=unique_item['_id'])]

    # One upsert per document, keyed by _id, so re-running the cell
    # overwrites existing documents instead of failing on duplicates.
    for item in items_to_store:
        result = augmented_collection.update_one(
            {'_id': item['_id']},
            {'$set': {'filling_space_percent': item['filling_space_percent'],
                      'loading_size': item['loading_size'],
                      'boxes': item['boxes'],
                      'group_hash': item['hash'],
                      }
             },
            upsert=True)

    j = len(items_to_store)  # documents written for this source item
    n += j
    if i < 5 or i % 100 == 0:
        print(f"Item processed: {i}, created items: {j}, total: {n}")

Item processed: 1, created items: 1, total: 1
Item processed: 2, created items: 216, total: 217
Item processed: 3, created items: 1, total: 218
Item processed: 4, created items: 1, total: 219
Item processed: 100, created items: 1, total: 385
Item processed: 200, created items: 1, total: 518
Item processed: 300, created items: 1, total: 658
Item processed: 400, created items: 1, total: 793
