In [None]:
# !pip install pyyaml
# !pip install pymongo

In [5]:
import yaml
import pymongo
from urllib.parse import quote_plus as quote

In [6]:
# Connect to our MongoDB collection.

# Connection settings (username, password, host, database) are read from a
# YAML file that lives outside the notebook.
with open('../env/creds.yaml', 'r') as file:
    creds_dict = yaml.safe_load(file)

# Both the username and the password must be percent-escaped before they are
# embedded in a MongoDB URI; otherwise reserved characters such as '@', ':',
# '/' or '%' in either value break URI parsing.
# str() keeps a non-string username from YAML rendering the same way
# str.format() rendered it before escaping was added.
url = 'mongodb://{user}:{pw}@{hosts}/?{rs}&authSource={auth_src}&{am}&tls=true&tlsCAFile={cert_file}'.format(
    user=quote(str(creds_dict['username'])),
    pw=quote(creds_dict['password']),
    hosts=creds_dict['host'],
    rs='replicaSet=rs01',
    auth_src=creds_dict['database'],
    am='authMechanism=DEFAULT',
    cert_file='../env/root.crt'
    )

# `dbs` is the client object; indexing it by name selects a database.
dbs = pymongo.MongoClient(url)

db = dbs[creds_dict['database']]

collection = db['initial_dataset']

In [53]:
# Example aggregation pipeline: fetch only the fields we need from Mongo.

# Stage 1: emit one document per element of data_result.boxes.
unwind_stage = {"$unwind": "$data_result.boxes"}

# Stage 2: keep the per-box attributes plus the cargo-space fields.
project_stage = {
    "$project": {
        "mass": "$data_result.boxes.mass",
        "size": "$data_result.boxes.size",
        "stacking": "$data_result.boxes.stacking",
        "turnover": "$data_result.boxes.turnover",
        "stacking_limit": "$data_result.boxes.stacking_limit",
        "loading_size": "$data_result.cargo_space.loading_size",
        "density_percent": "$data_result.cargo_space.calculation_info.density_percent",
        "filling_space_percent": "$data_result.cargo_space.calculation_info.filling_space_percent",
    }
}

# Stage 3: regroup by the original _id, taking the cargo-space fields from the
# first unwound row and collecting the per-box attributes into a `boxes` list.
group_stage = {
    "$group": {
        "_id": "$_id",
        "loading_size": {"$first": "$loading_size"},
        "density_percent": {"$first": "$density_percent"},
        "filling_space_percent": {"$first": "$filling_space_percent"},
        "boxes": {
            "$push": {
                "mass": "$mass",
                "size": "$size",
                "stacking": "$stacking",
                "turnover": "$turnover",
                "stacking_limit": "$stacking_limit",
            }
        },
    }
}

pipeline = [unwind_stage, project_stage, group_stage]
result = collection.aggregate(pipeline)

# Materialise the cursor into a list so the results can be indexed and counted.
dataset = list(result)

In [54]:
# Number of documents returned by the aggregation.
len(dataset)

628

In [56]:
# Check the container type of the materialised results.
type(dataset)

list

In [55]:
# Inspect the first aggregated document.
dataset[0]

{'_id': ObjectId('6427400ea78245bedd47ad01'),
 'loading_size': {'width': 800.0, 'height': 2000.0, 'length': 1200.0},
 'density_percent': 77.27151,
 'filling_space_percent': 71.8625,
 'boxes': [{'mass': 5.0,
   'size': {'width': 280.0, 'height': 450.0, 'length': 280.0},
   'stacking': True,
   'turnover': True,
   'stacking_limit': 0.0},
  {'mass': 5.0,
   'size': {'width': 280.0, 'height': 450.0, 'length': 280.0},
   'stacking': True,
   'turnover': True,
   'stacking_limit': 0.0},
  {'mass': 5.0,
   'size': {'width': 280.0, 'height': 450.0, 'length': 280.0},
   'stacking': True,
   'turnover': True,
   'stacking_limit': 0.0},
  {'mass': 5.0,
   'size': {'width': 280.0, 'height': 450.0, 'length': 280.0},
   'stacking': True,
   'turnover': True,
   'stacking_limit': 0.0},
  {'mass': 5.0,
   'size': {'width': 280.0, 'height': 450.0, 'length': 280.0},
   'stacking': True,
   'turnover': True,
   'stacking_limit': 0.0},
  {'mass': 5.0,
   'size': {'width': 280.0, 'height': 450.0, 'length'