In [1]:
from utils import MongoConnectionManager, get_redis_instance
from dotenv import load_dotenv
import jsonlines
import json
import os

In [2]:
def retrieve_segment_data(bounding_box, view_name, fields):
    """Aggregate per-field statistics for one grid cell and persist the result.

    The cell is queried by the axis-aligned extent of ``bounding_box`` (min/max
    of its vertices) against the ``LAT``/``LON`` fields of
    ``{view_name}_collection`` in the ``turf`` database. A ``$facet`` stage then
    computes, for every entry of ``fields``:

    * ``'categorical'`` -> a mapping of each distinct non-null value to its count;
    * anything else     -> ``{'max': ..., 'min': ...}`` over the non-null values.

    The database field that is read is the upper-cased field name; the key in
    the returned ``properties`` is the field name as given.

    Args:
        bounding_box: sequence of coordinates, each indexed as
            ``[longitude, latitude]``. It is stored verbatim as the polygon ring.
        view_name: prefix used for the source collection, the output
            collection (``{view_name}_object_clusters``) and the JSONL file
            (``{view_name}-grid-object-data.jsonl``).
        fields: mapping of field name -> field type (``'categorical'`` or other).

    Returns:
        ``{'properties': ..., 'geometry': [...]}`` for a cell with data, or
        ``{'nodata': 'no data in this grid'}`` otherwise. When at least one
        categorical field is requested, "has data" means at least one
        categorical facet is non-empty (unchanged from the original filter).
        When only numeric fields are requested, it means at least one facet is
        non-empty.

    Side effects:
        Appends the document to the JSONL file and inserts it into the output
        collection.
    """
    database_name = 'turf'
    collection_name = f'{view_name}_collection'

    latitudes = [coordinate[1] for coordinate in bounding_box]
    longitudes = [coordinate[0] for coordinate in bounding_box]
    match_query = {
        'LAT': {'$gte': min(latitudes), '$lte': max(latitudes)},
        'LON': {'$gte': min(longitudes), '$lte': max(longitudes)},
    }

    facet_query = {}
    non_empty_checks = []
    for field, field_type in fields.items():
        field_name = field.upper()
        if field_type == 'categorical':
            facet_query[field] = [
                {'$match': {field_name: {'$ne': None}}},
                {'$group': {'_id': f'${field_name}', 'count': {'$sum': 1}}},
                {'$group': {'_id': None, 'values': {'$push': {'k': '$_id', 'v': '$count'}}}},
                {'$replaceRoot': {'newRoot': {'$arrayToObject': '$values'}}},
            ]
            non_empty_checks.append({field: {'$ne': []}})
        else:
            facet_query[field] = [
                {'$match': {field_name: {'$ne': None}}},
                {'$group': {'_id': None, 'max': {'$max': f'${field_name}'}, 'min': {'$min': f'${field_name}'}}},
                {'$project': {'_id': 0, 'max': 1, 'min': 1}}
            ]

    pipeline = [{'$match': match_query}, {'$facet': facet_query}]
    # MongoDB rejects `$or` with an empty array, so the post-facet filter is
    # only added when there is at least one categorical field to check.
    if non_empty_checks:
        pipeline.append({'$match': {'$or': non_empty_checks}})

    with MongoConnectionManager(database_name, collection_name) as collection:
        data = list(collection.aggregate(pipeline=pipeline, allowDiskUse=True))

    if not data:
        return {'nodata': 'no data in this grid'}
    data = data[0]

    # Without the server-side filter (numeric-only request) an empty cell comes
    # back as a document whose facets are all empty; treat that as "no data".
    if not non_empty_checks and not any(data.get(field) for field in fields):
        return {'nodata': 'no data in this grid'}

    properties = {}
    for field in fields:
        facet_rows = data.get(field)
        # Each facet yields at most one row; an empty facet maps to {}.
        properties[field] = facet_rows[0] if facet_rows else {}

    document = {
        'properties': properties,
        'geometry': [{
            'type': 'Polygon',
            'coordinates': [bounding_box],
        }]
    }

    # Write the JSONL line BEFORE the insert: the insert below is expected to
    # add an `_id` to `document`, which must not end up in the JSONL output.
    with jsonlines.open(f'{view_name}-grid-object-data.jsonl', 'a') as writer:
        writer.write(document)

    writer_collection = f'{view_name}_object_clusters'
    with MongoConnectionManager(database_name, writer_collection) as collection:
        collection.insert_one(document)

    # Drop the generated id so the returned document stays JSON-serialisable;
    # `pop` tolerates a collection wrapper that did not mutate the dict.
    document.pop('_id', None)
    return document

In [3]:
def get_filter_fields(section_name):
    """Derive the field -> kind mapping for a section from its cached skeleton.

    Reads the JSON stored under ``{section_name}_skeleton`` from the Redis
    instance returned by ``get_redis_instance()`` and inspects its ``filters``
    mapping. Only filters with a truthy ``input`` entry are considered:

    * ``type == 'select'``            -> ``'categorical'``
    * any other type, unless the key is ``'lat'`` or ``'lon'`` -> ``'numeric'``

    Args:
        section_name: section/view name used to build the Redis key.

    Returns:
        dict mapping filter key to ``'categorical'`` or ``'numeric'``, in the
        order the filters appear in the skeleton.
    """
    raw_skeleton = get_redis_instance().get(f'{section_name}_skeleton')
    filter_specs = json.loads(raw_skeleton)['filters']

    selected = {}
    for name, spec in filter_specs.items():
        # Filters that are not user inputs are ignored entirely.
        if not spec['input']:
            continue
        if spec['type'] == 'select':
            selected[name] = 'categorical'
            continue
        # Coordinates are used for the spatial query, not as numeric filters.
        if name in ('lat', 'lon'):
            continue
        selected[name] = 'numeric'

    return selected

In [4]:
def prepare_data(view_name, limit=None):
    """Build and save the per-grid-cell summary for one view.

    Reads the grid features from ``static/processed-small-grids.geojson``,
    calls ``retrieve_segment_data`` for the first ring of each feature's
    geometry, and writes every result that is not a "nodata" sentinel to
    ``data/processed-{view_name}.json`` as a JSON array.

    Args:
        view_name: view/section name; passed to ``get_filter_fields`` and
            ``retrieve_segment_data`` and used in the output file name.
        limit: optional maximum number of features to process (e.g. ``1`` for
            a quick smoke run). ``None`` (default) processes every feature.
    """
    with open('static/processed-small-grids.geojson', 'r') as reader:
        grid = json.load(reader)

    fields = get_filter_fields(view_name)

    features = grid['features']
    if limit is not None:
        features = features[:limit]

    processed = []
    for feature in features:
        # The first ring of the polygon is used as the cell's bounding box.
        bbox = feature['geometry']['coordinates'][0]
        doc = retrieve_segment_data(bbox, view_name, fields)

        if 'nodata' not in doc:
            processed.append(doc)

    # Serialise before opening the file so a serialisation error cannot leave
    # a truncated output file behind.
    processed_dump = json.dumps(processed)

    output_path = f'data/processed-{view_name}.json'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w') as writer:
        writer.write(processed_dump)

In [5]:
# Call load_dotenv() before reading the environment so that variables defined
# in a .env file (if one is found) are visible below.
load_dotenv()

# SECTIONS is a comma-separated list of view names. Strip whitespace and drop
# empty entries so values like "a, b," do not produce bogus view names.
sections = [
    name.strip()
    for name in os.environ.get('SECTIONS', '').split(',')
    if name.strip()
]
if not sections:
    # Fail with an explicit message instead of an AttributeError on None.
    raise RuntimeError(
        "Environment variable 'SECTIONS' must be set to a comma-separated "
        "list of view names"
    )

for view in sections:
    prepare_data(view)