In [4]:
from utils import MongoConnectionManager, get_redis_instance
import json

In [6]:
# Module-level handle returned by utils.get_redis_instance(); read_static_data
# calls .get() on it to load a section's skeleton document.
# NOTE(review): presumably a redis-py client -- confirm against utils.
redis = get_redis_instance()

In [19]:
def read_static_data(section_name):
    """Collect the id/title column pairs of a section's select filters.

    Loads the JSON document stored in Redis under ``<section_name>_skeleton``
    and walks its ``'filters'`` mapping. A filter is kept when its ``'input'``
    value is truthy, its ``'type'`` equals ``'select'`` and its ``'item_id'``
    differs from its ``'item_title'``.

    Args:
        section_name: Prefix used to build the Redis key
            (``f'{section_name}_skeleton'``).

    Returns:
        dict: ``{filter_key: {'item_id': ..., 'item_title': ...}}`` for every
        filter that passed the checks above. For the ``'postal_region'``
        filter the title is always replaced by ``'postal_code_label'``.

    Raises:
        KeyError: If Redis returns ``None`` for the skeleton key, or if the
            decoded payload (or one of its filter entries) lacks an expected
            key.
    """
    filter_key = f'{section_name}_skeleton'
    raw_skeleton = redis.get(filter_key)
    if raw_skeleton is None:
        # Without this guard a missing key surfaces as an opaque
        # "the JSON object must be str, bytes or bytearray, not NoneType"
        # TypeError raised from inside json.loads.
        raise KeyError(f'No skeleton found in Redis for key {filter_key!r}')

    data = json.loads(raw_skeleton)['filters']

    filters = {}
    for key, value in data.items():
        if value['input'] and value['type'] == 'select' and value['item_id'] != value['item_title']:
            filters[key] = {
                'item_id': value['item_id'],
                # The postal_region filter does not use its own item_title;
                # its title column is hard-wired to 'postal_code_label'.
                'item_title': value['item_title'] if key != 'postal_region' else 'postal_code_label',
            }

    return filters

In [28]:
def build_facet_query(filters):
    """Build one ``$facet`` sub-pipeline per filter to spot ids with several titles.

    Args:
        filters: Mapping ``{filter_key: {'item_id': str, 'item_title': str}}``.
            Both names are upper-cased before being used as field names.

    Returns:
        dict: ``{filter_key: [stage, stage, stage, stage]}``. Each pipeline
        keeps documents where both fields are non-null, groups by title while
        remembering the first id seen for it, regroups by that id collecting
        the titles, and finally keeps only ids that gathered more than one
        title.
    """
    def _duplicate_pipeline(id_field, title_field):
        # Four stages: filter nulls -> one row per title -> one row per id
        # -> keep ids that ended up with more than one title.
        return [
            {'$match': {
                id_field: {'$ne': None},
                title_field: {'$ne': None},
            }},
            {'$group': {
                '_id': f'${title_field}',
                'number': {'$first': f'${id_field}'},
            }},
            {'$group': {
                '_id': '$number',
                'items': {'$addToSet': '$_id'},
                'count': {'$sum': 1},
            }},
            {'$match': {'count': {'$gt': 1}}},
        ]

    return {
        name: _duplicate_pipeline(spec['item_id'].upper(), spec['item_title'].upper())
        for name, spec in filters.items()
    }

In [34]:
def fetch_data(section_name):
    """Run the duplicate-detection ``$facet`` aggregation for a section.

    Builds the facet specification from the section's filters (via
    ``read_static_data`` and ``build_facet_query``) and runs it against the
    ``<section_name>_collection_temp`` collection of the ``Dashboard``
    database.

    Args:
        section_name: Section prefix used for both the filter lookup and the
            collection name.

    Returns:
        dict: The single document produced by the ``$facet`` stage, i.e.
        ``{filter_key: [matching documents]}``. An empty dict is returned,
        without opening a connection, when the section has no filters to
        check.
    """
    filters = read_static_data(section_name)
    facet_query = build_facet_query(filters)

    if not facet_query:
        # MongoDB rejects a $facet stage with an empty specification, so a
        # section without qualifying filters would crash the aggregation.
        # With nothing to check there is nothing to report.
        return {}

    with MongoConnectionManager('Dashboard', f'{section_name}_collection_temp') as collection:
        cursor = collection.aggregate(pipeline=[{'$facet': facet_query}], allowDiskUse=True)
        # $facet emits exactly one output document holding every sub-result.
        data = list(cursor)[0]

    return data

In [42]:
def verify_duplicacy(section_name):
    """Summarise which filters of a section returned duplicate hits.

    Args:
        section_name: Section passed through to ``fetch_data``.

    Returns:
        dict: ``{'fields': [...], 'status': bool, 'message': str}`` where
        ``fields`` lists every facet key whose result was non-empty,
        ``status`` is ``True`` when that list is non-empty, and ``message``
        is the matching human-readable summary.
    """
    facets = fetch_data(section_name)

    # A facet with any result at all means the aggregation flagged that field.
    flagged = [name for name, hits in facets.items() if hits]
    has_duplicates = bool(flagged)

    if has_duplicates:
        message = f'Duplicacy available in {section_name} data'
    else:
        message = f'No duplicate issues found in {section_name} data'

    return {'fields': flagged, 'status': has_duplicates, 'message': message}

In [43]:
# Run the duplicate check once per section name and print each report dict.
views = ['property', 'building', 'transaction']
for view in views:
    # `view` is used as the section_name: it selects both the Redis skeleton
    # key and the Mongo collection that the helpers above read.
    result = verify_duplicacy(view)
    print(result)

{'fields': [], 'status': False, 'message': 'No duplicate issues found in property data'}
{'fields': [], 'status': False, 'message': 'No duplicate issues found in building data'}
{'fields': [], 'status': False, 'message': 'No duplicate issues found in transaction data'}
