In [3]:
# Fetch municipal_entities_gold data and compare with MUNICIPALITY_LOOKUP
from elt_core.db_connector import DBConnector
from sources.lookups.districts_municipalities import MUNICIPALITY_LOOKUP
# Connect and fetch data
db = DBConnector()
municipal_docs = db.get_all_documents("municipal_entities_gold")
# Get all 'adminstrates' values
found_municipalities = {doc.get('adminstrates') for doc in municipal_docs if doc.get('adminstrates')}
# Get all municipalities from lookup
all_municipalities = MUNICIPALITY_LOOKUP.keys()
# Find missing ones
missing_from_gold = all_municipalities 
print(f"Total in MUNICIPALITY_LOOKUP: {len(all_municipalities)}")
print(f"Total in municipal_entities_gold: {len(found_municipalities)}")
print(f"Missing from municipal_entities_gold: {len(missing_from_gold)}")
print("\nMissing municipalities:")
for m in sorted(missing_from_gold):
    print(f"  - {m}")

Total in MUNICIPALITY_LOOKUP: 310
Total in municipal_entities_gold: 0
Missing from municipal_entities_gold: 310

Missing municipalities:
  - Abrantes
  - Agueda
  - Aguiar da Beira
  - Alandroal
  - Albergaria-a-Velha
  - Albufeira
  - Alcacer do Sal
  - Alcanena
  - Alcobaca
  - Alcochete
  - Alcoutim
  - Alenquer
  - Alfandega da Fe
  - Alijo
  - Aljezur
  - Aljustrel
  - Almada
  - Almeida
  - Almeirim
  - Almodovar
  - Alpiarca
  - Alter do Chao
  - Alvaiazere
  - Alvito
  - Amadora
  - Amarante
  - Amares
  - Anadia
  - Angra do Heroismo
  - Ansiao
  - Arcos de Valdevez
  - Arganil
  - Armamar
  - Arouca
  - Arraiolos
  - Arronches
  - Arruda dos Vinhos
  - Aveiro
  - Avis
  - Azambuja
  - Baiao
  - Barcelos
  - Barrancos
  - Barreiro
  - Batalha
  - Beja
  - Belmonte
  - Benavente
  - Bombarral
  - Borba
  - Boticas
  - Braga
  - Braganca
  - Cabeceiras de Basto
  - Cadaval
  - Caldas da Rainha
  - Calheta
  - Calheta Madeira
  - Calheta Sao Jorge
  - Camara de Lobos
  - Caminha


In [5]:
len(MUNICIPALITY_LOOKUP.keys())

310

In [6]:
# Count occurrences of each 'administrates' value
from collections import Counter

# Get all 'administrates' values (including None)
administrates_values = [doc.get('administrates') for doc in municipal_docs]

# Count occurrences
counts = Counter(administrates_values)

# Find duplicates (appearing more than once)
duplicates = {k: v for k, v in counts.items() if v > 1}

print(f"Total documents: {len(municipal_docs)}")
print(f"Unique 'administrates' values: {len(counts)}")
print(f"\nDuplicates (appearing more than once):")
for key, count in sorted(duplicates.items(), key=lambda x: -x[1]):
    print(f"  - '{key}': {count} times")

# Also check for None/missing values
if None in counts:
    print(f"\nDocuments with no 'administrates': {counts[None]}")

Total documents: 311
Unique 'administrates' values: 308

Duplicates (appearing more than once):
  - 'Espinho': 2 times
  - 'Moita': 2 times
  - 'Ilhavo': 2 times
