In [80]:
import geopandas as gpd
import os
import pandas as pd
import subprocess
import json
import subprocess
from google.protobuf import descriptor_pb2
import importlib.util
import shutil
import glob

In [81]:
DEST_DATA="../data/"
URL_SECTIONS = "https://gis.comune.trento.it/dbexport?db=base&sc=demografici&ly=civici_elettorali&fr=shp"
URL_PREFERENCES_CANDIDATES = "https://www.comune.trento.it/content/download/1582227/15012199/file/Preferenze.csv"
URL_PREFERENCES_GROUPS = "https://www.comune.trento.it/content/download/1582230/15012211/file/Voti_Lista.csv"
URL_PREFERENCES_MAYORS = "https://www.comune.trento.it/content/download/1582233/15012223/file/Voti_Sindaco.csv"
URL_DISTRICTS = "https://gis.comune.trento.it/dbexport?db=base&sc=confini&ly=circoscrizioni&fr=shp"
if os.path.exists(DEST_DATA) == False:
   os.mkdir(DEST_DATA) 

# Donwload

In [82]:
sections = None
file_sections = DEST_DATA + "sezioni_elettorali.shp"
if os.path.exists(file_sections):
    sections = gpd.read_file(file_sections)
else:
    sections = gpd.read_file(URL_SECTIONS)
    sections.to_file(file_sections)

In [83]:
sections.head(1)

Unnamed: 0,vicodi,desvia,numero,barra,seg_elett,sez_elett,sede,chiave,civico_alf,geometry
0,1125,VIA G. BARTALI,23,,427,40,SCUOLA ELEMENTARE CLARINA,1125 23,23,POINT (664844.863 5100832.255)


In [84]:
districts = None
file_districts = "circoscrizioni.shp"
if os.path.exists(file_districts):
    districts = gpd.read_file(file_districts)
else:
    districts = gpd.read_file(URL_DISTRICTS)
    districts.to_file(file_districts)

In [85]:
districts.head(1)

Unnamed: 0,numero_cir,area,perimetro,nome,fumetto,geometry
0,2,15712181,17850,MEANO,Circoscrizione n. 2 - MEANO,"POLYGON ((666651.786 5109264.902, 666650.353 5..."


In [86]:
preferences_candidates = None
file_preferences_candidates = DEST_DATA + "Preferenze.csv"
if os.path.exists(file_preferences_candidates):
    preferences_candidates = pd.read_csv(file_preferences_candidates,sep=";",encoding="ISO-8859-15")
else:
    preferences_candidates = pd.read_csv(URL_PREFERENCES_CANDIDATES,sep=";",encoding="ISO-8859-15")
    preferences_candidates.to_csv(file_preferences_candidates,sep=";",encoding="ISO-8859-15",index=False)

In [87]:
preferences_groups = None
file_preferences_groups = DEST_DATA + "Voti_Lista.csv"
if os.path.exists(file_preferences_groups):
    preferences_groups = pd.read_csv(file_preferences_groups,sep=";",encoding="ISO-8859-15")
else:
    preferences_groups = pd.read_csv(URL_PREFERENCES_GROUPS,sep=";",encoding="ISO-8859-15")
    preferences_groups.to_csv(file_preferences_groups,sep=";",encoding="ISO-8859-15",index=False)

In [88]:
preferences_mayors = None
file_preferences_mayors = DEST_DATA + "Voti_Sindaco.csv"
if os.path.exists(file_preferences_mayors):
    preferences_mayors = pd.read_csv(file_preferences_mayors,sep=";",encoding="ISO-8859-15")
else:
    preferences_mayors = pd.read_csv(URL_PREFERENCES_MAYORS,sep=";",encoding="ISO-8859-15")
    preferences_mayors.to_csv(file_preferences_mayors,sep=";",encoding="ISO-8859-15",index=False)

# Data preparation

In [89]:
columns_districts = {
    'numero_cir':'id_district',
    'nome':'district'}
districts.rename(columns=columns_districts,inplace=True)
districts = districts[['id_district','district','geometry']].to_crs(epsg=4326)

In [90]:
districts.columns

Index(['id_district', 'district', 'geometry'], dtype='object')

In [91]:
columns_sections =  {
    "sez_elett": "id_section",
    "sede":"station",
    "desvia":"streetname",
    "civico_alf":"housenumber"
}
sections.rename(columns=columns_sections,inplace=True)
sections = gpd.sjoin(sections, districts, how='left', predicate='within')
stations = sections[['id_section','station']].drop_duplicates().reset_index()[['id_section','station']]
sections = sections[['id_district','district','id_section','station','streetname','housenumber','geometry']].to_crs(epsg=4326)


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:25832
Right CRS: EPSG:4326

  sections = gpd.sjoin(sections, districts, how='left', predicate='within')


In [92]:
stations = sections[['id_section','station']].drop_duplicates().reset_index()[['id_section','station']]


In [93]:
preferences_columns = {
    'Sezione':'id_section',
    'Cod. circoscrizione':'id_district',
    'Circoscrizione':'district',
    'Nome Lista':'group',
    'Cognome':'lastname',
    'Nome':'name',
    'Nome Detto':'nickname',
    'Voti':'votes'
}
preferences_candidates.rename(columns=preferences_columns,inplace=True)


In [94]:
preferences_columns = {
    'Sezione':'id_section',
    'Cod. circoscrizione':'id_district',
    'Circoscrizione':'district',
    'Lista':'group',
    'Sindaco':'supportedmayor',
    'Voti':'votes'
}
preferences_groups.rename(columns=preferences_columns,inplace=True)
preferences_groups = preferences_groups.dropna(subset=['supportedmayor'])

In [95]:
preferences_groups['id_section'] = preferences_groups['id_section'].fillna(-1)
preferences_groups['id_section'] = preferences_groups['id_section'].astype(int)
preferences_groups['id_district'] = preferences_groups['id_district'].fillna(-1)
preferences_groups['id_district'] = preferences_groups['id_district'].astype(int)

In [96]:
preferences_candidates = preferences_candidates.merge(
    preferences_groups[['group', 'supportedmayor']].drop_duplicates(),
    on='group',
    how='left'
)

In [97]:
preferences_candidates = preferences_candidates[[
    'id_section','id_district','district','supportedmayor',
    'group','lastname','name','nickname','votes']]
preferences_candidates['nickname'] = preferences_candidates['nickname'].fillna("")

In [98]:
preferences_columns = {
    "Sezione":"id_section",
    "Cod. circoscrizione":"id_district",
    "Circoscrizione":"district",
    "Schede Bianche":"blanks",
    "Schede nulle o contenenti solo voti nulli":"canceled",
    "Voti contestati e non attribuiti":"disputed",
    "Cognome":"lastname",
    "Nome":"name",
    "Voti":"votes"
}
preferences_mayors.rename(columns=preferences_columns,inplace=True)

# Export data

In [99]:
def to_json(df,filename):
    records = df.to_dict(orient="records")
    json_output = json.dumps(records, indent=2)
    # Salva il file JSON
    json_path = filename
    with open(json_path, "w", encoding="utf-8") as f:
        f.write(json_output)
    json_path

In [100]:
districts.to_file(DEST_DATA + "districts.geojson",driver="GeoJSON",encoding="utf-8")

In [101]:
sections.to_file(DEST_DATA + "sections.geojson",driver="GeoJSON",encoding="utf-8")

In [102]:
stations.to_csv(DEST_DATA + "stations.csv",index=False,encoding="utf-8")
to_json(stations,DEST_DATA + "stations.json")

In [103]:
preferences_candidates.to_csv(DEST_DATA + 'preferences_candidates.csv',index=False,encoding="utf-8")
to_json(preferences_candidates,DEST_DATA + "preferences_candidates.json")

In [104]:
preferences_groups.to_csv(DEST_DATA + 'preferences_groups.csv',index=False,encoding="utf-8")
to_json(preferences_groups,DEST_DATA + "preferences_groups.json")

In [105]:
preferences_mayors.to_csv(DEST_DATA + 'preferences_mayors.csv',index=False,encoding="utf-8")
to_json(preferences_mayors,DEST_DATA + "preferences_mayors.json")

# PMTiles

In [106]:
geojson_file = DEST_DATA + 'sections.geojson'
mbtiles_file = DEST_DATA + 'sections.mbtiles'
tippecanoe = [
    "tippecanoe",
    "--force",
    "-o", mbtiles_file,
    "-l", "sections",
    "-pC",
    "-z20",
    "-Z10",
    "-r1",
    "--no-feature-limit",
    "--no-tile-size-limit",
    geojson_file
]

result = subprocess.run(tippecanoe, capture_output=True, text=True)

In [107]:
geojson_file = DEST_DATA + 'districts.geojson'
mbtiles_file = DEST_DATA + 'districts.mbtiles'
tippecanoe = [
    "tippecanoe",
    "--force",
    "-o", mbtiles_file,
    "-l", "districts",
    "-pC",
    "-z20",
    "-Z10",
    "-r1",
    "--no-feature-limit",
    "--no-tile-size-limit",
    geojson_file
]

result = subprocess.run(tippecanoe, capture_output=True, text=True)

In [108]:
mbtiles_file = DEST_DATA + 'sections.mbtiles'
pmtiles_file = DEST_DATA + "sections.pmtiles"

pmtiles = [
    "pmtiles", "convert",
    mbtiles_file,
    pmtiles_file
]

result = subprocess.run(pmtiles, capture_output=True, text=True)


In [109]:
mbtiles_file = DEST_DATA + 'districts.mbtiles'
pmtiles_file = DEST_DATA + "districts.pmtiles"

pmtiles = [
    "pmtiles", "convert",
    mbtiles_file,
    pmtiles_file
]

result = subprocess.run(pmtiles, capture_output=True, text=True)

# Data in PBF

In [110]:
def json_to_pbf(json_path):
    # Derive filenames based on input JSON
    base_name = os.path.splitext(os.path.basename(json_path))[0]
    proto_file = f"{base_name}.proto"
    pb_py_file = f"{base_name}_pb2.py"
    pbf_file = f"{base_name}.pbf"

    # Remove existing files if present
    for path in [proto_file, pb_py_file, pbf_file]:
        if os.path.exists(path):
            os.remove(path)

    # Step 1 – Load JSON data
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, list) or not data:
        raise ValueError("The JSON file must contain a non-empty list of objects.")

    sample = data[0]

    # Map Python types to Protobuf types
    def infer_type(value):
        if isinstance(value, int):
            return "int32"
        elif isinstance(value, float):
            return "float"
        elif isinstance(value, bool):
            return "bool"
        else:
            return "string"

    msg_name = base_name.capitalize()
    proto_lines = [
        'syntax = "proto3";\n',
        f'message {msg_name} {{\n'
    ]
    for idx, (key, value) in enumerate(sample.items(), start=1):
        field_type = infer_type(value)
        proto_lines.append(f"  {field_type} {key} = {idx};\n")
    proto_lines.append("}\n\n")
    proto_lines.append(f"message {msg_name}List {{\n")
    proto_lines.append(f"  repeated {msg_name} items = 1;\n")
    proto_lines.append("}\n")

    with open(proto_file, "w", encoding="utf-8") as f:
        f.writelines(proto_lines)

    # Step 3 – Compile the .proto file
    subprocess.run(["protoc", f"--python_out=.", proto_file], check=True)

    # Step 4 – Dynamically import the compiled Python module
    spec = importlib.util.spec_from_file_location(f"{base_name}_pb2", f"./{base_name}_pb2.py")
    pb_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(pb_module)

    MsgClass = getattr(pb_module, msg_name)
    ContainerClass = getattr(pb_module, f"{msg_name}List")

    # Step 5 – Populate Protobuf container and write to .pbf
    container = ContainerClass()
    for item in data:
        entry = container.items.add()
        for key, value in item.items():
            setattr(entry, key, value)

    with open(pbf_file, "wb") as f:
        f.write(container.SerializeToString())

    return pbf_file

In [111]:
out = json_to_pbf(DEST_DATA + "preferences_candidates.json")

In [112]:
out = json_to_pbf(DEST_DATA + "preferences_mayors.json")

In [113]:
out = json_to_pbf(DEST_DATA + "preferences_groups.json")

In [114]:
out = json_to_pbf(DEST_DATA + "stations.json")

In [115]:
for proto_file in glob.glob("*.proto"):
    shutil.copy(proto_file, DEST_DATA)

In [116]:
for proto_file in glob.glob("*.pbf"):
    shutil.copy(proto_file, DEST_DATA)

# Testing data

In [117]:
from fuzzywuzzy import process

def get_section_code(address, housenumber):
    # Estrai il nome della via dall'indirizzo
    streetname = address.upper()
    
    # Trova la via più simile in sections
    best_match = process.extractOne(streetname, sections['streetname'])
    
    if best_match:
        matched_street = best_match[0]
        # Filtra per via e numero civico
        result = sections[(sections['streetname'] == matched_street) & (sections['housenumber'] == housenumber)]
        if not result.empty:
            return result.iloc[0]['id_section']
    return None

def get_votes_by_candidate_and_section(first_name, last_name, section_id):
    # Filtra il dataframe preferences_candidates per nome, cognome e id_section
    filtered = preferences_candidates[
        (preferences_candidates['name'] == first_name.upper()) &
        (preferences_candidates['lastname'] == last_name.upper()) &
        (preferences_candidates['id_section'] == section_id)
    ]
    
    # Restituisci il numero di voti se esiste una corrispondenza
    if not filtered.empty:
        return filtered.iloc[0]['votes']
    return 0


In [None]:
first_name = "Maurizio"
last_name = "Napolitano"
address = ""
housenumber = "1"
section_id = get_section_code(address, housenumber)
votes = get_votes_by_candidate_and_section(first_name, last_name, section_id)
print(f"Numero di voti per {first_name} {last_name} nella sezione {section_id}: {votes}")

Numero di voti per Maurizio Napolitano nella sezione 68: 2


In [122]:
preferences_candidates[(preferences_candidates.lastname == "NAPOLITANO") &
                       (preferences_candidates.id_section == section_id)]

Unnamed: 0,id_section,id_district,district,supportedmayor,group,lastname,name,nickname,votes
39166,68,6,ARGENTARIO,IANESELLI FRANCO,INSIEME PER TRENTO,NAPOLITANO,MAURIZIO,,2
