In [None]:
import requests
import json
import pandas as pd
from time import sleep

In [None]:
# Load the codes table. Every column is read as str, so codes such as '08'
# are compared as text below.
df = pd.read_csv('../output/codici_umbria_er.csv', dtype=str)

# Keep only the rows whose 'tipo' is 'CM' (presumably comuni — confirm against the CSV).
df_comuni = df[df['tipo'] == 'CM']

# Filtering for only Emilia Romagna (cod_regione '08').
# .copy() makes df_er an independent frame: later cells add columns to it, and
# assigning onto a boolean-mask slice raises SettingWithCopyWarning in pandas < 3.
df_er = df_comuni[df_comuni['cod_regione'] == '08'].copy()

In [None]:
# API endpoint prefix; the per-comune suffix is <cod_provincia>/<tipo>/<cod_comune>.
BASE_URL = 'https://eleapi.interno.gov.it/siel/PX/votantiZ/DE/20241117/TE/07/PR/'
# Public results page prefix; the suffix is <cod_regione><cod_provincia><cod_comune>.
RESULTS_PAGE_URL = 'https://elezioni.interno.gov.it/risultati/20241117/regionali/votanti/italia/'

# Build both URL columns with vectorised string concatenation and bind the
# result to a new frame via assign():
#   * no column is written onto a filtered slice (no SettingWithCopyWarning);
#   * unlike a row-wise apply(axis=1), this also works when df_er is empty.
# NOTE: a missing code now yields a missing URL (NaN) rather than a URL that
# contains the literal text "nan".
df_er = df_er.assign(
    url_aff=BASE_URL + df_er['cod_provincia'] + '/' + df_er['tipo'] + '/' + df_er['cod_comune'],
    url=RESULTS_PAGE_URL + df_er['cod_regione'] + df_er['cod_provincia'] + df_er['cod_comune'],
)

# Preview a few rows; the sample size is capped so small frames do not raise.
df_er.sample(min(3, len(df_er)))

In [None]:
# HTTP request headers used for the turnout API calls.
# NOTE(review): origin/referer point at elezioni.interno.gov.it and the sec-*
# entries look like the ones a browser would send from that site — confirm
# which of them the API actually requires.
headers = {
    # Content negotiation
    "accept": "application/json, text/plain, */*",
    "accept-language": "en-US,en;q=0.6",
    # Privacy / request-context hints
    "dnt": "1",
    "origin": "https://elezioni.interno.gov.it",
    "priority": "u=1, i",
    "referer": "https://elezioni.interno.gov.it/",
    # Client hints (the platform value includes literal double quotes)
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"macOS"',
    # Fetch metadata
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "sec-gpc": "1",
}

In [None]:
# Download the turnout JSON for every row of df_er and flatten it into
# df_data: one row per (comune, update).
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection blocks the cell forever
REQUEST_DELAY = 0.2   # seconds slept after each request


def _format_dt_com(raw_dt_com):
    """Split a ``dt_com`` value into two display strings.

    The first 8 characters are rendered as ``XXXX-XX-XX`` and the following 4
    as ``XX:XX`` (presumably YYYYMMDD followed by HHMM — confirm against the API).

    Args:
        raw_dt_com: the ``dt_com`` field of one update; converted with ``str()``.

    Returns:
        tuple[str, str]: ``(formatted_date, formatted_time)``.
    """
    stamp = str(raw_dt_com)
    day, clock = stamp[:8], stamp[8:12]
    return f"{day[:4]}-{day[4:6]}-{day[6:]}", f"{clock[:2]}:{clock[2:]}"


print(f"Total URLs to process: {len(df_er)}")

comuni_data = []

# A single Session reuses the connection for all requests to the same host.
with requests.Session() as session:
    session.headers.update(headers)

    # Iterate over the needed columns together instead of looking rows up again
    # with df_er.iloc[idx]; the API URL and the public page URL get distinct names.
    records = zip(df_er['url_aff'], df_er['provincia'], df_er['regione'], df_er['url'])
    for idx, (api_url, provincia, regione, page_url) in enumerate(records):
        print(f"\nProcessing record {idx + 1}")
        print(f"URL: {api_url}")

        response = session.get(api_url, timeout=REQUEST_TIMEOUT)
        print(f"Response status code: {response.status_code}")
        # Stop with a clear HTTPError on 4xx/5xx rather than an obscure
        # JSON-decoding error or KeyError further down.
        response.raise_for_status()

        payload = response.json()
        ente = payload['enti']['ente_p']
        updates = ente['com_vot']

        print(f"Processing data for Provincia: {provincia}, Regione: {regione}")

        # Get total number of updates available
        n_updates = len(updates)
        print(f"Number of updates available: {n_updates}")

        # Process each update
        for n_update, update in enumerate(updates):
            formatted_date, formatted_time = _format_dt_com(update['dt_com'])
            print(f"Processing update {n_update + 1}/{n_updates} - Timestamp: {formatted_date} {formatted_time}")

            comune_info = {
                'ele_info': payload['int']['t_ele'],
                'comune': ente['desc'],
                'provincia': provincia,
                'regione': regione,
                'ele_f': ente['ele_f'],
                'ele_m': ente['ele_m'],
                'ele_t': ente['ele_t'],
                'perc': update['perc'],
                'perc_r': update['perc_r'],
                'vot_t': update['vot_t'],
                'data': formatted_date,
                'ora': formatted_time,
                'update_number': n_update,  # zero-based position in com_vot
                'url': page_url
            }

            print(f"Comune: {comune_info['comune']} - Update {n_update}")
            print(f"Electoral data - Total: {comune_info['ele_t']}, Female: {comune_info['ele_f']}, Male: {comune_info['ele_m']}")
            print(f"Voting percentage: {comune_info['perc']}%, Reporting percentage: {comune_info['perc_r']}%")

            comuni_data.append(comune_info)

        print(f"Waiting {REQUEST_DELAY} seconds before next request...")
        sleep(REQUEST_DELAY)

print("\nCreating DataFrame...")
df_data = pd.DataFrame(comuni_data)

print("\nConverting percentage columns to float...")
# The percentages are handled as strings with a decimal comma; swap it for a dot before casting.
df_data['perc_r'] = df_data['perc_r'].str.replace(',', '.', regex=False).astype(float)
df_data['perc'] = df_data['perc'].str.replace(',', '.', regex=False).astype(float)

print("\nFinal DataFrame shape:", df_data.shape)
print("\nSample of processed data:")
print(df_data.head())

In [None]:
# Apply str.title() to each of the place-name columns, then display the frame.
for text_col in ('comune', 'provincia', 'regione'):
    df_data[text_col] = df_data[text_col].str.title()
df_data

In [None]:
# perc_diff: relative difference of 'perc' with respect to 'perc_r', in percent,
# rounded to 2 decimals.
# A zero 'perc_r' has no defined relative difference; mask it to NaN so the
# column (and the CSV written later) never contains +/-inf.
baseline = df_data['perc_r'].where(df_data['perc_r'] != 0)
df_data['perc_diff'] = ((df_data['perc'] - baseline) / baseline * 100).round(2)

# Preview a few rows; the sample size is capped so small frames do not raise.
df_data.sample(min(5, len(df_data)))

In [None]:
# Write the full per-update table to disk, without the index column.
# The 'UTF-8-sig' codec prefixes the file with a byte-order mark.
df_data.to_csv(
    '../output/affluenze_er.csv',
    encoding='UTF-8-sig',
    index=False,
)

In [None]:
# Comuni dropped from the "latest" export.
# NOTE(review): the reason for excluding these two is not recorded here — confirm.
COMUNI_TO_DROP = ['Sassofeltrio', 'Montecopiolo']
# Literal (non-regex) substring replacements applied to the 'comune' column.
COMUNE_RENAMES = {
    "Forli'": "Forlì",
    "Montescudo - Monte Colombo": "Montescudo-Monte Colombo",
}

# Find the latest update_number where perc is not 0
latest_update = df_data.loc[df_data['perc'] != 0, 'update_number'].max()
if pd.isna(latest_update):
    # With no non-zero 'perc' the filter below would match nothing and an
    # empty file would silently overwrite the previous export.
    raise ValueError("No update with a non-zero 'perc' found; refusing to write an empty latest-turnout file.")
print(f"Latest update number with non-zero percentages: {latest_update}")

# Filter DataFrame to only include rows from that update
latest_data = df_data[df_data['update_number'] == latest_update]
print(f"\nShape of filtered data: {latest_data.shape}")

# Remove the excluded comuni; .copy() gives an independent frame so the column
# assignments below do not write onto a slice of df_data (SettingWithCopyWarning).
latest_data = latest_data[~latest_data['comune'].isin(COMUNI_TO_DROP)].copy()

# regex=False keeps the apostrophe, spaces and dash as plain text.
for old_name, new_name in COMUNE_RENAMES.items():
    latest_data['comune'] = latest_data['comune'].str.replace(old_name, new_name, regex=False)

# Save to CSV
latest_data.to_csv('../output/viz/affluenze_er_latest.csv', index=False, encoding='UTF-8-sig')