In [1]:
import requests
import json
import pandas as pd
from time import sleep

In [2]:
# Read the code table with every column parsed as text, then keep only the
# rows whose 'tipo' equals 'CM'.
df = pd.read_csv('../output/codici_li.csv', dtype=str)
is_comune = df['tipo'].eq('CM')
df_comuni = df.loc[is_comune]

In [5]:
# Work on an independent copy so the column assignments below never write
# into a filtered slice of `df`.
df_comuni = df_comuni.copy()

# Date segment (YYYYMMDD) shared by the API endpoint and the public results
# page. The public URL previously hard-coded 20241117, which did not match the
# 20241027 used by the API endpoint for the same data.
ELECTION_DATE = '20241027'

BASE_URL = f'https://eleapi.interno.gov.it/siel/PX/votantiZ/DE/{ELECTION_DATE}/TE/07/PR/'
RESULTS_URL = f'https://elezioni.interno.gov.it/risultati/{ELECTION_DATE}/regionali/votanti/italia/'

# API endpoint per row: <BASE_URL><cod_provincia>/<tipo>/<cod_comune>.
# The code columns are strings (the table is read with dtype=str), so plain
# vectorised concatenation replaces the row-wise apply.
df_comuni['url_aff'] = (
    BASE_URL
    + df_comuni['cod_provincia'] + '/'
    + df_comuni['tipo'] + '/'
    + df_comuni['cod_comune']
)

# Public results page per row: region, province and comune codes concatenated
# without separators.
df_comuni['url'] = (
    RESULTS_URL
    + df_comuni['cod_regione']
    + df_comuni['cod_provincia']
    + df_comuni['cod_comune']
)
df_comuni.sample(3)

Unnamed: 0,cod,desc,tipo,tipo_comune,dt_agg,cod_ele,cod_regione,regione,cod_provincia,provincia,cod_provincia_2,cod_comune,url_aff,url
46,70340340440,PORTOFINO,CM,N,20241028195301,7,7,LIGURIA,34,GENOVA,34,440,https://eleapi.interno.gov.it/siel/PX/votantiZ...,https://elezioni.interno.gov.it/risultati/2024...
89,70370370180,CHIUSANICO,CM,N,20241028171252,7,7,LIGURIA,37,IMPERIA,37,180,https://eleapi.interno.gov.it/siel/PX/votantiZ...,https://elezioni.interno.gov.it/risultati/2024...
29,70340340270,ISOLA DEL CANTONE,CM,N,20241028184339,7,7,LIGURIA,34,GENOVA,34,270,https://eleapi.interno.gov.it/siel/PX/votantiZ...,https://elezioni.interno.gov.it/risultati/2024...


In [6]:
# HTTP headers attached to every API request. Origin and referer point at the
# public results site; the remaining entries look like they were copied from a
# browser session (presumably to make the request resemble the site's own
# calls -- confirm which ones the API actually requires).
_HEADER_PAIRS = (
    ('accept', 'application/json, text/plain, */*'),
    ('accept-language', 'en-US,en;q=0.6'),
    ('dnt', '1'),
    ('origin', 'https://elezioni.interno.gov.it'),
    ('priority', 'u=1, i'),
    ('referer', 'https://elezioni.interno.gov.it/'),
    ('sec-ch-ua-mobile', '?0'),
    ('sec-ch-ua-platform', '"macOS"'),
    ('sec-fetch-dest', 'empty'),
    ('sec-fetch-mode', 'cors'),
    ('sec-fetch-site', 'same-site'),
    ('sec-gpc', '1'),
)
headers = dict(_HEADER_PAIRS)

In [11]:
# Download the turnout JSON for every row of df_comuni and flatten it into one
# record per (comune, update). The records are collected in `comuni_data` and
# turned into the `df_data` DataFrame at the end of the cell.

REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection would block the loop forever
REQUEST_PAUSE = 0.2   # seconds to wait between two consecutive requests


def _split_timestamp(raw_dt_com):
    """Return ('YYYY-MM-DD', 'HH:MM') sliced from the first 12 characters of `raw_dt_com`."""
    stamp = str(raw_dt_com)
    return f"{stamp[:4]}-{stamp[4:6]}-{stamp[6:8]}", f"{stamp[8:10]}:{stamp[10:12]}"


def _turnout_records(payload, provincia, regione, page_url):
    """Flatten one API payload into a list of dicts, one per entry of its 'com_vot' list.

    `provincia`, `regione` and `page_url` are copied unchanged into every
    record; all other fields are read from `payload`.
    Raises KeyError if the payload lacks one of the expected keys.
    """
    ente = payload['enti']['ente_p']
    updates = ente['com_vot']
    records = []
    for update_number, update in enumerate(updates):
        formatted_date, formatted_time = _split_timestamp(update['dt_com'])
        records.append({
            'ele_info': payload['int']['t_ele'],
            'comune': ente['desc'],
            'provincia': provincia,
            'regione': regione,
            'ele_f': ente['ele_f'],
            'ele_m': ente['ele_m'],
            'ele_t': ente['ele_t'],
            'perc': update['perc'],
            'perc_r': update['perc_r'],
            'vot_t': update['vot_t'],
            'data': formatted_date,
            'ora': formatted_time,
            'update_number': update_number,
            'url': page_url
        })
    return records


print(f"Total URLs to process: {len(df_comuni)}")

comuni_data = []

# Iterate the needed columns in lockstep instead of re-reading
# df_comuni.iloc[idx] several times per row; distinct names keep the API URL
# and the public page URL from shadowing each other.
targets = zip(
    df_comuni['url_aff'],
    df_comuni['url'],
    df_comuni['provincia'],
    df_comuni['regione'],
)

# One Session reuses the underlying connection across all requests.
with requests.Session() as session:
    for position, (api_url, page_url, provincia, regione) in enumerate(targets, start=1):
        print(f"\nProcessing record {position}")
        print(f"URL: {api_url}")

        response = session.get(api_url, headers=headers, timeout=REQUEST_TIMEOUT)
        print(f"Response status code: {response.status_code}")
        # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
        response.raise_for_status()

        data = response.json()
        print(f"Processing data for Provincia: {provincia}, Regione: {regione}")

        records = _turnout_records(data, provincia, regione, page_url)
        n_updates = len(records)
        print(f"Number of updates available: {n_updates}")

        for comune_info in records:
            n_update = comune_info['update_number']
            print(f"Processing update {n_update + 1}/{n_updates} - Timestamp: {comune_info['data']} {comune_info['ora']}")
            print(f"Comune: {comune_info['comune']} - Update {n_update}")
            print(f"Electoral data - Total: {comune_info['ele_t']}, Female: {comune_info['ele_f']}, Male: {comune_info['ele_m']}")
            print(f"Voting percentage: {comune_info['perc']}%, Reporting percentage: {comune_info['perc_r']}%")

        comuni_data.extend(records)

        print(f"Waiting {REQUEST_PAUSE} seconds before next request...")
        sleep(REQUEST_PAUSE)

print("\nCreating DataFrame...")
df_data = pd.DataFrame(comuni_data)

print("\nConverting percentage columns to float...")
# The API returns percentages as strings with a decimal comma (e.g. '17,57').
for perc_col in ('perc_r', 'perc'):
    df_data[perc_col] = df_data[perc_col].str.replace(',', '.', regex=False).astype(float)

print("\nFinal DataFrame shape:", df_data.shape)
print("\nSample of processed data:")
print(df_data.head())

Total URLs to process: 234

Processing record 1
URL: https://eleapi.interno.gov.it/siel/PX/votantiZ/DE/20241027/TE/07/PR/034/CM/0010
Response status code: 200
Processing data for Provincia: GENOVA, Regione: LIGURIA
Number of updates available: 4
Processing update 1/4 - Timestamp: 2024-10-27 12:00
Comune: ARENZANO - Update 0
Electoral data - Total: 10317, Female: 5395, Male: 4922
Voting percentage: 17,57%, Reporting percentage: 14,54%
Processing update 2/4 - Timestamp: 2024-10-27 19:00
Comune: ARENZANO - Update 1
Electoral data - Total: 10317, Female: 5395, Male: 4922
Voting percentage: 36,95%, Reporting percentage: 34,74%
Processing update 3/4 - Timestamp: 2024-10-27 23:00
Comune: ARENZANO - Update 2
Electoral data - Total: 10317, Female: 5395, Male: 4922
Voting percentage: 42,13%, Reporting percentage: 44,89%
Processing update 4/4 - Timestamp: 2024-10-28 15:00
Comune: ARENZANO - Update 3
Electoral data - Total: 10317, Female: 5395, Male: 4922
Voting percentage: 56,54%, Reporting perce

In [12]:
# Title-case the three place-name columns, then display the frame.
for name_col in ('comune', 'provincia', 'regione'):
    df_data[name_col] = df_data[name_col].str.title()
df_data

Unnamed: 0,ele_info,comune,provincia,regione,ele_f,ele_m,ele_t,perc,perc_r,vot_t,data,ora,update_number,url
0,Regionali Speciali,Arenzano,Genova,Liguria,5395,4922,10317,17.57,14.54,1813,2024-10-27,12:00,0,https://elezioni.interno.gov.it/risultati/2024...
1,Regionali Speciali,Arenzano,Genova,Liguria,5395,4922,10317,36.95,34.74,3812,2024-10-27,19:00,1,https://elezioni.interno.gov.it/risultati/2024...
2,Regionali Speciali,Arenzano,Genova,Liguria,5395,4922,10317,42.13,44.89,4347,2024-10-27,23:00,2,https://elezioni.interno.gov.it/risultati/2024...
3,Regionali Speciali,Arenzano,Genova,Liguria,5395,4922,10317,56.54,62.39,5833,2024-10-28,15:00,3,https://elezioni.interno.gov.it/risultati/2024...
4,Regionali Speciali,Avegno,Genova,Liguria,1114,1130,2244,10.96,11.35,246,2024-10-27,12:00,0,https://elezioni.interno.gov.it/risultati/2024...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,Regionali Speciali,Villanova D'Albenga,Savona,Liguria,1177,1180,2357,48.15,65.87,1135,2024-10-28,15:00,3,https://elezioni.interno.gov.it/risultati/2024...
932,Regionali Speciali,Zuccarello,Savona,Liguria,165,152,317,13.88,16.29,44,2024-10-27,12:00,0,https://elezioni.interno.gov.it/risultati/2024...
933,Regionali Speciali,Zuccarello,Savona,Liguria,165,152,317,32.18,43.00,102,2024-10-27,19:00,1,https://elezioni.interno.gov.it/risultati/2024...
934,Regionali Speciali,Zuccarello,Savona,Liguria,165,152,317,36.28,50.81,115,2024-10-27,23:00,2,https://elezioni.interno.gov.it/risultati/2024...


In [13]:
# Relative gap between perc and perc_r, expressed as a percentage of perc_r
# and rounded to two decimals.
perc_gap = df_data['perc'].sub(df_data['perc_r'])
df_data['perc_diff'] = perc_gap.div(df_data['perc_r']).mul(100).round(2)
df_data.sample(5)

Unnamed: 0,ele_info,comune,provincia,regione,ele_f,ele_m,ele_t,perc,perc_r,vot_t,data,ora,update_number,url,perc_diff
364,Regionali Speciali,Diano Castello,Imperia,Liguria,948,907,1855,9.38,11.1,174,2024-10-27,12:00,0,https://elezioni.interno.gov.it/risultati/2024...,-15.5
382,Regionali Speciali,Dolcedo,Imperia,Liguria,492,526,1018,29.67,36.03,302,2024-10-27,23:00,2,https://elezioni.interno.gov.it/risultati/2024...,-17.65
694,Regionali Speciali,Bardineto,Savona,Liguria,377,374,751,26.9,40.75,202,2024-10-27,23:00,2,https://elezioni.interno.gov.it/risultati/2024...,-33.99
438,Regionali Speciali,Pompeiana,Imperia,Liguria,348,348,696,28.88,42.92,201,2024-10-27,23:00,2,https://elezioni.interno.gov.it/risultati/2024...,-32.71
455,Regionali Speciali,Ranzo,Imperia,Liguria,226,209,435,41.15,51.85,179,2024-10-28,15:00,3,https://elezioni.interno.gov.it/risultati/2024...,-20.64


In [14]:
# Write the full per-update table to disk without the index column; the
# 'UTF-8-sig' codec prefixes the file with a byte-order mark.
df_data.to_csv(
    '../output/affluenze_li.csv',
    index=False,
    encoding='UTF-8-sig',
)

In [15]:
# Find the latest update_number where perc is not 0
has_turnout = df_data['perc'] != 0
latest_update = df_data.loc[has_turnout, 'update_number'].max()
print(f"Latest update number with non-zero percentages: {latest_update}")

# Filter DataFrame to only include rows from that update.
# .copy() makes the subset independent of df_data: it is assigned into below,
# and writing to a filtered slice triggers SettingWithCopyWarning.
latest_data = df_data[df_data['update_number'] == latest_update].copy()
print(f"\nShape of filtered data: {latest_data.shape}")

# Remove rows where 'comune' is either 'Sassofeltrio' or 'Montecopiolo'.
# NOTE(review): these names, and the two renames below, do not look like
# Ligurian comuni -- presumably carried over from another region's notebook.
# They are kept because they are harmless when nothing matches; confirm and drop.
excluded_comuni = ['Sassofeltrio', 'Montecopiolo']
latest_data = latest_data[~latest_data['comune'].isin(excluded_comuni)].copy()

# Literal (non-regex) substring replacements on the comune names.
latest_data['comune'] = latest_data['comune'].str.replace("Forli'", "Forlì", regex=False)
latest_data['comune'] = latest_data['comune'].str.replace("Montescudo - Monte Colombo", "Montescudo-Monte Colombo", regex=False)

# Save to CSV
latest_data.to_csv('../output/viz/affluenze_li_latest.csv', index=False, encoding='UTF-8-sig')

Latest update number with non-zero percentages: 3

Shape of filtered data: (234, 15)
