In [10]:
import requests
import json
import pandas as pd
from time import sleep

In [11]:
df = pd.read_csv('../output/codici_umbria_er_20241117.csv', dtype=str)
df_comuni = df[df['tipo'] == 'CM']
# Filtering for only Emilia Romagna
df_er = df_comuni[df_comuni['cod_regione'] == '08']

In [12]:
BASE_URL = 'https://eleapi.interno.gov.it/siel/PX/votantiZ/DE/20241117/TE/07/PR/'

df_er['url_aff'] = df_er.apply(
    lambda row: f"{BASE_URL}{row['cod_provincia']}/{row['tipo']}/{row['cod_comune']}", axis=1
)

df_er['url'] = df_er.apply(
    lambda row: f"https://elezioni.interno.gov.it/risultati/20241117/regionali/votanti/italia/{row['cod_regione']}{row['cod_provincia']}{row['cod_comune']}", axis=1
)
df_er.sample(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_er['url_aff'] = df_er.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_er['url'] = df_er.apply(


Unnamed: 0,cod,desc,tipo,tipo_comune,dt_agg,cod_ele,cod_regione,regione,cod_provincia,provincia,cod_provincia_2,cod_comune,url_aff,url
151,80500500360,SAN FELICE SUL PANARO,CM,N,,7,8,EMILIA-ROMAGNA,50,MODENA,50,360,https://eleapi.interno.gov.it/siel/PX/votantiZ...,https://elezioni.interno.gov.it/risultati/2024...
55,80130130585,VALSAMOGGIA,CM,M,,7,8,EMILIA-ROMAGNA,13,BOLOGNA,13,585,https://eleapi.interno.gov.it/siel/PX/votantiZ...,https://elezioni.interno.gov.it/risultati/2024...
138,80500500240,MONTECRETO,CM,N,,7,8,EMILIA-ROMAGNA,50,MODENA,50,240,https://eleapi.interno.gov.it/siel/PX/votantiZ...,https://elezioni.interno.gov.it/risultati/2024...


In [13]:
headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'en-US,en;q=0.6',
    'dnt': '1',
    'origin': 'https://elezioni.interno.gov.it',
    'priority': 'u=1, i',
    'referer': 'https://elezioni.interno.gov.it/',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'sec-gpc': '1'
}

In [14]:
print(f"Total URLs to process: {len(df_er['url_aff'])}")

comuni_data = []

for idx, url in enumerate(df_er['url_aff']):
    print(f"\nProcessing record {idx + 1}")
    print(f"URL: {url}")

    response = requests.get(url, headers=headers)
    print(f"Response status code: {response.status_code}")
    
    data = response.json()
    
    provincia = df_er.iloc[idx]['provincia']
    regione = df_er.iloc[idx]['regione']
    url = df_er.iloc[idx]['url']
    print(f"Processing data for Provincia: {provincia}, Regione: {regione}")

    # Get total number of updates available
    n_updates = len(data['enti']['ente_p']['com_vot'])
    print(f"Number of updates available: {n_updates}")

    # Process each update
    for n_update in range(n_updates):
        raw_dt_com = str(data['enti']['ente_p']['com_vot'][n_update]['dt_com'])
        date = raw_dt_com[:8]
        time = raw_dt_com[8:12]

        formatted_date = f"{date[:4]}-{date[4:6]}-{date[6:]}"
        formatted_time = f"{time[:2]}:{time[2:]}"
        print(f"Processing update {n_update + 1}/{n_updates} - Timestamp: {formatted_date} {formatted_time}")

        comune_info = {
            'ele_info': data['int']['t_ele'],
            'comune': data['enti']['ente_p']['desc'],
            'provincia': provincia,
            'regione': regione,
            'ele_f': data['enti']['ente_p']['ele_f'],
            'ele_m': data['enti']['ente_p']['ele_m'],
            'ele_t': data['enti']['ente_p']['ele_t'],
            'perc': data['enti']['ente_p']['com_vot'][n_update]['perc'],
            'perc_r': data['enti']['ente_p']['com_vot'][n_update]['perc_r'],
            'vot_t': data['enti']['ente_p']['com_vot'][n_update]['vot_t'],
            'data': formatted_date,
            'ora': formatted_time,
            'update_number': n_update,
            'url': url
        }
        
        print(f"Comune: {comune_info['comune']} - Update {n_update}")
        print(f"Electoral data - Total: {comune_info['ele_t']}, Female: {comune_info['ele_f']}, Male: {comune_info['ele_m']}")
        print(f"Voting percentage: {comune_info['perc']}%, Reporting percentage: {comune_info['perc_r']}%")

        comuni_data.append(comune_info)
    
    print(f"Waiting 0.2 seconds before next request...")
    sleep(0.2)

print("\nCreating DataFrame...")
df_data = pd.DataFrame(comuni_data)

print("\nConverting percentage columns to float...")
df_data['perc_r'] = df_data['perc_r'].str.replace(',', '.').astype(float)
df_data['perc'] = df_data['perc'].str.replace(',', '.').astype(float)

print("\nFinal DataFrame shape:", df_data.shape)
print("\nSample of processed data:")
print(df_data.head())

Total URLs to process: 330

Processing record 1
URL: https://eleapi.interno.gov.it/siel/PX/votantiZ/DE/20241117/TE/07/PR/013/CM/0005
Response status code: 200
Processing data for Provincia: BOLOGNA, Regione: EMILIA-ROMAGNA
Number of updates available: 4
Processing update 1/4 - Timestamp: 2024-11-17 12:00
Comune: ALTO RENO TERME - Update 0
Electoral data - Total: 6036, Female: 3057, Male: 2979
Voting percentage: 8,90%, Reporting percentage: 21,45%
Processing update 2/4 - Timestamp: 2024-11-17 19:00
Comune: ALTO RENO TERME - Update 1
Electoral data - Total: 6036, Female: 3057, Male: 2979
Voting percentage: 26,34%, Reporting percentage: 60,27%
Processing update 3/4 - Timestamp: 2024-11-17 23:00
Comune: ALTO RENO TERME - Update 2
Electoral data - Total: 6036, Female: 3057, Male: 2979
Voting percentage: 31,44%, Reporting percentage: 68,00%
Processing update 4/4 - Timestamp: 2024-11-18 15:00
Comune: ALTO RENO TERME - Update 3
Electoral data - Total: 6036, Female: 3057, Male: 2979
Voting perc

In [15]:
df_data['comune'] = df_data['comune'].str.title()
df_data['provincia'] = df_data['provincia'].str.title()
df_data['regione'] = df_data['regione'].str.title()
df_data

Unnamed: 0,ele_info,comune,provincia,regione,ele_f,ele_m,ele_t,perc,perc_r,vot_t,data,ora,update_number,url
0,Regionali Speciali,Alto Reno Terme,Bologna,Emilia-Romagna,3057,2979,6036,8.90,21.45,537,2024-11-17,12:00,0,https://elezioni.interno.gov.it/risultati/2024...
1,Regionali Speciali,Alto Reno Terme,Bologna,Emilia-Romagna,3057,2979,6036,26.34,60.27,1590,2024-11-17,19:00,1,https://elezioni.interno.gov.it/risultati/2024...
2,Regionali Speciali,Alto Reno Terme,Bologna,Emilia-Romagna,3057,2979,6036,31.44,68.00,1898,2024-11-17,23:00,2,https://elezioni.interno.gov.it/risultati/2024...
3,Regionali Speciali,Alto Reno Terme,Bologna,Emilia-Romagna,3057,2979,6036,0.00,0.00,0,2024-11-18,15:00,3,https://elezioni.interno.gov.it/risultati/2024...
4,Regionali Speciali,Anzola Dell'Emilia,Bologna,Emilia-Romagna,4867,4800,9667,14.54,27.20,1406,2024-11-17,12:00,0,https://elezioni.interno.gov.it/risultati/2024...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1315,Regionali Speciali,Talamello,Rimini,Emilia-Romagna,587,568,1155,0.00,0.00,0,2024-11-18,15:00,3,https://elezioni.interno.gov.it/risultati/2024...
1316,Regionali Speciali,Verucchio,Rimini,Emilia-Romagna,4475,4402,8877,8.45,22.18,750,2024-11-17,12:00,0,https://elezioni.interno.gov.it/risultati/2024...
1317,Regionali Speciali,Verucchio,Rimini,Emilia-Romagna,4475,4402,8877,24.87,56.36,2208,2024-11-17,19:00,1,https://elezioni.interno.gov.it/risultati/2024...
1318,Regionali Speciali,Verucchio,Rimini,Emilia-Romagna,4475,4402,8877,29.17,64.83,2589,2024-11-17,23:00,2,https://elezioni.interno.gov.it/risultati/2024...


In [16]:
df_data['perc_diff'] = round(df_data['perc']-df_data['perc_r'],2)
df_data.sample(5)

Unnamed: 0,ele_info,comune,provincia,regione,ele_f,ele_m,ele_t,perc,perc_r,vot_t,data,ora,update_number,url,perc_diff
265,Regionali Speciali,Masi Torello,Ferrara,Emilia-Romagna,993,874,1867,33.96,64.37,634,2024-11-17,19:00,1,https://elezioni.interno.gov.it/risultati/2024...,-30.41
1231,Regionali Speciali,Gemmano,Rimini,Emilia-Romagna,628,657,1285,0.0,0.0,0,2024-11-18,15:00,3,https://elezioni.interno.gov.it/risultati/2024...,0.0
1216,Regionali Speciali,Casteldelci,Rimini,Emilia-Romagna,160,166,326,11.35,21.39,37,2024-11-17,12:00,0,https://elezioni.interno.gov.it/risultati/2024...,-10.04
1026,Regionali Speciali,Ravenna,Ravenna,Emilia-Romagna,65187,61187,126374,37.75,68.94,45630,2024-11-17,23:00,2,https://elezioni.interno.gov.it/risultati/2024...,-31.19
1239,Regionali Speciali,Misano Adriatico,Rimini,Emilia-Romagna,5898,5843,11741,0.0,0.0,0,2024-11-18,15:00,3,https://elezioni.interno.gov.it/risultati/2024...,0.0


In [17]:
df_data.to_csv('../output/affluenze_er.csv', index=False, encoding='UTF-8-sig')

In [18]:
# Find the latest update_number where perc is not 0
latest_update = df_data[df_data['perc'] != 0]['update_number'].max()
print(f"Latest update number with non-zero percentages: {latest_update}")

# Filter DataFrame to only include rows from that update
latest_data = df_data[df_data['update_number'] == latest_update]
print(f"\nShape of filtered data: {latest_data.shape}")

# Remove rows where 'comune' is either 'Sassofeltrio' or 'Montecopiolo'
latest_data = latest_data[~latest_data['comune'].isin(['Sassofeltrio', 'Montecopiolo'])]

# Use .str.replace() to handle string replacement more safely
latest_data['comune'] = latest_data['comune'].str.replace("Forli'", "Forlì", regex=False)
latest_data['comune'] = latest_data['comune'].str.replace("Montescudo - Monte Colombo", "Montescudo-Monte Colombo", regex=False)



# Save to CSV
latest_data.to_csv('../output/viz/affluenze_er_latest.csv', index=False, encoding='UTF-8-sig')

Latest update number with non-zero percentages: 2

Shape of filtered data: (330, 15)
