In [49]:
import pandas as pd
from time import sleep
import requests
from dotenv import load_dotenv
import os

In [50]:
# Read the local .env file (if any) into the process environment so that
# USER_AGENT, BROWSER_VERSION and PLATFORM are available below.
load_dotenv()

# The client-hint brand list repeats the same browser version for both the
# Brave and Chromium entries, so it is looked up once and assembled here.
_browser_version = os.environ.get('BROWSER_VERSION')
_client_hint_brands = ', '.join([
    f'"Brave";v="{_browser_version}"',
    f'"Chromium";v="{_browser_version}"',
    '"Not_A Brand";v="24"',
])

# Request headers sent with every API call. They imitate a same-origin browser
# request issued from the ivgStatistiche page; the Authorization value is sent
# literally as 'Bearer null'.
headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.7',
    'Authorization': 'Bearer null',
    'Connection': 'keep-alive',
    'Referer': 'https://salute.regione.veneto.it/ivgStatistiche/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-GPC': '1',
    'User-Agent': os.environ.get('USER_AGENT'),
    'dnt': '1',
    'sec-ch-ua': _client_hint_brands,
    'sec-ch-ua-mobile': '?0',
    # An unset PLATFORM is rendered as the text "None", exactly as an f-string would.
    'sec-ch-ua-platform': '"' + str(os.environ.get('PLATFORM')) + '"',
}

In [51]:
# Identifier grids iterated by the scraping loop. Each value is sent to the API
# as a query parameter: plot_ids -> idTipoGrafico, year_ids -> annoDa/annoA,
# facility_ids -> idAzienda.
plot_ids = [*range(1, 21)]
year_ids = [*range(1, 10)]
facility_ids = [
    *range(101, 111),
    *range(112, 123),  # 111 is deliberately absent from the sequence
    *range(501, 510),
    901,
    912,
]

In [52]:
# Endpoint queried for every (plot, year, facility) combination.
API_URL = 'https://salute.regione.veneto.it/ivgStatistiche/api/grafico'
# Seconds to wait for the server before giving up on a single request; without
# a timeout one stalled connection would block the whole scrape indefinitely.
REQUEST_TIMEOUT = 30
# Directory that receives one CSV per plot.
OUTPUT_DIR = '../output/raw'


def _fetch_chart(params, facility_id):
    """Request one chart from the API and return the decoded JSON payload.

    Args:
        params: Query parameters for the request.
        facility_id: Facility identifier, used only in error messages.

    Returns:
        The decoded JSON value, or None when the request failed or the body
        was not valid JSON (the failure is printed, never raised).
    """
    try:
        response = requests.get(API_URL, params=params, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()  # Check for HTTP errors
    except requests.exceptions.RequestException as e:
        print(f"\n    Error making request for facility {facility_id}: {e}")
        return None

    # Decoding is handled separately so that a malformed body is reported as a
    # JSON problem rather than as a transport problem.
    try:
        return response.json()
    except ValueError as e:
        print(f"\n    Error parsing JSON for facility {facility_id}: {e}")
        return None


def _extract_records(data, facility_id, year_id):
    """Flatten one API payload into a list of row dictionaries.

    A payload with a non-null 'graficoBarre' yields one row per (item, category)
    pair present in the item; otherwise a non-null 'graficoTortaDto' yields one
    row per item. Malformed payloads are reported and whatever rows were built
    before the problem are still returned.

    Args:
        data: Decoded JSON payload (a dict).
        facility_id: Facility identifier stored in the 'nome_azienda' column.
        year_id: Year identifier stored in the 'anno' column.

    Returns:
        A list of dicts, possibly empty.
    """
    records = []
    bar_chart = data.get('graficoBarre')
    pie_chart = data.get('graficoTortaDto')

    if bar_chart is not None:
        print(f"    Found graficoBarre for facility {facility_id}")
        try:
            for item in bar_chart['list']:
                nome = item['name']
                for categoria in bar_chart['categorie']:
                    cat_name = categoria['id']
                    if cat_name in item:
                        records.append({
                            'nome_azienda': facility_id,
                            'nome': nome,
                            'categoria': cat_name,
                            'valori': item[cat_name],
                            'anno': year_id
                        })
        # TypeError covers null/non-iterable members, which would otherwise
        # abort the entire run instead of just this payload.
        except (KeyError, TypeError) as e:
            print(f"    Error processing graficoBarre data: {e}")
            print(f"    Data structure: {bar_chart}")

    elif pie_chart is not None:
        print(f"    Found graficoTortaDto for facility {facility_id}")
        try:
            for item in pie_chart:
                records.append({
                    'nome_azienda': facility_id,
                    'categoria': item['name'],
                    'valori': item['value'],
                    'anno': year_id
                })
        except (KeyError, TypeError) as e:
            print(f"    Error processing graficoTortaDto data: {e}")
            print(f"    Data structure: {pie_chart}")

    return records


# Scrape every plot type and write one CSV per plot.
for plot_id in plot_ids:
    print(f"\nProcessing plot_id: {plot_id}")
    all_data = []
    # Title reported by the API for THIS plot. It is reset per plot so that a
    # plot whose requests all fail can never reuse (and overwrite the file of)
    # the previous plot's title.
    plot_title = None

    for year_id in year_ids:
        print(f"\n  Year: {year_id}")
        for facility_id in facility_ids:
            print(f"    Processing facility: {facility_id}", end='\r')

            params = {
                'annoA': year_id,
                'annoDa': year_id,
                'idAzienda': facility_id,
                'idTipoGrafico': plot_id,
                'isCategoriaEnable': 'false',
                'isCategoriaMax1': 'false',
            }

            data = _fetch_chart(params, facility_id)
            if isinstance(data, dict):
                title = data.get('title')
                if isinstance(title, str) and title:
                    plot_title = title
                all_data.extend(_extract_records(data, facility_id, year_id))

            # Throttle after every request, including failed ones, so a
            # struggling server is not hit at full speed.
            sleep(0.1)
        sleep(0.2)
    print(f"\nCollected {len(all_data)} records for plot_id {plot_id}")

    if plot_title is not None:
        title = plot_title.split(' - ')[0].lower().replace(' ', '_')
    else:
        # No response carried a title; fall back to a name unique to this plot.
        title = f'plot_{plot_id}'
        print(f"No title received for plot_id {plot_id}; using '{title}' as file name")

    # Saving is best-effort: a failure for one plot must not stop the others.
    try:
        df = pd.DataFrame(all_data)
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        output_path = f'{OUTPUT_DIR}/{title}.csv'
        df.to_csv(output_path, index=False, encoding='utf-8')
        print(f"Successfully saved data to {output_path}")
        print(f"DataFrame shape: {df.shape}")
        print(f"Columns: {df.columns.tolist()}")
    except Exception as e:
        print(f"Error saving data to CSV: {e}")


Processing plot_id: 1

  Year: 1
    Found graficoTortaDto for facility 101
    Found graficoTortaDto for facility 102
    Found graficoTortaDto for facility 103
    Found graficoTortaDto for facility 104
    Found graficoTortaDto for facility 105
    Found graficoTortaDto for facility 106
    Found graficoTortaDto for facility 107
    Found graficoTortaDto for facility 108
    Found graficoTortaDto for facility 109
    Found graficoTortaDto for facility 110
    Found graficoTortaDto for facility 112
    Found graficoTortaDto for facility 113
    Found graficoTortaDto for facility 114
    Found graficoTortaDto for facility 115
    Found graficoTortaDto for facility 116
    Found graficoTortaDto for facility 117
    Found graficoTortaDto for facility 118
    Found graficoTortaDto for facility 119
    Found graficoTortaDto for facility 120
    Found graficoTortaDto for facility 121
    Found graficoTortaDto for facility 122
    Found graficoTortaDto for facility 501
    Found graficoTor