In [3]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

import pandas as pd

# Route DataFrame.plot / Series.plot calls through plotly.
pd.set_option("plotting.backend", "plotly")

In [8]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import pendulum

def fetch_data(start_date, end_date, area_code, timeout=60):
    """Download verified-load records from the ONS API for an inclusive date range.

    The range is split into consecutive chunks (each spanning at most
    ``current_start + 90 days``) and every chunk is validated before being
    appended to the result.

    Args:
        start_date: First day to fetch, as a string accepted by
            ``pendulum.parse`` (e.g. ``'2024-08-01'``).
        end_date: Last day to fetch, inclusive, in the same format.
        area_code: Value sent as the ``cod_areacarga`` query parameter
            (e.g. ``'SP'``).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        list: The decoded JSON items of every chunk, concatenated in request
        order. Empty when ``start_date`` is after ``end_date``.

    Raises:
        ValueError: If a chunk does not hold exactly two records per hour of
            the requested days.
        requests.exceptions.RequestException: If a request fails or returns
            an error status after the configured retries.
    """
    start = pendulum.parse(start_date)
    end = pendulum.parse(end_date)

    retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
    adapter = HTTPAdapter(max_retries=retries)

    all_data = []

    # One session for the whole download: connections are reused between
    # chunks and released when the block exits.
    with requests.Session() as session:
        # The endpoint is HTTPS, so the retry policy must be mounted on
        # 'https://' to have any effect; 'http://' is kept for parity.
        session.mount('https://', adapter)
        session.mount('http://', adapter)

        current_start = start

        # `<=` because end_date is inclusive: a one-day range, or a final day
        # that lands right after a chunk boundary, must still be requested.
        while current_start <= end:
            current_end = min(current_start.add(days=90), end)

            response = session.get(
                'https://apicarga.ons.org.br/prd/cargaverificada',
                params={
                    'dat_inicio': current_start.to_date_string(),
                    'dat_fim': current_end.to_date_string(),
                    'cod_areacarga': area_code,
                },
                timeout=timeout,
            )
            response.raise_for_status()

            chunk_data = response.json()

            # Check for correct number of records: two per hour, counting
            # both the first and the last day of the chunk.
            expected_records = (current_end.add(days=1) - current_start).in_hours() * 2
            if len(chunk_data) != expected_records:
                raise ValueError(f"Data integrity check failed. Expected {expected_records} records, but received {len(chunk_data)} for period {current_start.to_date_string()} to {current_end.to_date_string()}")

            all_data.extend(chunk_data)

            # The next chunk starts the day after this one ended.
            current_start = current_end.add(days=1)

    return all_data

# Example usage: fetch 2024-08-01 through 2024-08-14 for area code 'SP'
# and report either the record count or the failure reason.
try:
    data = fetch_data('2024-08-01', '2024-08-14', 'SP')
except ValueError as err:
    # Raised by the record-count integrity check (handler order is deliberate:
    # ValueError is tested before RequestException).
    print(f"Error: {str(err)}")
except requests.exceptions.RequestException as err:
    print(f"Request failed: {str(err)}")
else:
    print(f"Successfully retrieved {len(data)} records.")

Successfully retrieved 672 records.


In [9]:
# Load the fetched records into a DataFrame, then order the rows by the
# `din_referenciautc` column.
df = pd.DataFrame(data)
df = df.sort_values(by='din_referenciautc')
df

Unnamed: 0,cod_areacarga,din_atualizacao,dat_referencia,din_referenciautc,val_cargaglobal,val_cargaglobalcons,val_cargaglobalsmmgd,val_cargasupervisionada,val_carganaosupervisionada,val_cargammgd,val_consistencia
0,SP,2024-08-14T03:18:09.750Z,2024-08-01,2024-08-01T03:30:00.000Z,15984.807,15984.807,15968.517,14550.327,1418.1892,16.2900,0
1,SP,2024-08-14T03:18:09.750Z,2024-08-01,2024-08-01T04:00:00.000Z,15317.397,15317.397,15301.107,13881.828,1419.2789,16.2900,0
2,SP,2024-08-14T03:18:09.750Z,2024-08-01,2024-08-01T04:30:00.000Z,14741.783,14741.783,14725.493,13305.545,1419.9484,16.2900,0
3,SP,2024-08-14T03:18:09.750Z,2024-08-01,2024-08-01T05:00:00.000Z,14382.717,14382.717,14366.427,12947.011,1419.4161,16.2900,0
4,SP,2024-08-14T03:18:09.750Z,2024-08-01,2024-08-01T05:30:00.000Z,14062.006,14062.006,14045.716,12627.370,1418.3458,16.2900,0
...,...,...,...,...,...,...,...,...,...,...,...
667,SP,2024-08-15T02:01:23.915Z,2024-08-14,2024-08-15T01:00:00.000Z,20064.450,20064.450,20048.160,18639.084,1409.0771,16.2900,0
668,SP,2024-08-15T02:01:23.915Z,2024-08-14,2024-08-15T01:30:00.000Z,19048.002,19048.002,19031.525,17618.805,1412.7210,16.4758,0
669,SP,2024-08-15T02:01:23.915Z,2024-08-14,2024-08-15T02:00:00.000Z,18232.314,18232.314,18215.951,16800.266,1415.6857,16.3642,0
670,SP,2024-08-15T02:01:23.915Z,2024-08-14,2024-08-15T02:30:00.000Z,0.000,0.000,0.000,0.000,1418.4727,16.2900,0


In [10]:
# Plot `val_cargaglobal` against `din_referenciautc`.
df.plot(
    x='din_referenciautc',
    y='val_cargaglobal',
    title='Carga verificada em SP',
)

In [11]:
# Parse the timestamp strings into datetimes so they can be subtracted.
df['din_referenciautc'] = pd.to_datetime(df['din_referenciautc'])
df['din_atualizacao'] = pd.to_datetime(df['din_atualizacao'])

# Whole days between the reference time and the last update of each record.
# NOTE: `.dt.days` floors the difference, so an update stamped slightly
# before its reference time yields -1 rather than 0.
df['delay'] = (df['din_atualizacao'] - df['din_referenciautc']).dt.days

# Only the last expression of a cell is displayed automatically, so the
# summary statistics are printed explicitly instead of being discarded.
print(df['delay'].describe())

df.plot(x='din_referenciautc', y='delay', title='Delay de atualização')