In [1]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

import pandas as pd

# route every pandas .plot() call through plotly
pd.set_option("plotting.backend", "plotly")

In [2]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import pendulum

def fetch_data(start_date, end_date, area_code, timeout=30):
    """Download records from the apicarga.ons.org.br ``cargaverificada`` endpoint.

    The requested period is walked in consecutive windows (a window start plus
    at most 90 days), one GET request per window. Each response is checked for
    completeness before its records are accepted.

    Args:
        start_date: First day to fetch, as a string understood by
            ``pendulum.parse`` (e.g. ``'2024-08-01'``).
        end_date: Last day to fetch, inclusive, in the same format.
        area_code: Value sent as the ``cod_areacarga`` query parameter
            (e.g. ``'SP'``).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        list: The decoded JSON records of every window, concatenated in
        request order. Empty when ``start_date`` is after ``end_date``.

    Raises:
        ValueError: A window returned a number of records different from two
            per hour of that window.
        requests.exceptions.RequestException: A request failed or the server
            answered with an error status.
    """
    period_start = pendulum.parse(start_date)
    period_end = pendulum.parse(end_date)

    endpoint = 'https://apicarga.ons.org.br/prd/cargaverificada'
    retry_policy = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])

    records = []

    # One session for the whole download: connections are reused between
    # windows and released when the block exits, even on error.
    with requests.Session() as session:
        adapter = HTTPAdapter(max_retries=retry_policy)
        session.mount('http://', adapter)
        # The endpoint is served over HTTPS; without this mount the retry
        # policy would never be applied to the actual requests.
        session.mount('https://', adapter)

        window_start = period_start
        # "<=" so that a window consisting only of the final day (including the
        # start_date == end_date case) is still requested.
        while window_start <= period_end:
            window_end = min(window_start.add(days=90), period_end)

            response = session.get(
                endpoint,
                params={
                    'dat_inicio': window_start.to_date_string(),
                    'dat_fim': window_end.to_date_string(),
                    'cod_areacarga': area_code,
                },
                timeout=timeout,
            )
            response.raise_for_status()

            chunk_data = response.json()

            # The end day is inclusive, hence the extra day; two records are
            # expected for every hour of the window.
            expected_records = (window_end.add(days=1) - window_start).in_hours() * 2
            if len(chunk_data) != expected_records:
                raise ValueError(f"Data integrity check failed. Expected {expected_records} records, but received {len(chunk_data)} for period {window_start.to_date_string()} to {window_end.to_date_string()}")

            records.extend(chunk_data)

            # The next window starts the day after this one ended.
            window_start = window_end.add(days=1)

    return records

# Demo run: fetch 2024-08-01 through 2024-08-14 for area code 'SP' and report the outcome
try:
    data = fetch_data(start_date='2024-08-01', end_date='2024-08-14', area_code='SP')
    print(f"Successfully retrieved {len(data)} records.")
except ValueError as value_error:
    print(f"Error: {value_error}")
except requests.exceptions.RequestException as request_error:
    print(f"Request failed: {request_error}")

Successfully retrieved 672 records.


In [3]:
# Load the fetched records into a frame, then order the rows by the UTC reference timestamp
df = pd.DataFrame(data)
df = df.sort_values(by='din_referenciautc')
df

Unnamed: 0,cod_areacarga,din_atualizacao,dat_referencia,din_referenciautc,val_cargaglobal,val_cargaglobalcons,val_cargaglobalsmmgd,val_cargasupervisionada,val_carganaosupervisionada,val_cargammgd,val_consistencia
0,SP,2024-08-23T03:18:44.697Z,2024-08-01,2024-08-01T03:30:00.000Z,15991.316,15991.316,15975.026,14550.327,1424.6990,16.2900,0
1,SP,2024-08-23T03:18:44.697Z,2024-08-01,2024-08-01T04:00:00.000Z,15323.893,15323.893,15307.603,13881.828,1425.7742,16.2900,0
2,SP,2024-08-23T03:18:44.697Z,2024-08-01,2024-08-01T04:30:00.000Z,14748.265,14748.265,14731.975,13305.545,1426.4299,16.2900,0
3,SP,2024-08-23T03:18:44.697Z,2024-08-01,2024-08-01T05:00:00.000Z,14389.187,14389.187,14372.896,12947.011,1425.8853,16.2900,0
4,SP,2024-08-23T03:18:44.697Z,2024-08-01,2024-08-01T05:30:00.000Z,14068.465,14068.465,14052.175,12627.370,1424.8046,16.2900,0
...,...,...,...,...,...,...,...,...,...,...,...
667,SP,2024-08-23T03:20:28.455Z,2024-08-14,2024-08-15T01:00:00.000Z,20068.271,20068.271,20051.982,18639.084,1412.8990,16.2900,0
668,SP,2024-08-23T03:20:28.455Z,2024-08-14,2024-08-15T01:30:00.000Z,19051.863,19051.863,19035.387,17618.805,1416.5818,16.4758,0
669,SP,2024-08-23T03:20:28.455Z,2024-08-14,2024-08-15T02:00:00.000Z,18236.590,18236.590,18220.227,16800.266,1419.9609,16.3642,0
670,SP,2024-08-23T03:20:28.455Z,2024-08-14,2024-08-15T02:30:00.000Z,17179.830,17179.830,17163.541,15741.145,1422.3960,16.2900,0


In [4]:
# Plot val_cargaglobal against the UTC reference timestamp
df.plot(
    x='din_referenciautc',
    y='val_cargaglobal',
    title='Carga verificada em SP',
)

In [5]:
# Parse both timestamp columns so they can be subtracted from each other
df['din_referenciautc'] = pd.to_datetime(df['din_referenciautc'])
df['din_atualizacao'] = pd.to_datetime(df['din_atualizacao'])

# Days component of the gap between each record's reference instant and its update instant
df['delay'] = (df['din_atualizacao'] - df['din_referenciautc']).dt.days

# Only the last expression of a cell is rendered automatically, so the
# summary statistics are printed explicitly; otherwise they would be lost.
print(df['delay'].describe())

df.plot(x='din_referenciautc', y='delay', title='Delay de atualização')