In [26]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

import pandas as pd

# set plotly as the default plotter
pd.options.plotting.backend = "plotly"

In [93]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import pendulum

def fetch_data(start_date, end_date, area_code):
    start = pendulum.parse(start_date)
    end = pendulum.parse(end_date)

    all_data = []
    current_start = start

    while current_start < end:
        current_end = min(current_start.add(days=90), end)

        url = f'https://apicarga.ons.org.br/prd/cargaverificada?dat_inicio={current_start.to_date_string()}&dat_fim={current_end.to_date_string()}&cod_areacarga={area_code}'

        session = requests.Session()
        retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
        session.mount('http://', HTTPAdapter(max_retries=retries))

        response = session.get(url)
        response.raise_for_status()

        chunk_data = response.json()

        # Check for correct number of records
        expected_records = (current_end.add(days=1) - current_start).in_hours() * 2
        if len(chunk_data) != expected_records:
            raise ValueError(f"Data integrity check failed. Expected {expected_records} records, but received {len(chunk_data)} for period {current_start.to_date_string()} to {current_end.to_date_string()}")

        all_data.extend(chunk_data)

        current_start = current_end.add(days=1)

    return all_data

# Example usage
try:
    data = fetch_data('2024-08-13', '2024-08-14', 'SP')
    print(f"Successfully retrieved {len(data)} records.")
except ValueError as e:
    print(f"Error: {str(e)}")
except requests.exceptions.RequestException as e:
    print(f"Request failed: {str(e)}")

Successfully retrieved 96 records.


In [94]:
df = pd.DataFrame(data).sort_values('din_referenciautc')
df

Unnamed: 0,cod_areacarga,din_atualizacao,dat_referencia,din_referenciautc,val_cargaglobal,val_cargaglobalcons,val_cargaglobalsmmgd,val_cargasupervisionada,val_carganaosupervisionada,val_cargammgd,val_consistencia
0,SP,2024-08-14T03:19:47.647Z,2024-08-13,2024-08-13T03:30:00.000Z,14952.424,14952.424,14936.134,13517.985,1418.1486,16.2900,0
1,SP,2024-08-14T03:19:47.647Z,2024-08-13,2024-08-13T04:00:00.000Z,14420.714,14420.714,14404.424,12985.320,1419.1040,16.2900,0
2,SP,2024-08-14T03:19:47.647Z,2024-08-13,2024-08-13T04:30:00.000Z,13932.955,13932.955,13916.665,12496.768,1419.8972,16.2900,0
3,SP,2024-08-14T03:19:47.647Z,2024-08-13,2024-08-13T05:00:00.000Z,13694.828,13694.828,13678.538,12259.183,1419.3551,16.2900,0
4,SP,2024-08-14T03:19:47.647Z,2024-08-13,2024-08-13T05:30:00.000Z,13462.024,13462.024,13445.734,12027.460,1418.2745,16.2900,0
...,...,...,...,...,...,...,...,...,...,...,...
91,SP,2024-08-15T00:01:23.633Z,2024-08-14,2024-08-15T01:00:00.000Z,0.000,0.000,0.000,0.000,1409.0771,16.2900,0
92,SP,2024-08-15T00:01:23.633Z,2024-08-14,2024-08-15T01:30:00.000Z,0.000,0.000,0.000,0.000,1412.7210,16.4758,0
93,SP,2024-08-15T00:01:23.633Z,2024-08-14,2024-08-15T02:00:00.000Z,0.000,0.000,0.000,0.000,1415.6857,16.3642,0
94,SP,2024-08-15T00:01:23.633Z,2024-08-14,2024-08-15T02:30:00.000Z,0.000,0.000,0.000,0.000,1418.4727,16.2900,0


In [98]:
import boto3
import time
import os
from dotenv import load_dotenv

load_dotenv()

KEY_ID = os.environ["aws_access_key_id"]
SECRET_KEY = os.environ["aws_secret_access_key"]
REGION = os.environ["region_name"]
BUCKET_NAME = os.environ["bucket_name"]

aws = boto3.Session(
        aws_access_key_id=KEY_ID,
        aws_secret_access_key=SECRET_KEY,
        region_name=REGION
    )

# connect to the S3 bucket
s3 = aws.client("s3")

# list all the files in the bucket
response = s3.list_objects_v2(Bucket=BUCKET_NAME)
files = [obj["Key"] for obj in response["Contents"]]
files

# put the data in the bucket
filename = f"ons_data_{pendulum.now().to_iso8601_string()}.csv"
df.to_csv(filename, index=False)

s3.upload_file(filename, BUCKET_NAME, filename)

S3UploadFailedError: Failed to upload ons_data_2024-08-14T21:47:17.247739-03:00.csv to testedatariskreclame/ons_data_2024-08-14T21:47:17.247739-03:00.csv: An error occurred (AccessDenied) when calling the PutObject operation: Access Denied

In [84]:
df.plot(x='din_referenciautc', y='val_cargaglobal', title='Carga verificada em SP')

In [85]:
# convert to datetime
df['din_referenciautc'] = pd.to_datetime(df['din_referenciautc'])
df['din_atualizacao'] = pd.to_datetime(df['din_atualizacao'])

df['delay'] = (df['din_atualizacao'] - df['din_referenciautc']).dt.days

df.delay.describe()

df.plot(x='din_referenciautc', y='delay', title='Delay de atualização')

In [95]:
# global vs consolidada

df['val_cargaglobal'] = df['val_cargaglobal'].astype(float)
df['val_cargaconsolidada'] = df['val_cargaconsolidada'].astype(float)

df[['val_cargaglobal', 'val_cargaconsolidada']].plot(title='Carga global vs consolidada')

KeyError: 'val_cargaconsolidada'