# Agência Nacional do Petróleo, Gás Natural e Biocombustíveis (ANP)

Análises baseadas em uma amostra da coleta de preços de combustíveis automotivos nos postos do país, extraída da Série Histórica de Preços de Combustíveis da ANP (2º semestre de 2022).



In [28]:
!pip install pandas pyarrow fastparquet

Collecting pyarrow
  Downloading pyarrow-12.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.9/38.9 MB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting fastparquet
  Downloading fastparquet-2023.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting cramjam>=2.3 (from fastparquet)
  Downloading cramjam-2.6.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m47.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting fsspec (from fastparquet)
  Downloading fsspec-2023.5.0-py3-none-any.whl (160 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.1/160.1 kB[0m [31m14.0 MB/s[0m eta [36m

In [42]:
import os
import shutil
import requests
import zipfile

def makedirs(path):
    """Create the directory ``path`` and any missing parents.

    Calling it for a directory that already exists is a no-op.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first and creating afterwards.
    os.makedirs(path, exist_ok=True)
        
def remove_paths(paths):
    """Delete every file, symlink or directory tree listed in ``paths``.

    Args:
        paths: iterable of filesystem paths. Entries that do not exist are
            skipped silently.
    """
    for path in paths:
        # Check for symlinks first: a link to a directory satisfies isdir(),
        # but shutil.rmtree() refuses to operate on symbolic links, and a
        # dangling link satisfies neither isfile() nor isdir().
        if os.path.islink(path) or os.path.isfile(path):
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
            
def download(url, path, timeout=60):
    """Download ``url`` and save the response body to ``path``.

    Args:
        url: address of the resource to fetch.
        path: destination file; overwritten if it already exists.
        timeout: seconds to wait for the server (connect / between reads)
            before giving up. Passed straight to ``requests.get``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so an
            error page is never saved as if it were the requested file.
    """
    # Write to a sibling ".part" file and rename at the end so that a failed
    # or interrupted transfer never leaves a truncated file at ``path``.
    partial_path = f'{path}.part'
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                # Stream in 1 MiB chunks instead of buffering the whole body.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial_path, path)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)
        
def extract(zip_file_path, extract_path):
    """Unpack every member of the zip archive at ``zip_file_path`` into ``extract_path``."""
    archive = zipfile.ZipFile(zip_file_path)
    try:
        archive.extractall(path=extract_path)
    finally:
        # Always release the archive handle, even if extraction fails.
        archive.close()

In [45]:
import pandas as pd

# Local parquet cache of the trimmed ANP dataset: get_anp_data() reads it when
# present, and load_anp_data() writes to this same location after downloading.
anp_parquet_path = './data/anp/ca-2022-02.parquet'

def load_anp_data():
    """Download the ANP automotive fuel price survey and cache it as parquet.

    Fetches the zipped CSV, extracts it under ``./temp``, loads it with pandas,
    drops address and purchase-price columns, removes the temporary artefacts
    and writes the result to ``anp_parquet_path``.

    Returns:
        pandas.DataFrame: the trimmed dataset that was written to the cache.
    """
    # Source: ANP > Historical Series of Fuel and LPG Prices > Automotive fuels
    # > 2nd semester of 2022
    dataset_url = 'https://www.gov.br/anp/pt-br/centrais-de-conteudo/dados-abertos/arquivos/shpc/dsas/ca/ca-2022-02.zip'

    temp_dir = './temp'
    file_path = f'{temp_dir}/ca-2022-02.zip'
    extract_path = f'{temp_dir}/ca-2022-02'

    makedirs(temp_dir)
    try:
        download(dataset_url, file_path)
        extract(file_path, extract_path)

        # NOTE(review): 'Valor de Venda' uses a decimal comma (e.g. "7,48") and
        # is therefore loaded as text; consider decimal=',' here if no
        # downstream cell relies on the string form — confirm before changing.
        anp_data = (
            pd.read_csv(f'{extract_path}/ca-2022-02.csv', sep=';')
            # errors='ignore' keeps this working if a column is absent.
            .drop(
                columns=['Nome da Rua', 'Numero Rua', 'Complemento', 'Bairro', 'Valor de Compra'],
                errors='ignore',
            )
        )
    finally:
        # Clean up the archive and extracted files even when the download,
        # extraction or parsing fails part-way through.
        remove_paths([file_path, extract_path])

    # Write to the same location get_anp_data() reads from, so the two
    # functions cannot drift apart.
    cache_dir = os.path.dirname(anp_parquet_path)
    if cache_dir:
        makedirs(cache_dir)
    anp_data.to_parquet(anp_parquet_path)

    return anp_data

def get_anp_data():
    """Return the ANP dataset, reading the parquet cache when it exists.

    If no file is found at ``anp_parquet_path``, the work is delegated to
    ``load_anp_data()`` and its result is returned instead.
    """
    cache_missing = not os.path.exists(anp_parquet_path)
    if cache_missing:
        return load_anp_data()
    return pd.read_parquet(anp_parquet_path)

# Preview the first rows. When the parquet cache is missing, this call first
# downloads the dataset and creates the cache.
print(get_anp_data().head())

  Regiao - Sigla Estado - Sigla Municipio   
0             SE             ES  COLATINA  \
1             SE             ES  COLATINA   
2             SE             ES  COLATINA   
3             SE             ES  COLATINA   
4             SE             ES  COLATINA   

                                             Revenda      CNPJ da Revenda   
0  SAO FRANCISCO DE ASSIS COMERCIO DE COMBUSTIVEI...   08.519.545/0001-10  \
1  SAO FRANCISCO DE ASSIS COMERCIO DE COMBUSTIVEI...   08.519.545/0001-10   
2                                 POSTO MOTOCAP LTDA   04.814.652/0001-10   
3                                 POSTO MOTOCAP LTDA   04.814.652/0001-10   
4                                 POSTO MOTOCAP LTDA   04.814.652/0001-10   

         Cep             Produto Data da Coleta Valor de Venda   
0  29703-030  GASOLINA ADITIVADA     01/07/2022           7,48  \
1  29703-030            GASOLINA     01/07/2022           7,38   
2  29703-055          DIESEL S10     01/07/2022           7,69   
3 