## Bioethanol production in Brazil

### Downloading the data

In [1]:
# Download the ethanol open-data archive from gov.br/anp and unpack it into ./data/.
# This cell performs a network request every time it runs; comment it out once
# the spreadsheets are already present in the data folder.
import io
import zipfile

import requests

r = requests.get(
    'https://www.gov.br/anp/pt-br/assuntos/producao-e-fornecimento-de-biocombustiveis/etanol/arquivos-etanol/pb-da-etanol.zip',
    timeout=60,  # do not hang the kernel forever if the server stops responding
)
# Fail loudly on an HTTP error instead of handing an error page to ZipFile.
r.raise_for_status()
# The context manager closes the archive handle once extraction is done.
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
    z.extractall(path='./data/')

### Reading the data

In [2]:
from pathlib import Path
import csv

In [3]:
# File containing production capacity per plant site
path = Path('./data/Etanol_DadosAbertos_CSV_Capacidade.csv')
# Decode with 'utf-8-sig' rather than 'utf-8': the file begins with a UTF-8
# byte-order mark, which plain 'utf-8' keeps as an invisible U+FEFF character
# glued to the first column name. 'utf-8-sig' strips it when present and
# behaves like 'utf-8' otherwise.
lines = path.read_text(encoding='utf-8-sig').splitlines()

# `reader` is a one-shot iterator over the parsed rows; pulling the first row
# here leaves it positioned on the first data record.
reader = csv.reader(lines)
header_row = next(reader)

In [4]:
# Show each header field next to its positional index in the CSV row.
print("Column names:")
for index, column in enumerate(header_row):
    print(f"{index}: {column}")

Column names:
0: ﻿Mês/Ano
1: Razão Social
2: CNPJ
3: Região
4: Estado
5: Município
6: Capacidade Produção Etanol Anidro (m³/d)
7: Capacidade Produção Etanol Hidratado (m³/d)


Note: A CNPJ (Cadastro Nacional da Pessoa Jurídica) is a unique tax identification number issued to legal entities such as companies, partnerships, and foundations — and, in this case, to individual plant sites. Each plant site has its own CNPJ, so a company with multiple plants will have multiple CNPJs, one for each site.

In [5]:
import datetime as dt

In [6]:
# strptime pattern used to parse the first CSV column ("Mês/Ano"):
# two-digit month, a slash, then a four-digit year.
date_format = "%m/%Y"

In [7]:
# One list per extracted field; they are filled in parallel, so the same index
# in every list refers to the same CSV record.
dates = []
names = []
cnpjs = []
states = []
capacities_anhydrous = []
capacities_hydrous = []

# Iterate over a fresh reader built from the data lines (index 0 is the header)
# rather than the shared `reader` iterator. The shared iterator is exhausted
# after one pass, so re-running this cell would otherwise reset the lists above
# and then silently leave them empty.
for row in csv.reader(lines[1:]):
    # Column indices follow the header: 0 = Mês/Ano, 1 = Razão Social, 2 = CNPJ,
    # 4 = Estado, 6 = anhydrous capacity, 7 = hydrous capacity.
    date = dt.datetime.strptime(row[0], date_format).date()
    name = row[1]
    # NOTE(review): int() discards any leading zeros in the CNPJ; keep the raw
    # string instead if the zero-padded identifier is needed downstream.
    cnpj = int(row[2])
    state = row[4]
    capacity_anhydrous = int(row[6])
    capacity_hydrous = int(row[7])

    # Append only after the whole row parsed, so the lists stay the same length
    # even if a conversion above raises.
    dates.append(date)
    names.append(name)
    cnpjs.append(cnpj)
    states.append(state)
    capacities_anhydrous.append(capacity_anhydrous)
    capacities_hydrous.append(capacity_hydrous)

In [8]:
import pandas as pd

In [9]:
# Combine the parallel column lists into one table, one row per CSV record.
# Key order below fixes the column order of the resulting frame.
df = pd.DataFrame(
    data={
        "date": dates,
        "name": names,
        "cnpj": cnpjs,
        "state": states,
        "capacity_hydrous": capacities_hydrous,
        "capacity_anhydrous": capacities_anhydrous,
    }
)

In [10]:
# Preview the first 20 rows of the assembled table.
df.head(n=20)

Unnamed: 0,date,name,cnpj,state,capacity_hydrous,capacity_anhydrous
0,2024-11-01,CARAMURU ALIMENTOS S/A,80671002668,Mato Grosso,50,0
1,2024-11-01,DESTILARIA DE ÁLCOOL LIBRA LTDA - EM RECUPERAÇ...,297598000122,Mato Grosso,600,600
2,2024-11-01,CENTRAL ENERGETICA VALE DO SAPUCAI LTDA,372496000124,São Paulo,900,517
3,2024-11-01,DENUSA DESTILARIA NOVA UNIAO S/A - EM RECUPERA...,595322000120,Goiás,650,450
4,2024-11-01,SANTA CRUZ ACUCAR E ALCOOL LTDA,738822000255,Bahia,240,200
5,2024-11-01,CJ SELECTA S.A.,969790000541,Minas Gerais,35,0
6,2024-11-01,WD AGROINDUSTRIAL LTDA,1105558000102,Minas Gerais,450,220
7,2024-11-01,USINA RIO VERDE LTDA EM RECUPERACAO JUDICIAL,2043917000107,Goiás,330,120
8,2024-11-01,T.G. AGRO INDUSTRIAL LTDA.,2126558000143,Maranhão,420,200
9,2024-11-01,VALE VERDE EMPREENDIMENTOS AGRICOLAS LTDA EM R...,2414858000390,Rio Grande do Norte,460,360
