## Data Analysis - Goal: to join supply and demand information about gasoline

In [21]:
import pandas as pd
import numpy as np

Read the CSV files that were downloaded from the ANP website

In [22]:
# Load the two CSV files downloaded from the ANP website:
#   production (supply): https://www.gov.br/anp/pt-br/centrais-de-conteudo/dados-abertos/processamento-de-petroleo-e-producao-de-derivados
#   sales (demand):      https://www.gov.br/anp/pt-br/centrais-de-conteudo/dados-abertos/vendas-de-derivados-de-petroleo-e-biocombustiveis
# Both files are parsed with the same options: ';' field separator,
# ',' decimal mark and '.' thousands separator.
_anp_csv_options = {'sep': ';', 'decimal': ',', 'thousands': '.'}

demand = pd.read_csv('vendas-combustiveis-segmento-m3-2012-2025.csv', **_anp_csv_options)
supply = pd.read_csv('producao-derivados-petroleo-por-refinaria-m3-1990-2025.csv', **_anp_csv_options)

In [23]:
# Inclusive year bounds of the analysis window used to filter the data.
start_year, end_year = 2012, 2024

In [24]:
# Peek at the first and last rows of the raw sales (demand) table.
print(demand.head(), demand.tail(), sep='\n')

    ANO  MÊS UNIDADE DA FEDERAÇÃO           PRODUTO          SEGMENTO  VENDAS
0  2012  JAN             RONDÔNIA  ETANOL HIDRATADO  POSTO REVENDEDOR  1560.5
1  2012  MAI     DISTRITO FEDERAL  ETANOL HIDRATADO  CONSUMIDOR FINAL    50.0
2  2012  JUN     DISTRITO FEDERAL  ETANOL HIDRATADO  CONSUMIDOR FINAL    25.0
3  2012  JUL     DISTRITO FEDERAL  ETANOL HIDRATADO  CONSUMIDOR FINAL    25.0
4  2012  AGO     DISTRITO FEDERAL  ETANOL HIDRATADO  CONSUMIDOR FINAL    20.0
        ANO  MÊS UNIDADE DA FEDERAÇÃO      PRODUTO          SEGMENTO  \
38146  2025  JAN   MATO GROSSO DO SUL  ÓLEO DIESEL  POSTO REVENDEDOR   
38147  2025  JAN    RIO GRANDE DO SUL  ÓLEO DIESEL  POSTO REVENDEDOR   
38148  2025  JAN       SANTA CATARINA  ÓLEO DIESEL  POSTO REVENDEDOR   
38149  2025  JAN            SÃO PAULO  ÓLEO DIESEL  POSTO REVENDEDOR   
38150  2025  JAN     DISTRITO FEDERAL  ÓLEO DIESEL               TRR   

           VENDAS  
38146   71057.868  
38147  206235.406  
38148  185694.560  
38149  532111.030  

In [25]:
# Peek at the first and last rows of the raw production (supply) table.
print(supply.head(), supply.tail(), sep='\n')

    ANO  MÊS UNIDADE DA FEDERAÇÃO REFINARIA              PRODUTO  PRODUÇÃO
0  1990  JAN                CEARÁ    LUBNOR             PARAFINA       0.0
1  1990  MAI            SÃO PAULO      RPBC  GASOLINA DE AVIAÇÃO   23832.0
2  1990  JUN            SÃO PAULO      RPBC  GASOLINA DE AVIAÇÃO       0.0
3  1990  JUL            SÃO PAULO      RPBC  GASOLINA DE AVIAÇÃO       0.0
4  1990  AGO            SÃO PAULO      RPBC  GASOLINA DE AVIAÇÃO    3995.0
         ANO  MÊS UNIDADE DA FEDERAÇÃO REFINARIA               PRODUTO  \
109969  2025  JAN            SÃO PAULO    UNIVEN  QUEROSENE DE AVIAÇÃO   
109970  2025  FEV    RIO GRANDE DO SUL     REFAP  QUEROSENE DE AVIAÇÃO   
109971  2025  JAN    RIO GRANDE DO SUL     REFAP  QUEROSENE DE AVIAÇÃO   
109972  2025  FEV                BAHIA    REFMAT  QUEROSENE ILUMINANTE   
109973  2025  JAN            SÃO PAULO     SSOIL    OUTROS ENERGÉTICOS   

        PRODUÇÃO  
109969     0.000  
109970  9661.907  
109971  9564.305  
109972   188.100  
109973    

In [26]:
# Keep only the sales rows whose 'PRODUTO' is 'GASOLINA C'.
# (An alternative filter that also kept 'ETANOL HIDRATADO' was left disabled.)
demand_filter_product = demand.loc[demand['PRODUTO'] == 'GASOLINA C']

In [27]:
# Translate the Portuguese month abbreviations found in 'MÊS' into
# zero-padded month numbers ('01'..'12') and store them in a 'MONTH' column.
month_numbers = {
    abbreviation: f'{number:02d}'
    for number, abbreviation in enumerate(
        ['JAN', 'FEV', 'MAR', 'ABR', 'MAI', 'JUN',
         'JUL', 'AGO', 'SET', 'OUT', 'NOV', 'DEZ'],
        start=1,
    )
}

# assign() returns a new frame, so the filtered slice of `demand` is never written to.
demand_filter_product = demand_filter_product.assign(
    MONTH=demand_filter_product['MÊS'].map(month_numbers)
)
demand_filter_product.head()

Unnamed: 0,ANO,MÊS,UNIDADE DA FEDERAÇÃO,PRODUTO,SEGMENTO,VENDAS,MONTH
972,2012,JAN,RONDÔNIA,GASOLINA C,POSTO REVENDEDOR,26326.38,1
973,2012,MAI,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,809.0,5
974,2012,JUN,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,814.0,6
975,2012,JUL,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,703.0,7
976,2012,AGO,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,869.0,8


In [28]:
# Ethanol percentage applied to each year's gasoline C sales.
# Years missing from this mapping produce NaN in the derived columns.
ethanol_blend = {
    2012: 20.0,
    2013: 25.0, 2014: 25.0,
    2015: 27.0, 2016: 27.0, 2017: 27.0, 2018: 27.0,
    2019: 27.0, 2020: 27.0, 2021: 27.0, 2022: 27.0,
    2023: 27.5, 2024: 27.5,
}

demand_filter_product = demand_filter_product.assign(
    # First day of the month, built from the year and the zero-padded month number.
    Date=pd.to_datetime(
        demand_filter_product['ANO'].astype(str)
        + '-'
        + demand_filter_product['MONTH'].astype(str)
        + '-01'
    ),
    ethanol_blend=demand_filter_product['ANO'].map(ethanol_blend),
    # Gasoline A volume = gasoline C sales minus the ethanol share.
    VENDAS_GASOLINA_A=lambda frame: (1 - frame['ethanol_blend'] / 100) * frame['VENDAS'],
)
demand_filter_product.head()



Unnamed: 0,ANO,MÊS,UNIDADE DA FEDERAÇÃO,PRODUTO,SEGMENTO,VENDAS,MONTH,Date,ethanol_blend,VENDAS_GASOLINA_A
972,2012,JAN,RONDÔNIA,GASOLINA C,POSTO REVENDEDOR,26326.38,1,2012-01-01,20.0,21061.104
973,2012,MAI,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,809.0,5,2012-05-01,20.0,647.2
974,2012,JUN,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,814.0,6,2012-06-01,20.0,651.2
975,2012,JUL,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,703.0,7,2012-07-01,20.0,562.4
976,2012,AGO,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,869.0,8,2012-08-01,20.0,695.2


In [29]:
# Full state name -> two-letter federative-unit code.
# 'PARA' and 'RONDONIA' are unaccented spellings mapped to the same codes
# as their accented forms.
federative_unit = {
    'ACRE': 'AC',
    'ALAGOAS': 'AL',
    'AMAPÁ': 'AP',
    'AMAZONAS': 'AM',
    'BAHIA': 'BA',
    'CEARÁ': 'CE',
    'DISTRITO FEDERAL': 'DF',
    'ESPÍRITO SANTO': 'ES',
    'GOIÁS': 'GO',
    'MARANHÃO': 'MA',
    'MATO GROSSO': 'MT',
    'MATO GROSSO DO SUL': 'MS',
    'MINAS GERAIS': 'MG',
    'PARANÁ': 'PR',
    'PARAÍBA': 'PB',
    'PARÁ': 'PA',
    'PARA': 'PA',
    'PERNAMBUCO': 'PE',
    'PIAUÍ': 'PI',
    'RIO DE JANEIRO': 'RJ',
    'RIO GRANDE DO SUL': 'RS',
    'RIO GRANDE DO NORTE': 'RN',
    'RONDÔNIA': 'RO',
    'RONDONIA': 'RO',
    'RORAIMA': 'RR',
    'SANTA CATARINA': 'SC',
    'SERGIPE': 'SE',
    'SÃO PAULO': 'SP',
    'TOCANTINS': 'TO',
}

# Add the state code as a new 'UF' column; names missing from the mapping become NaN.
demand_filter_product = demand_filter_product.assign(
    UF=demand_filter_product['UNIDADE DA FEDERAÇÃO'].map(federative_unit)
)
demand_filter_product.head()

Unnamed: 0,ANO,MÊS,UNIDADE DA FEDERAÇÃO,PRODUTO,SEGMENTO,VENDAS,MONTH,Date,ethanol_blend,VENDAS_GASOLINA_A,UF
972,2012,JAN,RONDÔNIA,GASOLINA C,POSTO REVENDEDOR,26326.38,1,2012-01-01,20.0,21061.104,RO
973,2012,MAI,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,809.0,5,2012-05-01,20.0,647.2,DF
974,2012,JUN,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,814.0,6,2012-06-01,20.0,651.2,DF
975,2012,JUL,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,703.0,7,2012-07-01,20.0,562.4,DF
976,2012,AGO,DISTRITO FEDERAL,GASOLINA C,CONSUMIDOR FINAL,869.0,8,2012-08-01,20.0,695.2,DF


In [30]:
# Restrict the demand rows to the analysis window (start_year..end_year, both inclusive).
demand_filter_product_time = demand_filter_product.loc[
    demand_filter_product['ANO'].between(start_year, end_year)
]
demand_filter_product_time.tail()

Unnamed: 0,ANO,MÊS,UNIDADE DA FEDERAÇÃO,PRODUTO,SEGMENTO,VENDAS,MONTH,Date,ethanol_blend,VENDAS_GASOLINA_A,UF
36931,2024,DEZ,DISTRITO FEDERAL,GASOLINA C,POSTO REVENDEDOR,83425.4,12,2024-12-01,27.5,60483.415,DF
36932,2024,NOV,DISTRITO FEDERAL,GASOLINA C,POSTO REVENDEDOR,81319.1,11,2024-11-01,27.5,58956.3475,DF
36933,2024,OUT,DISTRITO FEDERAL,GASOLINA C,POSTO REVENDEDOR,82157.9,10,2024-10-01,27.5,59564.4775,DF
36934,2024,SET,DISTRITO FEDERAL,GASOLINA C,POSTO REVENDEDOR,78064.6,9,2024-09-01,27.5,56596.835,DF
36935,2024,FEV,RONDÔNIA,GASOLINA C,CONSUMIDOR FINAL,67.0,2,2024-02-01,27.5,48.575,RO


In [31]:
# Total gasoline A-equivalent sales per month, state and product.
# Columns outside the grouping keys are collapsed by the sum.
first_group_demand = (
    demand_filter_product_time
    .groupby(['Date', 'UF', 'PRODUTO'])['VENDAS_GASOLINA_A']
    .sum()
    .reset_index()
)

first_group_demand

Unnamed: 0,Date,UF,PRODUTO,VENDAS_GASOLINA_A
0,2012-01-01,AC,GASOLINA C,6813.60000
1,2012-01-01,AL,GASOLINA C,23159.60000
2,2012-01-01,AM,GASOLINA C,33227.58560
3,2012-01-01,AP,GASOLINA C,7635.44000
4,2012-01-01,BA,GASOLINA C,122321.76000
...,...,...,...,...
4207,2024-12-01,RS,GASOLINA C,262154.08980
4208,2024-12-01,SC,GASOLINA C,211094.77435
4209,2024-12-01,SE,GASOLINA C,28925.32500
4210,2024-12-01,SP,GASOLINA C,569399.17760


In [32]:
# Prepare the refinery production (supply) table for the analysis window.
# The year filter runs first and the result is copied *afterwards*: copying
# before filtering (as was done previously) still leaves `supply` as a filtered
# slice, so the column assignments below could raise SettingWithCopyWarning.
supply = supply[(supply['ANO'] >= start_year) & (supply['ANO'] <= end_year)].copy()
supply['UF'] = supply['UNIDADE DA FEDERAÇÃO'].map(federative_unit)  # state name -> two-letter code
supply['MONTH'] = supply['MÊS'].map(month_numbers)  # month abbreviation -> '01'..'12'
# First day of the month, built from the year and the zero-padded month number.
supply['Date'] = pd.to_datetime(supply['ANO'].astype(str) + '-' + supply['MONTH'].astype(str) + '-01')

# Sum production per month, state, refinery and product, then keep gasoline A only.
first_group_supply = supply.groupby(['Date', 'UF', 'REFINARIA', 'PRODUTO'])['PRODUÇÃO'].sum().reset_index()
first_group_supply = first_group_supply[first_group_supply['PRODUTO'] == 'GASOLINA A']

# Collapse the refineries: total gasoline A production per month and state.
# (The name `second_group_suply` is kept as spelled because other cells reference it.)
second_group_suply = first_group_supply.groupby(['Date', 'UF', 'PRODUTO'])['PRODUÇÃO'].sum().reset_index()
first_group_supply


Unnamed: 0,Date,UF,REFINARIA,PRODUTO,PRODUÇÃO
2,2012-01-01,AM,REAM,GASOLINA A,39165.000
17,2012-01-01,BA,DAX OIL,GASOLINA A,0.000
32,2012-01-01,BA,REFMAT,GASOLINA A,229124.000
47,2012-01-01,CE,LUBNOR,GASOLINA A,0.000
62,2012-01-01,MG,REGAP,GASOLINA A,143835.000
...,...,...,...,...,...
41884,2024-12-01,SP,RECAP,GASOLINA A,91477.671
41899,2024-12-01,SP,REPLAN,GASOLINA A,535003.300
41914,2024-12-01,SP,REVAP,GASOLINA A,293101.510
41929,2024-12-01,SP,RPBC,GASOLINA A,251148.080


In [33]:

# Keep the 'GASOLINA A' rows of the per-state supply totals and add the join
# key '<Date>-<UF>'. The product name is NOT part of the key.
# assign() returns a new frame, so no explicit copy is needed.
supply_SandD_gas = (
    second_group_suply[second_group_suply['PRODUTO'].isin(['GASOLINA A'])]
    .assign(KEY=lambda frame: frame['Date'].astype(str) + '-' + frame['UF'])
)
supply_SandD_gas

Unnamed: 0,Date,UF,PRODUTO,PRODUÇÃO,KEY
0,2012-01-01,AM,GASOLINA A,39165.000,2012-01-01-AM
1,2012-01-01,BA,GASOLINA A,229124.000,2012-01-01-BA
2,2012-01-01,CE,GASOLINA A,0.000,2012-01-01-CE
3,2012-01-01,MG,GASOLINA A,143835.000,2012-01-01-MG
4,2012-01-01,PE,GASOLINA A,0.000,2012-01-01-PE
...,...,...,...,...,...
1553,2024-12-01,PE,GASOLINA A,2115.982,2024-12-01-PE
1554,2024-12-01,PR,GASOLINA A,349793.695,2024-12-01-PR
1555,2024-12-01,RJ,GASOLINA A,374071.843,2024-12-01-RJ
1556,2024-12-01,RS,GASOLINA A,284207.507,2024-12-01-RS


In [34]:
# Add the join key '<Date>-<UF>' to the per-state demand totals.
# The product name is NOT part of the key. assign() works on a new frame,
# leaving `first_group_demand` untouched.
demand_SandD_gas = first_group_demand.assign(
    KEY=lambda frame: frame['Date'].astype(str) + '-' + frame['UF']
)
demand_SandD_gas

Unnamed: 0,Date,UF,PRODUTO,VENDAS_GASOLINA_A,KEY
0,2012-01-01,AC,GASOLINA C,6813.60000,2012-01-01-AC
1,2012-01-01,AL,GASOLINA C,23159.60000,2012-01-01-AL
2,2012-01-01,AM,GASOLINA C,33227.58560,2012-01-01-AM
3,2012-01-01,AP,GASOLINA C,7635.44000,2012-01-01-AP
4,2012-01-01,BA,GASOLINA C,122321.76000,2012-01-01-BA
...,...,...,...,...,...
4207,2024-12-01,RS,GASOLINA C,262154.08980,2024-12-01-RS
4208,2024-12-01,SC,GASOLINA C,211094.77435,2024-12-01-SC
4209,2024-12-01,SE,GASOLINA C,28925.32500,2024-12-01-SE
4210,2024-12-01,SP,GASOLINA C,569399.17760,2024-12-01-SP


In [35]:
# Full outer join of demand (left, suffix '_d') and supply (right, suffix '_s')
# on 'KEY'; rows present on only one side keep NaN/NaT in the other side's columns.
supply_and_demand_SandD_gas = pd.merge(
    demand_SandD_gas,
    supply_SandD_gas,
    on='KEY',
    how='outer',
    suffixes=('_d', '_s'),
)
supply_and_demand_SandD_gas

Unnamed: 0,Date_d,UF_d,PRODUTO_d,VENDAS_GASOLINA_A,KEY,Date_s,UF_s,PRODUTO_s,PRODUÇÃO
0,2012-01-01,AC,GASOLINA C,6813.60000,2012-01-01-AC,NaT,,,
1,2012-01-01,AL,GASOLINA C,23159.60000,2012-01-01-AL,NaT,,,
2,2012-01-01,AM,GASOLINA C,33227.58560,2012-01-01-AM,2012-01-01,AM,GASOLINA A,39165.000
3,2012-01-01,AP,GASOLINA C,7635.44000,2012-01-01-AP,NaT,,,
4,2012-01-01,BA,GASOLINA C,122321.76000,2012-01-01-BA,2012-01-01,BA,GASOLINA A,229124.000
...,...,...,...,...,...,...,...,...,...
4207,2024-12-01,RS,GASOLINA C,262154.08980,2024-12-01-RS,2024-12-01,RS,GASOLINA A,284207.507
4208,2024-12-01,SC,GASOLINA C,211094.77435,2024-12-01-SC,NaT,,,
4209,2024-12-01,SE,GASOLINA C,28925.32500,2024-12-01-SE,NaT,,,
4210,2024-12-01,SP,GASOLINA C,569399.17760,2024-12-01-SP,2024-12-01,SP,GASOLINA A,1228801.568


In [36]:
# After the outer join, a row found on only one side has empty descriptive
# columns on the other side. For each column pair, fill the first column from
# the second and then the second from the (now filled) first.
# NOTE(review): the product label is copied across sides as well, so rows with
# no supply record end up with PRODUTO_s == 'GASOLINA C' — confirm this is intended.
for target, fallback in (
    ('UF_d', 'UF_s'),
    ('PRODUTO_s', 'PRODUTO_d'),
    ('Date_s', 'Date_d'),
):
    supply_and_demand_SandD_gas[target] = supply_and_demand_SandD_gas[target].fillna(
        supply_and_demand_SandD_gas[fallback]
    )
    supply_and_demand_SandD_gas[fallback] = supply_and_demand_SandD_gas[fallback].fillna(
        supply_and_demand_SandD_gas[target]
    )

# Volumes missing on one side of the join are treated as 0.
numeric_cols = ['VENDAS_GASOLINA_A', 'PRODUÇÃO']
supply_and_demand_SandD_gas[numeric_cols] = supply_and_demand_SandD_gas[numeric_cols].fillna(0)

supply_and_demand_SandD_gas.head(10)

Unnamed: 0,Date_d,UF_d,PRODUTO_d,VENDAS_GASOLINA_A,KEY,Date_s,UF_s,PRODUTO_s,PRODUÇÃO
0,2012-01-01,AC,GASOLINA C,6813.6,2012-01-01-AC,2012-01-01,AC,GASOLINA C,0.0
1,2012-01-01,AL,GASOLINA C,23159.6,2012-01-01-AL,2012-01-01,AL,GASOLINA C,0.0
2,2012-01-01,AM,GASOLINA C,33227.5856,2012-01-01-AM,2012-01-01,AM,GASOLINA A,39165.0
3,2012-01-01,AP,GASOLINA C,7635.44,2012-01-01-AP,2012-01-01,AP,GASOLINA C,0.0
4,2012-01-01,BA,GASOLINA C,122321.76,2012-01-01-BA,2012-01-01,BA,GASOLINA A,229124.0
5,2012-01-01,CE,GASOLINA C,70871.9432,2012-01-01-CE,2012-01-01,CE,GASOLINA A,0.0
6,2012-01-01,DF,GASOLINA C,60520.8,2012-01-01-DF,2012-01-01,DF,GASOLINA C,0.0
7,2012-01-01,ES,GASOLINA C,52070.08,2012-01-01-ES,2012-01-01,ES,GASOLINA C,0.0
8,2012-01-01,GO,GASOLINA C,88258.912,2012-01-01-GO,2012-01-01,GO,GASOLINA C,0.0
9,2012-01-01,MA,GASOLINA C,45653.3688,2012-01-01-MA,2012-01-01,MA,GASOLINA C,0.0


In [37]:
# Show the last 20 rows of the merged supply/demand table.
supply_and_demand_SandD_gas.tail(20)

Unnamed: 0,Date_d,UF_d,PRODUTO_d,VENDAS_GASOLINA_A,KEY,Date_s,UF_s,PRODUTO_s,PRODUÇÃO
4192,2024-12-01,ES,GASOLINA C,67572.5375,2024-12-01-ES,2024-12-01,ES,GASOLINA C,0.0
4193,2024-12-01,GO,GASOLINA C,98882.5296,2024-12-01-GO,2024-12-01,GO,GASOLINA C,0.0
4194,2024-12-01,MA,GASOLINA C,74458.5875,2024-12-01-MA,2024-12-01,MA,GASOLINA C,0.0
4195,2024-12-01,MG,GASOLINA C,315706.121275,2024-12-01-MG,2024-12-01,MG,GASOLINA A,210016.297
4196,2024-12-01,MS,GASOLINA C,47588.383025,2024-12-01-MS,2024-12-01,MS,GASOLINA C,0.0
4197,2024-12-01,MT,GASOLINA C,40542.725,2024-12-01-MT,2024-12-01,MT,GASOLINA C,0.0
4198,2024-12-01,PA,GASOLINA C,93093.7526,2024-12-01-PA,2024-12-01,PA,GASOLINA C,0.0
4199,2024-12-01,PB,GASOLINA C,49711.34615,2024-12-01-PB,2024-12-01,PB,GASOLINA C,0.0
4200,2024-12-01,PE,GASOLINA C,97892.00125,2024-12-01-PE,2024-12-01,PE,GASOLINA A,2115.982
4201,2024-12-01,PI,GASOLINA C,44292.983975,2024-12-01-PI,2024-12-01,PI,GASOLINA C,0.0


In [38]:
# Turn the year -> percentage mapping into a two-column table
# ('Ano', 'Percentual_Anidro'), one row per year.
df_ethanol_blend = pd.DataFrame({
    'Ano': list(ethanol_blend.keys()),
    'Percentual_Anidro': list(ethanol_blend.values()),
})

print(df_ethanol_blend)

     Ano  Percentual_Anidro
0   2012               20.0
1   2013               25.0
2   2014               25.0
3   2015               27.0
4   2016               27.0
5   2017               27.0
6   2018               27.0
7   2019               27.0
8   2020               27.0
9   2021               27.0
10  2022               27.0
11  2023               27.5
12  2024               27.5


In [39]:
# Export the merged supply/demand table and the ethanol-blend table to CSV.
# Default to_csv settings are used, so the row index is written as the first column.
for frame, csv_name in (
    (supply_and_demand_SandD_gas, '[S&D_GASOLINE].csv'),
    (df_ethanol_blend, '[S&D_dim_ethanol_blend].csv'),
):
    frame.to_csv(csv_name)

In [40]:
def format_numbers(x):
    """Format a number as a short human-readable string.

    Parameters
    ----------
    x : int or float
        Value to format.

    Returns
    -------
    str
        * ``x >= 1_000_000``: whole millions, e.g. ``'6Mi'``.
        * ``1_000 <= x < 1_000_000``: fraction of a million with two decimals,
          e.g. ``'0.35Mi'``.
        * ``1 <= x < 1_000``: fraction of a thousand with one decimal,
          e.g. ``'0.5k'``.
        * anything else (zero, values below 1, negatives, NaN): the plain
          number with two decimals, e.g. ``'0.00'``.

    Notes
    -----
    Previously values below 1 matched no branch and the function returned
    ``None``, which leaked into the formatted output.

    NOTE(review): values between 1_000 and 4_999 render as ``'0.00Mi'`` and
    values below 50 as ``'0.0k'``. The existing scaling is kept unchanged;
    confirm whether coarser units for mid-range values are intended.
    """
    if x >= 1_000_000:
        return f'{x/1_000_000:,.0f}Mi'
    if x >= 1_000:
        return f'{x/1_000_000:,.2f}Mi'
    if x >= 1:
        return f'{x/1_000:,.1f}k'
    # Fallback so the function always returns a string (also reached for NaN,
    # because every comparison above is False for NaN).
    return f'{x:,.2f}'

In [41]:
# Rank refineries by their total 2024 'GASOLINA A' production and format the
# totals for display.
supply_ano = supply.loc[supply['ANO'] == 2024]
supply_ano_produto = supply_ano.loc[supply_ano['PRODUTO'] == 'GASOLINA A']
supply_ano_produto_groupby = supply_ano_produto.groupby('REFINARIA')['PRODUÇÃO'].sum()
(
    supply_ano_produto_groupby
    .sort_values(ascending=False)
    .apply(format_numbers)
    .reset_index()
)

Unnamed: 0,REFINARIA,PRODUÇÃO
0,REPLAN,6Mi
1,REPAR,4Mi
2,REVAP,3Mi
3,REFMAT,3Mi
4,REFAP,3Mi
5,RPBC,3Mi
6,REDUC,3Mi
7,REGAP,2Mi
8,MANGUINHOS,1Mi
9,RECAP,1Mi
