In [None]:
# Folders, all read from environment variables (each is None when the variable is unset)
input_path = os.getenv('input_path')    # where the cached query CSVs are written and read
fig_output = os.getenv('fig_output')    # where figures are saved
tab_output = os.getenv('tab_output')

# BigQuery settings
project_id_bq = os.getenv('project_id_bq')    # passed as billing_project_id to bd.read_sql
run_query = os.getenv('run_query')            # a string; query cells run only when it equals 'True'

# Procurement

In [None]:
# Count the distinct municipalities present per year and state in the tender table
# (the cached result is what the missing-municipality share is derived from)

def run_query_and_save_results():
    """Query the tender table and cache the municipality counts as CSV.

    Runs the aggregation through `bd.read_sql`, billed to `project_id_bq`,
    and writes the result to `count_mun_lic.csv` inside `input_path`.
    Returns nothing.
    """
    sql = '''
    SELECT COUNT (DISTINCT id_municipio) AS distinct_municipalities, ano, sigla_uf
    FROM `basedosdados.world_wb_mides.licitacao`
    GROUP BY ano, sigla_uf
    ORDER BY sigla_uf, ano
    '''

    tender_counts = bd.read_sql(sql, billing_project_id=project_id_bq)

    csv_path = os.path.join(input_path, 'count_mun_lic.csv')
    tender_counts.to_csv(csv_path, index=False, na_rep='', float_format='%.2f')

# The query only runs when the run_query setting is exactly the string 'True'
if __name__ == '__main__' and run_query == 'True':
    run_query_and_save_results()

In [None]:
# Count the distinct municipalities present per year and state in the tender-item table
# (the cached result is what the missing-municipality share is derived from)

def run_query_and_save_results():
    """Query the tender-item table and cache the municipality counts as CSV.

    Runs the aggregation through `bd.read_sql`, billed to `project_id_bq`,
    and writes the result to `count_mun_lic_item.csv` inside `input_path`.
    Returns nothing.
    """
    sql = '''
    SELECT COUNT (DISTINCT id_municipio) AS distinct_municipalities, ano, sigla_uf
    FROM `basedosdados.world_wb_mides.licitacao_item`
    GROUP BY ano, sigla_uf
    ORDER BY sigla_uf, ano
    '''

    item_counts = bd.read_sql(sql, billing_project_id=project_id_bq)

    csv_path = os.path.join(input_path, 'count_mun_lic_item.csv')
    item_counts.to_csv(csv_path, index=False, na_rep='', float_format='%.2f')

# The query only runs when the run_query setting is exactly the string 'True'
if __name__ == '__main__' and run_query == 'True':
    run_query_and_save_results()

In [None]:
# Count the distinct municipalities present per year and state in the tender-participant table
# (the cached result is what the missing-municipality share is derived from)

def run_query_and_save_results():
    """Query the tender-participant table and cache the municipality counts as CSV.

    Runs the aggregation through `bd.read_sql`, billed to `project_id_bq`,
    and writes the result to `count_mun_lic_part.csv` inside `input_path`.
    Returns nothing.
    """
    sql = '''
    SELECT COUNT (DISTINCT id_municipio) AS distinct_municipalities, ano, sigla_uf
    FROM `basedosdados.world_wb_mides.licitacao_participante`
    GROUP BY ano, sigla_uf
    ORDER BY sigla_uf, ano
    '''

    participant_counts = bd.read_sql(sql, billing_project_id=project_id_bq)

    csv_path = os.path.join(input_path, 'count_mun_lic_part.csv')
    participant_counts.to_csv(csv_path, index=False, na_rep='', float_format='%.2f')

# The query only runs when the run_query setting is exactly the string 'True'
if __name__ == '__main__' and run_query == 'True':
    run_query_and_save_results()

In [None]:
# Load the procurement municipality counts cached by the query cells
count_mun_lic, count_mun_lic_item, count_mun_lic_part = (
    pd.read_csv(os.path.join(input_path, csv_name))
    for csv_name in ('count_mun_lic.csv', 'count_mun_lic_item.csv', 'count_mun_lic_part.csv')
)

# Reference table: state abbreviation -> hard-coded number of municipalities (stored as strings)
sigla_uf = ['CE', 'MG', 'PB', 'PE', 'PR', 'RS','SP']
number_municipalities = pd.DataFrame({
    'sigla_uf': sigla_uf,
    'number_municipalities': ['184', '853', '223', '185', '399', '497', '645'],
})

# Attach the reference count to every (year, state) row of each table
count_mun_lic, count_mun_lic_item, count_mun_lic_part = (
    counts.merge(number_municipalities, how='left', left_on='sigla_uf', right_on='sigla_uf')
    for counts in (count_mun_lic, count_mun_lic_item, count_mun_lic_part)
)

# Number ('diff') and percentage ('perc_missing') of municipalities absent from each table;
# the year column is cast to int. The merged frames are fresh objects, so the columns
# are added in place.
for counts in (count_mun_lic, count_mun_lic_item, count_mun_lic_part):
    state_total = counts['number_municipalities'].astype(int)
    counts['diff'] = state_total - counts['distinct_municipalities']
    counts['perc_missing'] = (counts['diff'].astype(int) / state_total) * 100
    counts['ano'] = counts['ano'].astype(int)

In [None]:
# Start from matplotlib's default rcParams, then apply the seaborn 'ticks' style
mpl.rcParams.update(mpl.rcParamsDefault)
sns.set_style('ticks')

# States to draw, taken from the tender table
sigla_uf_list = count_mun_lic['sigla_uf'].unique()

# Three stacked panels sharing the year axis
fig, axs = plt.subplots(3, 1, figsize=(6, 6), sharex=True)

# One panel per procurement table: (axes, data, title)
procurement_panels = [
    (axs[0], count_mun_lic, 'Tender'),
    (axs[1], count_mun_lic_item, 'Tender-item'),
    (axs[2], count_mun_lic_part, 'Tender-participant'),
]

for panel_ax, panel_counts, panel_title in procurement_panels:
    # One dashed line per state: percentage of missing municipalities by year
    for state in sigla_uf_list:
        state_rows = panel_counts[panel_counts['sigla_uf'] == state]
        panel_ax.plot(
            state_rows['ano'],
            state_rows['perc_missing'],
            label=state,
            marker='.',
            linewidth=1.2,
            linestyle='dashed',
        )
    panel_ax.set_ylabel('%', fontsize=9)
    panel_ax.set_title(panel_title, fontsize=8, loc='center', pad=10)

# Year ticks on the x-axis (applied to the current axes; the x-axis is shared)
years = range(2009, 2022)
plt.xticks(years, rotation=45, ha='right')

for ax in axs:
    ax.tick_params(axis='both', labelsize=8)

# Layout
fig.tight_layout(pad=2)

# Single legend placed below the bottom panel, one column per state
axs[2].legend(loc='upper center', bbox_to_anchor=(0.5, -0.4), ncol=len(sigla_uf_list), fontsize=8)
fig.subplots_adjust(bottom=0.2)

fig.savefig(os.path.join(fig_output,'missing_municipalities_procurement.pdf'))

# Budget execution

In [None]:
# Count the distinct municipalities present per year and state in the commitment (empenho) table
# (the cached result is what the missing-municipality share is derived from)

def run_query_and_save_results():
    """Query the commitment (empenho) table and cache the municipality counts as CSV.

    Runs the aggregation through `bd.read_sql`, billed to `project_id_bq`,
    and writes the result to `count_mun_empenho.csv` inside `input_path`.
    Returns nothing.
    """
    sql = '''
    SELECT COUNT (DISTINCT id_municipio) AS distinct_municipalities, ano, sigla_uf
    FROM `basedosdados.world_wb_mides.empenho`
    GROUP BY ano, sigla_uf
    ORDER BY sigla_uf, ano
    '''

    commitment_counts = bd.read_sql(sql, billing_project_id=project_id_bq)

    csv_path = os.path.join(input_path, 'count_mun_empenho.csv')
    commitment_counts.to_csv(csv_path, index=False, na_rep='', float_format='%.2f')

# The query only runs when the run_query setting is exactly the string 'True'
if __name__ == '__main__' and run_query == 'True':
    run_query_and_save_results()

In [None]:
# Count the distinct municipalities present per year and state in the verification (liquidacao) table
# (the cached result is what the missing-municipality share is derived from)

def run_query_and_save_results():
    """Query the verification (liquidacao) table and cache the municipality counts as CSV.

    Runs the aggregation through `bd.read_sql`, billed to `project_id_bq`,
    and writes the result to `count_mun_liq.csv` inside `input_path`.
    Returns nothing.
    """
    sql = '''
    SELECT COUNT (DISTINCT id_municipio) AS distinct_municipalities, ano, sigla_uf
    FROM `basedosdados.world_wb_mides.liquidacao`
    GROUP BY ano, sigla_uf
    ORDER BY sigla_uf, ano
    '''

    verification_counts = bd.read_sql(sql, billing_project_id=project_id_bq)

    csv_path = os.path.join(input_path, 'count_mun_liq.csv')
    verification_counts.to_csv(csv_path, index=False, na_rep='', float_format='%.2f')

# The query only runs when the run_query setting is exactly the string 'True'
if __name__ == '__main__' and run_query == 'True':
    run_query_and_save_results()

In [None]:
# Count the distinct municipalities present per year and state in the payment (pagamento) table
# (the cached result is what the missing-municipality share is derived from)

def run_query_and_save_results():
    """Query the payment (pagamento) table and cache the municipality counts as CSV.

    Runs the aggregation through `bd.read_sql`, billed to `project_id_bq`,
    and writes the result to `count_mun_pag.csv` inside `input_path`.
    Returns nothing.
    """
    sql = '''
    SELECT COUNT (DISTINCT id_municipio) AS distinct_municipalities, ano, sigla_uf
    FROM `basedosdados.world_wb_mides.pagamento`
    GROUP BY ano, sigla_uf
    ORDER BY sigla_uf, ano
    '''

    payment_counts = bd.read_sql(sql, billing_project_id=project_id_bq)

    csv_path = os.path.join(input_path, 'count_mun_pag.csv')
    payment_counts.to_csv(csv_path, index=False, na_rep='', float_format='%.2f')

# The query only runs when the run_query setting is exactly the string 'True'
if __name__ == '__main__' and run_query == 'True':
    run_query_and_save_results()

In [None]:
# Load the budget-execution municipality counts cached by the query cells
count_mun_empenho, count_mun_liq, count_mun_pag = (
    pd.read_csv(os.path.join(input_path, csv_name))
    for csv_name in ('count_mun_empenho.csv', 'count_mun_liq.csv', 'count_mun_pag.csv')
)

# Keep only rows with a state, then drop RS rows dated before 2008.
# The mask is kept in the ~(A & B) form so rows with a missing year are retained.
count_mun_empenho, count_mun_liq, count_mun_pag = (
    counts
    .loc[lambda d: d['sigla_uf'].notnull()]
    .loc[lambda d: ~((d['ano'] < 2008) & (d['sigla_uf'] == 'RS'))]
    for counts in (count_mun_empenho, count_mun_liq, count_mun_pag)
)

# Verification table only: also drop PB rows dated before 2009
pb_before_2009 = (count_mun_liq['ano'] < 2009) & (count_mun_liq['sigla_uf'] == 'PB')
count_mun_liq = count_mun_liq.loc[~pb_before_2009]

# Reference table: state abbreviation -> hard-coded number of municipalities (stored as strings)
sigla_uf = ['CE', 'MG', 'PB', 'PE', 'PR', 'RS','SP']
number_municipalities = pd.DataFrame({
    'sigla_uf': sigla_uf,
    'number_municipalities': ['184', '853', '223', '185', '399', '497', '645'],
})

# Attach the reference count to every (year, state) row of each table
count_mun_empenho, count_mun_liq, count_mun_pag = (
    counts.merge(number_municipalities, how='left', left_on='sigla_uf', right_on='sigla_uf')
    for counts in (count_mun_empenho, count_mun_liq, count_mun_pag)
)

# Number ('diff') and percentage ('perc_missing') of municipalities absent from each table.
# The merged frames are fresh objects, so the columns are added in place.
for counts in (count_mun_empenho, count_mun_liq, count_mun_pag):
    state_total = counts['number_municipalities'].astype(int)
    counts['diff'] = state_total - counts['distinct_municipalities']
    counts['perc_missing'] = (counts['diff'].astype(int) / state_total) * 100

In [None]:
# Missing municipalities: budget execution
# The plotted sample excludes RS rows dated after 2020 or before 2009

sns.set_style('ticks')

count_mun_empenho_sample, count_mun_liq_sample, count_mun_pag_sample = (
    counts
    .loc[lambda d: ~((d['ano'] > 2020) & (d['sigla_uf'] == 'RS'))]
    .loc[lambda d: ~((d['ano'] < 2009) & (d['sigla_uf'] == 'RS'))]
    for counts in (count_mun_empenho, count_mun_liq, count_mun_pag)
)

# States to draw, taken from the commitment sample
sigla_uf_list = count_mun_empenho_sample['sigla_uf'].unique()

# Three stacked panels sharing the year axis
fig, axs = plt.subplots(3, 1, figsize=(6, 6), sharex=True)

# One panel per budget-execution stage: (axes, data, title)
budget_panels = [
    (axs[0], count_mun_empenho_sample, 'Commitment'),
    (axs[1], count_mun_liq_sample, 'Verification'),
    (axs[2], count_mun_pag_sample, 'Payment'),
]

for panel_ax, panel_counts, panel_title in budget_panels:
    # One dashed line per state: percentage of missing municipalities by year
    for state in sigla_uf_list:
        state_rows = panel_counts[panel_counts['sigla_uf'] == state]
        panel_ax.plot(
            state_rows['ano'],
            state_rows['perc_missing'],
            label=state,
            marker='.',
            linewidth=1.2,
            linestyle='dashed',
        )
    panel_ax.set_ylabel('%', fontsize=9)
    panel_ax.set_title(panel_title, fontsize=8, loc='center', pad=10)

# Year ticks on the x-axis (applied to the current axes; the x-axis is shared)
years = range(2003, 2023)
plt.xticks(years, rotation=45, ha='right')

for ax in axs:
    ax.tick_params(axis='both', labelsize=8)

# Layout
fig.tight_layout(pad=2)

# Single legend placed below the bottom panel, one column per state
axs[2].legend(loc='upper center', bbox_to_anchor=(0.5, -0.4), ncol=len(sigla_uf_list), fontsize=8)
fig.subplots_adjust(bottom=0.2)

fig.savefig(os.path.join(fig_output,'missing_municipalities_budget_execution_sample.pdf'))