In [None]:
# Directories, each read from the environment variable of the same name
# (the value is None when the variable is not set).
input_path, fig_output, tab_output = (
    os.environ.get(name) for name in ('input_path', 'fig_output', 'tab_output')
)

# Query settings: the project id passed as `billing_project_id` to the queries, and
# the flag that enables the query cells (they run only when it is the string 'True').
project_id_bq, run_query = (
    os.environ.get(name) for name in ('project_id_bq', 'run_query')
)

# Budget Execution

In [None]:
# Query to count null commitment ids (id_empenho_bd) 

def run_query_and_save_results():
    """Count null commitment ids per year/state and cache the result as a CSV file.

    The query reads ``basedosdados.world_wb_mides.empenho`` (rows with a null
    ``ano`` are excluded) and returns, for every (year, state) pair: the number of
    rows, the number of rows with a non-null ``id_empenho_bd``, the number of rows
    where it is null, the sum of ``valor_final``, and the sum of ``valor_final``
    restricted to rows whose ``id_empenho_bd`` is null.

    The result is written to ``null_budget_commitment_ids.csv`` inside
    ``input_path``. Relies on the notebook-level names ``bd``, ``project_id_bq``
    and ``input_path``.

    Note: the alias ``total_null_commtiments`` is misspelled, but the plotting cell
    reads the column under exactly that name, so it is kept as is.
    """
    sql = '''
    SELECT
      ano AS year,
      sigla_uf AS state,
      COUNT (*) AS total_observations,
      COUNT(id_empenho_bd) AS total_commitments,
      COUNT(CASE WHEN id_empenho_bd IS NULL THEN 1 END) AS total_null_commtiments,
      SUM(valor_final) AS total_committed,
      SUM(CASE WHEN id_empenho_bd IS NULL THEN valor_final ELSE 0 END) AS total_null_committed
    FROM basedosdados.world_wb_mides.empenho
    WHERE ano IS NOT NULL
    GROUP BY 1,2
    '''

    # Run the query first; the destination path is only built once it has returned.
    result = bd.read_sql(sql, billing_project_id=project_id_bq)
    destination = os.path.join(input_path, 'null_budget_commitment_ids.csv')

    # Two-decimal floats, empty string for missing values, no index column.
    result.to_csv(destination, index=False, na_rep='', float_format='%.2f')

# Execute the query only when this runs as the main program AND the `run_query`
# flag is the exact string 'True'; otherwise the previously saved CSV is reused.
if __name__ == '__main__' and run_query == 'True':
    run_query_and_save_results()

In [None]:
# Query to count null verification ids (id_liquidacao_bd) 

def run_query_and_save_results():
    """Count null verification ids per year/state and cache the result as a CSV file.

    The query reads ``basedosdados.world_wb_mides.liquidacao`` (rows with a null
    ``ano`` are excluded) and returns, for every (year, state) pair: the number of
    rows, the number of rows with a non-null ``id_liquidacao_bd``, the number of
    rows where it is null, the sum of ``valor_final``, and the sum of
    ``valor_final`` restricted to rows whose ``id_liquidacao_bd`` is null.

    The result is written to ``null_budget_verification_ids.csv`` inside
    ``input_path``. Relies on the notebook-level names ``bd``, ``project_id_bq``
    and ``input_path``.
    """
    # Fix: the SELECT list previously lacked the comma after `total_verified`,
    # which made the statement syntactically invalid.
    query = '''
    SELECT
      ano AS year,
      sigla_uf AS state,
      COUNT (*) AS total_observations,
      COUNT(id_liquidacao_bd) AS total_verifications,
      COUNT(CASE WHEN id_liquidacao_bd IS NULL THEN 1 END) AS total_null_verifications,
      SUM(valor_final) AS total_verified,
      SUM(CASE WHEN id_liquidacao_bd IS NULL THEN valor_final ELSE 0 END) AS total_null_verified
    FROM basedosdados.world_wb_mides.liquidacao
    WHERE ano IS NOT NULL
    GROUP BY 1,2
    '''

    df = bd.read_sql(query, billing_project_id=project_id_bq)

    # Two-decimal floats, empty string for missing values, no index column.
    df.to_csv(os.path.join(input_path, 'null_budget_verification_ids.csv'), index=False, na_rep='', float_format='%.2f')

# Execute the query only when this runs as the main program AND the `run_query`
# flag is the exact string 'True'; otherwise the previously saved CSV is reused.
if __name__ == '__main__' and run_query == 'True':
    run_query_and_save_results()

In [None]:
# Query to count null payment ids (id_pagamento_bd) 

def run_query_and_save_results():
    """Count null payment ids per year/state and cache the result as a CSV file.

    The query reads ``basedosdados.world_wb_mides.pagamento`` (rows with a null
    ``ano`` are excluded) and returns, for every (year, state) pair: the number of
    rows, the number of rows with a non-null ``id_pagamento_bd``, the number of
    rows where it is null, the sum of ``valor_final``, and the sum of
    ``valor_final`` restricted to rows whose ``id_pagamento_bd`` is null.

    The result is written to ``null_budget_payment_ids.csv`` inside
    ``input_path``. Relies on the notebook-level names ``bd``, ``project_id_bq``
    and ``input_path``.
    """
    sql = '''
    SELECT
      ano AS year,
      sigla_uf AS state,
      COUNT (*) AS total_observations,
      COUNT(id_pagamento_bd) AS total_payments,
      COUNT(CASE WHEN id_pagamento_bd IS NULL THEN 1 END) AS total_null_payments,
      SUM(valor_final) AS total_paid,
      SUM(CASE WHEN id_pagamento_bd IS NULL THEN valor_final ELSE 0 END) AS total_null_paid
    FROM basedosdados.world_wb_mides.pagamento
    WHERE ano IS NOT NULL
    GROUP BY 1,2
    '''

    # Run the query first; the destination path is only built once it has returned.
    result = bd.read_sql(sql, billing_project_id=project_id_bq)
    destination = os.path.join(input_path, 'null_budget_payment_ids.csv')

    # Two-decimal floats, empty string for missing values, no index column.
    result.to_csv(destination, index=False, na_rep='', float_format='%.2f')

# Execute the query only when this runs as the main program AND the `run_query`
# flag is the exact string 'True'; otherwise the previously saved CSV is reused.
if __name__ == '__main__' and run_query == 'True':
    run_query_and_save_results()

In [None]:
# Share of commitment records, and of committed value, that lack the generated
# identifier "id_empenho_bd", plotted per state and year (one panel per state).

# Load the cached query result and derive both percentages in one chain, so that
# re-running the cell always starts again from the CSV.
df = (
    pd.read_csv(os.path.join(input_path, 'null_budget_commitment_ids.csv'))
    .assign(year=lambda d: d['year'].replace(np.nan, 0).astype(int))
    .sort_values(by=['state', 'year'])
    .assign(
        # 'total_null_commtiments' is spelled exactly as in the CSV header.
        proportion=lambda d: 100*d['total_null_commtiments']/d['total_observations'],
        proportion_value=lambda d: 100*d['total_null_committed']/d['total_committed'],
    )
)

# The colour cycle has to be installed BEFORE the axes are created: an Axes picks
# up rcParams['axes.prop_cycle'] at creation time, so setting it afterwards (as
# this cell used to do) leaves the figure with whatever cycle was active before.
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=['#1a476f', '#c10534'])
fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(3, 2, figsize=(12, 8))

# One entry per panel: (axes, title, row filter, explicit x ticks or None).
panels = [
    (ax1, 'CE', 'state == "CE"', np.arange(2009, 2022, 2)),
    (ax2, 'MG', 'state == "MG"', None),
    (ax3, 'PB', 'state == "PB" & year > 2008', np.arange(2009, 2021, 2)),
    (ax4, 'PR', 'state == "PR"', None),
    (ax5, 'RS', 'state == "RS" & year > 2009', None),
    (ax6, 'SP', 'state == "SP"', None),
]

for ax, title, row_filter, xticks in panels:
    df.query(row_filter).plot(x='year', y=['proportion', 'proportion_value'], kind='line', ax=ax, marker='.', legend=None)
    ax.set_title(title)
    if xticks is not None:
        ax.set_xticks(xticks)
    ax.grid(False)

# '%' label on the left column only; 'Year' label on the bottom row only.
for ax in (ax1, ax3, ax5):
    ax.set_ylabel('%')
for ax in (ax1, ax2, ax3, ax4):
    ax.set_xlabel(None)
for ax in (ax5, ax6):
    ax.set_xlabel('Year')

plt.tight_layout()
# One shared legend, positioned below the grid through bbox_to_anchor.
plt.legend(['Null Commitments (%)', 'Null Total Committed (%)'], bbox_to_anchor =(-0.1,-0.5), loc='lower center', ncol=2)
plt.savefig(os.path.join(fig_output, 'proporcao_nulos_empenhos.pdf'), bbox_inches='tight')

In [None]:
# Share of verification records, and of verified value, that lack the generated
# identifier "id_liquidacao_bd", plotted per state and year (one panel per state).

# Load the cached query result and derive both percentages in one chain, so that
# re-running the cell always starts again from the CSV.
df = (
    pd.read_csv(os.path.join(input_path, 'null_budget_verification_ids.csv'))
    .assign(year=lambda d: d['year'].replace(np.nan, 0).astype(int))
    .sort_values(by=['state', 'year'])
    .assign(
        proportion=lambda d: 100*d['total_null_verifications']/d['total_observations'],
        proportion_value=lambda d: 100*d['total_null_verified']/d['total_verified'],
    )
)

# The colour cycle has to be installed BEFORE the axes are created: an Axes picks
# up rcParams['axes.prop_cycle'] at creation time, so setting it afterwards (as
# this cell used to do) leaves the figure with whatever cycle was active before.
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=['#1a476f', '#c10534'])
fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(3, 2, figsize=(12, 8))

# One entry per panel: (axes, title, row filter, explicit x ticks or None).
panels = [
    (ax1, 'CE', 'state == "CE"', np.arange(2009, 2022, 2)),
    (ax2, 'MG', 'state == "MG"', None),
    (ax3, 'PB', 'state == "PB" & year > 2008', np.arange(2009, 2021, 2)),
    (ax4, 'PR', 'state == "PR"', None),
    (ax5, 'RS', 'state == "RS" & year > 2009', None),
    (ax6, 'SP', 'state == "SP"', None),
]

for ax, title, row_filter, xticks in panels:
    df.query(row_filter).plot(x='year', y=['proportion', 'proportion_value'], kind='line', ax=ax, marker='.', legend=None)
    ax.set_title(title)
    if xticks is not None:
        ax.set_xticks(xticks)
    ax.grid(False)

# '%' label on the left column only; 'Year' label on the bottom row only.
for ax in (ax1, ax3, ax5):
    ax.set_ylabel('%')
for ax in (ax1, ax2, ax3, ax4):
    ax.set_xlabel(None)
for ax in (ax5, ax6):
    ax.set_xlabel('Year')

plt.tight_layout()
# One shared legend, positioned below the grid through bbox_to_anchor.
plt.legend(['Null Verifications (%)', 'Null Total Verified (%)'], bbox_to_anchor =(-0.1,-0.5), loc='lower center', ncol=2)
plt.savefig(os.path.join(fig_output, 'proporcao_nulos_liquidacao.pdf'), bbox_inches='tight')

In [None]:
# Share of payment records, and of paid value, that lack the generated
# identifier "id_pagamento_bd", plotted per state and year (one panel per state).

# Load the cached query result and derive both percentages in one chain, so that
# re-running the cell always starts again from the CSV.
df = (
    pd.read_csv(os.path.join(input_path, 'null_budget_payment_ids.csv'))
    .assign(year=lambda d: d['year'].replace(np.nan, 0).astype(int))
    .sort_values(by=['state', 'year'])
    .assign(
        proportion=lambda d: 100*d['total_null_payments']/d['total_observations'],
        proportion_value=lambda d: 100*d['total_null_paid']/d['total_paid'],
    )
)

# The colour cycle has to be installed BEFORE the axes are created: an Axes picks
# up rcParams['axes.prop_cycle'] at creation time, so setting it afterwards (as
# this cell used to do) leaves the figure with whatever cycle was active before.
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=['#1a476f', '#c10534'])
fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(3, 2, figsize=(12, 8))

# One entry per panel: (axes, title, row filter, explicit x ticks or None).
panels = [
    (ax1, 'CE', 'state == "CE"', np.arange(2009, 2022, 2)),
    (ax2, 'MG', 'state == "MG"', None),
    (ax3, 'PB', 'state == "PB" & year > 2008', np.arange(2009, 2021, 2)),
    (ax4, 'PR', 'state == "PR"', None),
    (ax5, 'RS', 'state == "RS" & year > 2009', None),
    (ax6, 'SP', 'state == "SP"', None),
]

for ax, title, row_filter, xticks in panels:
    df.query(row_filter).plot(x='year', y=['proportion', 'proportion_value'], kind='line', ax=ax, marker='.', legend=None)
    ax.set_title(title)
    if xticks is not None:
        ax.set_xticks(xticks)
    ax.grid(False)

# '%' label on the left column only; 'Year' label on the bottom row only.
for ax in (ax1, ax3, ax5):
    ax.set_ylabel('%')
for ax in (ax1, ax2, ax3, ax4):
    ax.set_xlabel(None)
for ax in (ax5, ax6):
    ax.set_xlabel('Year')

plt.tight_layout()
# One shared legend, positioned below the grid through bbox_to_anchor.
plt.legend(['Null Payments (%)', 'Null Total Paid (%)'], bbox_to_anchor =(-0.1,-0.5), loc='lower center', ncol=2)
plt.savefig(os.path.join(fig_output, 'proporcao_nulos_pagamento.pdf'), bbox_inches='tight')

# Procurement

In [None]:
# Query to count null tender ids (id_licitacao_bd) 

def run_query_and_save_results():
    """Count null tender ids per year/state and cache the result as a CSV file.

    The query reads ``basedosdados.world_wb_mides.licitacao`` (rows with a null
    ``ano`` are excluded) and works in nested steps:

    1. ``valor_corrigido`` is SAFE_CAST to FLOAT64 (``valor_corrigido_float``).
    2. Per ``sigla_uf``, the 0.01 and 0.999 continuous percentiles of that value
       are computed.
    3. Values below / above those percentiles are replaced by the percentile
       itself (``valor_corrigido_w``).
    4. For every (year, state) pair the outer query returns: the number of rows,
       the number of rows with a non-null ``id_licitacao_bd``, the number of rows
       where it is null, the sum of ``valor_corrigido_w`` (nulls counted as 0),
       and the sum of ``valor_corrigido_w`` over rows with a null
       ``id_licitacao_bd``.

    The result is written to ``null_tender_ids.csv`` inside ``input_path``.
    Relies on the notebook-level names ``bd``, ``project_id_bq`` and
    ``input_path``.

    NOTE(review): the lower cut-off is 0.01 while the upper one is 0.999 -- confirm
    that this asymmetry is intended.
    """
    sql = '''

    SELECT
      ano AS year,
      sigla_uf AS state,
      COUNT (*) AS total_observations,
      COUNT(id_licitacao_bd) AS total_tenders,
      COUNT(CASE WHEN id_licitacao_bd IS NULL THEN 1 END) AS total_null_tenders,
      SUM(CASE WHEN valor_corrigido_w IS NULL THEN 0 ELSE valor_corrigido_w END) AS total_procurement_value,
      SUM(CASE WHEN id_licitacao_bd IS NULL THEN CAST(valor_corrigido_w AS FLOAT64) ELSE 0 END) AS total_null_tenders_value
    FROM (
      SELECT
        *,
        CASE
          WHEN valor_corrigido_float < percentile_lower THEN percentile_lower
          WHEN valor_corrigido_float > percentile_upper THEN percentile_upper
          ELSE valor_corrigido_float
        END AS valor_corrigido_w
      FROM (
        SELECT
          *,
          PERCENTILE_CONT(valor_corrigido_float, 0.01) OVER (PARTITION BY sigla_uf) AS percentile_lower,
          PERCENTILE_CONT(valor_corrigido_float, 0.999) OVER (PARTITION BY sigla_uf) AS percentile_upper
        FROM (
          SELECT
            *,
            SAFE_CAST(valor_corrigido AS FLOAT64) AS valor_corrigido_float
          FROM basedosdados.world_wb_mides.licitacao
          WHERE ano IS NOT NULL
        )
      )
    )
    GROUP BY 1, 2
    
    '''

    # Run the query first; the destination path is only built once it has returned.
    result = bd.read_sql(sql, billing_project_id=project_id_bq)
    destination = os.path.join(input_path, 'null_tender_ids.csv')

    # Two-decimal floats, empty string for missing values, no index column.
    result.to_csv(destination, index=False, na_rep='', float_format='%.2f')

# Execute the query only when this runs as the main program AND the `run_query`
# flag is the exact string 'True'; otherwise the previously saved CSV is reused.
if __name__ == '__main__' and run_query == 'True':
    run_query_and_save_results()

In [None]:
# Share of tender records, and of tender value, that lack the generated
# identifier "id_licitacao_bd", plotted per state and year (one panel per state).

# Load the cached query result and derive both percentages in one chain, so that
# re-running the cell always starts again from the CSV.
df = (
    pd.read_csv(os.path.join(input_path, 'null_tender_ids.csv'))
    .assign(year=lambda d: d['year'].replace(np.nan, 0).astype(int))
    .sort_values(by=['state', 'year'])
    .assign(
        proportion=lambda d: 100*d['total_null_tenders']/d['total_observations'],
        proportion_value=lambda d: 100*d['total_null_tenders_value']/d['total_procurement_value'],
    )
)

# The colour cycle has to be installed BEFORE the axes are created: an Axes picks
# up rcParams['axes.prop_cycle'] at creation time, so setting it afterwards (as
# this cell used to do) leaves the figure with whatever cycle was active before.
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=['#1a476f', '#c10534'])
fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(3, 2, figsize=(12, 8))

# One entry per panel: (axes, title, row filter, explicit x ticks or None).
# The last panel shows PE here, with yearly ticks.
panels = [
    (ax1, 'CE', 'state == "CE"', np.arange(2009, 2022, 2)),
    (ax2, 'MG', 'state == "MG"', None),
    (ax3, 'PB', 'state == "PB" & year > 2008', None),
    (ax4, 'PR', 'state == "PR"', None),
    (ax5, 'RS', 'state == "RS" & year > 2009', None),
    (ax6, 'PE', 'state == "PE"', np.arange(2012, 2022, 1)),
]

for ax, title, row_filter, xticks in panels:
    df.query(row_filter).plot(x='year', y=['proportion', 'proportion_value'], kind='line', ax=ax, marker='.', legend=None)
    ax.set_title(title)
    if xticks is not None:
        ax.set_xticks(xticks)
    ax.grid(False)

# '%' label on the left column only; 'Year' label on the bottom row only.
for ax in (ax1, ax3, ax5):
    ax.set_ylabel('%')
for ax in (ax1, ax2, ax3, ax4):
    ax.set_xlabel(None)
for ax in (ax5, ax6):
    ax.set_xlabel('Year')

plt.tight_layout()
# One shared legend, positioned below the grid through bbox_to_anchor.
plt.legend(['Null Tenders (%)', 'Null Total Tenders Value (%)'], bbox_to_anchor =(-0.1,-0.5), loc='lower center', ncol=2)
plt.savefig(os.path.join(fig_output,'proporcao_nulos_licitacao.pdf'), bbox_inches='tight')