In [1]:

import pandas as pd
import pandas_gbq as pd_gbq
import jinja2
import os

import time

def get_current_time():
    """Return the current local date and time as a bracketed log prefix.

    Returns:
        str: The timestamp formatted as ``[YYYY-MM-DD HH:MM:SS]``,
        e.g. ``[2024-08-06 20:15:03]``.
    """
    from datetime import datetime

    # The format spec after the colon is handed to datetime.strftime.
    return f"[{datetime.now():%Y-%m-%d %H:%M:%S}]"

def read_gbq_(query, project_id='sfwthr2a4shdyuogrt3jjtygj160rs'):
    """Run a SQL query on BigQuery through pandas-gbq and return the result.

    Args:
        query: SQL text to execute.
        project_id: Google Cloud project the query is run under. Defaults to
            the project this notebook was originally hard-wired to (labelled
            "ri-nonprod" in the original source); pass another id to target a
            different project without editing this helper.

    Returns:
        Whatever ``pandas_gbq.read_gbq`` returns for the query (a pandas
        DataFrame).
    """
    print(f'{get_current_time()} Getting dataset from BQ...')
    return pd_gbq.read_gbq(
        query,
        progress_bar_type='tqdm',   # show a tqdm progress bar while downloading
        use_bqstorage_api=True,     # fetch results through the BigQuery Storage API
        project_id=project_id,
    )

def read_gbq_from_template(template_query, dict_query):
    """Render a query template with Jinja2 and run the result on BigQuery.

    Args:
        template_query: Query as a string; may use Jinja2 templating.
        dict_query: Dictionary of query parameters used to render the
            template. When it is empty (or otherwise falsy) the template is
            NOT rendered and ``template_query`` is sent to BigQuery verbatim.

    Returns:
        The result of ``read_gbq_`` for the final query text.
    """
    # Nothing to substitute: run the text exactly as given.
    if not dict_query:
        return read_gbq_(template_query)

    from jinja2 import Template

    rendered_query = Template(template_query).render(dict_query)
    return read_gbq_(rendered_query)


def read_text(file_name, encoding='utf-8'):
    """Return the entire contents of a text file as a single string.

    Args:
        file_name: Path of the file to read.
        encoding: Text encoding used to decode the file (UTF-8 by default).

    Returns:
        str: The decoded file contents.
    """
    with open(file_name, mode='r', encoding=encoding) as handle:
        contents = handle.read()
    return contents



def download_data(query_name,
                  queries_path,
                  data_path,
                  file_format='parquet',
                  update=False,
                  dict_query=None, **kwargs):
    """Run a stored SQL query on BigQuery and cache the result on disk.

    The query text is read from ``<queries_path>/<query_name>.sql`` and the
    result is stored at ``<data_path>/<query_name>.<file_format>``.

    Args:
        query_name: Name of the query file, with or without the ``.sql``
            suffix.
        queries_path: Directory that contains the ``.sql`` files.
        data_path: Directory where the result file is written / looked up.
        file_format: Output format, with or without a leading dot. It is
            mapped to the pandas ``to_<format>`` / ``read_<format>`` pair
            (``xlsx`` uses the ``excel`` functions).
        update: When True, always re-run the query and overwrite the cached
            file. When False, the cached file is reused if it exists.
        dict_query: Optional dictionary of parameters used to render the
            query template. ``None`` (the default) means no parameters.
        **kwargs: Accepted for backward compatibility; currently ignored.

    Returns:
        The query result as a DataFrame. When the cached file is reused it is
        loaded from disk with the matching pandas reader, so for text formats
        such as CSV the column dtypes may differ from a fresh download.

    Raises:
        ValueError: If the cached file exists but pandas has no
            ``read_<format>`` function to load it.
        AttributeError: If pandas has no ``to_<format>`` writer for the
            requested format.
    """
    if not query_name.endswith('.sql'):
        query_name += '.sql'

    if file_format.startswith('.'):
        file_format = file_format[1:]

    # query_name[:-4] strips the '.sql' suffix guaranteed above.
    file_path = os.path.join(data_path, f'{query_name[:-4]}.{file_format}')
    query_path = os.path.join(queries_path, query_name)

    # pandas names its Excel reader/writer "excel", while the extension is "xlsx".
    io_name = 'excel' if file_format == 'xlsx' else file_format

    if not update and os.path.exists(file_path):
        # Cache hit: return the stored data instead of silently returning None.
        reader = getattr(pd, f'read_{io_name}', None)
        if reader is None:
            raise ValueError(
                f"Cannot load cached file {file_path!r}: "
                f"pandas has no read_{io_name} function"
            )
        return reader(file_path)

    # The SQL file is only needed when the query is actually executed.
    template_query = read_text(query_path)
    df = read_gbq_from_template(template_query, dict_query or {})

    # Columns whose dtype compares equal to 'dbdate' are converted to
    # datetime64[ns] before the frame is written.
    for c in df.dtypes[df.dtypes == 'dbdate'].index:
        df[c] = df[c].astype('datetime64[ns]')

    # Echo the pandas writer name that is about to be used.
    print(io_name)
    if len(df) > 0:
        # Direct method lookup instead of eval() on a string built from
        # file_format; CSV output omits the index column.
        write_kwargs = {'index': False} if file_format == 'csv' else {}
        getattr(df, f'to_{io_name}')(file_path, **write_kwargs)
        print(f'{get_current_time()} File successfully recorded on ', file_path)
    return df

# Alternative layout (disabled): resolve both folders under a project
# directory inside GDRIVE_PATH. GDRIVE_PATH is not defined in this notebook,
# so these lines must stay commented out unless it is set first.
#nome_projeto = '64. TAM/spinoffs/subs_exploratorio'
#queries_path = os.path.join(GDRIVE_PATH, nome_projeto, 'queries')
#data_path = os.path.join(GDRIVE_PATH, nome_projeto, 'data')

# Active layout: folders relative to the current working directory.
# queries_path holds the .sql files, data_path receives the downloaded results.
queries_path, data_path = 'queries', 'data'

In [6]:


# Force a fresh BigQuery download of the `get_ton_unicidade` query
# (update=True overwrites any cached file) and display the resulting frame.
query_name = 'get_ton_unicidade'
download_data(
    query_name=query_name,
    queries_path=queries_path,
    data_path=data_path,
    update=True,
)


[2024-08-06 20:15:03] Getting dataset from BQ...


  create_bqstorage_client=create_bqstorage_client,


parquet
[2024-08-06 20:16:06] File successfully recorded on  data/get_ton_unicidade.parquet


Unnamed: 0,cod_muni,document,reference_month,legal_name,trade_name,nome_muni,uf
0,2700300,11223606490,2024-05-31,RAYANNE KELLY GUIMARAES MOREIRA,ANNA MERCADINHO,Arapiraca,AL
1,2408102,04833363437,2024-05-31,KASSIO LIVIO DE SOUSA ALBUQUERQUE,KASSIO LIVIO DE SOUSA ALBUQUERQUE,Natal,RN
2,4303103,72764708068,2024-04-30,DEMERVAL RODRIGUES PEREIRA,DEMERVAL RODRIGUES PEREIRA,Cachoeirinha,RS
3,1200401,27526031000137,2024-05-31,JEFFERSON DA SILVA OLIVEIRA,TECNO MOTOS,Rio Branco,AC
4,3300407,13614781798,2024-03-31,ANGELA PEREIRA,ESPENTO,Barra Mansa,RJ
...,...,...,...,...,...,...,...
5443961,2914802,07010580561,2024-03-31,MARYELLE CRISTINA DE JESUS SILVA,AMOR PERFEITO,Itabuna,BA
5443962,3135209,06289764616,2024-05-31,LEONARDO NUNES DE CASTRO,LEONARDO MULTCOM,Januária,MG
5443963,2605905,10331405466,2024-04-30,JOSIANE MARIA DA SILVA,JOSIANE MODAS,Gameleira,PE
5443964,2609600,96113510468,2024-03-31,ELIAS BATISTA FRANCISCO,CASA DO FRIOS,Olinda,PE


In [2]:

# Force a fresh BigQuery download of the `final_nomes` query
# (update=True overwrites any cached file) and display the resulting frame.
query_name = 'final_nomes'
download_data(
    query_name=query_name,
    queries_path=queries_path,
    data_path=data_path,
    update=True,
)


[2024-08-07 20:32:00] Getting dataset from BQ...


  create_bqstorage_client=create_bqstorage_client,


parquet
[2024-08-07 20:35:25] File successfully recorded on  data/final_nomes.parquet


Unnamed: 0,reference_month,subs_asterisk,nome_master_com_espaco,nome_master,nome_muni,uf,cpf,cpf_brasil,cnpj,numero_inicio,cod_muni,merchant_tax_id,mmhid_merge,merchant_market_hierarchy_id
0,2024-05-31,SumUp,COSMETICOS,COSMETICOS,Recife,PE,,,29866658000162,,2611606,,,
1,2024-03-31,SumUp,WARLEY E LANA,WARLEYELANA,São Paulo,SP,,,,,3550308,,,
2,2024-04-30,SumUp,MOURA BUFFET,MOURABUFFET,Brasília,DF,,,,,5300108,,,
3,2024-05-31,SumUp,VINICIUSREBEC,VINICIUSREBEC,Sobral,CE,,,,,2312908,,,
4,2024-05-31,SumUp,STUDIO LUCAS,STUDIOLUCAS,São Paulo,SP,,,,,3550308,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48467731,2024-04-30,MercadoPago_subPagarme,MOMENTOTRUFA,MOMENTOTRUFA,Rio de Janeiro,RJ,08018728712,,,,3304557,,,
48467732,2024-04-30,MercadoPago_subPagarme,IERLIBEZERRA,IERLIBEZERRA,Timon,MA,04008110305,,,,2112209,,,
48467733,2024-05-31,MercadoPago_subPagarme,COMERCIO,COMERCIO,Belford Roxo,RJ,00385044720,,,,3300456,,,
48467734,2024-05-31,MercadoPago_subPagarme,ALIANCA,ALIANCA,Lages,SC,04438037905,,,,4209300,,,
