In [None]:
import os
import time
import requests
import unicodedata
import pandas as pd
from datetime import datetime, timedelta
from geopy.geocoders import Nominatim

In [None]:
def csv(namefile, dataset):
    """Append one record to the CSV file ``namefile``, creating the file if needed.

    Args:
        namefile: Path of the CSV file to write.
        dataset: Mapping of column name -> value describing a single row.

    The file is rewritten in full on every call: existing rows are read back,
    the new row is appended after them, and the result is saved without the
    index column.
    """
    row = pd.DataFrame([dataset])
    table = (
        pd.concat([pd.read_csv(namefile), row], ignore_index=True)
        if os.path.isfile(namefile)
        else row
    )
    table.to_csv(namefile, index=False)


In [None]:

def response(url, max_retries=2, delay=2, timeout=30):
    """Fetch ``url`` with an HTTP GET and return its decoded JSON body.

    Args:
        url: Full URL to request.
        max_retries: Maximum number of attempts before giving up.
        delay: Seconds to sleep between two consecutive attempts.
        timeout: Seconds to wait for the server on each attempt; without it a
            stalled connection would block forever.

    Returns:
        The decoded JSON payload of the first HTTP 200 reply, or ``None`` when
        every attempt failed (non-200 status, network error or invalid JSON).
    """
    for attempt in range(max_retries):
        try:
            # Named `reply` so the local does not shadow this function's own name.
            reply = requests.get(url, timeout=timeout)
            if reply.status_code == 200:
                return reply.json()
            print ("Attempt ->", attempt + 1)
            print(f"Error {reply.status_code}: Unable to retrieve the information for url-> {url}.")
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a 200 reply whose body is not valid JSON.
            print(f"Attempt {attempt + 1} failed with error: {e}")

        # No point in sleeping after the last attempt.
        if attempt < max_retries - 1:
            time.sleep(delay)

    print("Max retries reached. Unable to retrieve the data.")
    return None


In [None]:
def generate_dates(tini, tend):
    """List every calendar day between two dates, both ends included.

    Args:
        tini: First day, formatted as DDMMYYYY.
        tend: Last day, formatted as DDMMYYYY.

    Returns:
        A list of DDMMYYYY strings in chronological order; empty when ``tend``
        falls before ``tini``.
    """
    fmt = "%d%m%Y"
    first_day = datetime.strptime(tini, fmt)
    last_day = datetime.strptime(tend, fmt)
    span_days = (last_day - first_day).days
    return [
        (first_day + timedelta(days=offset)).strftime(fmt)
        for offset in range(span_days + 1)
    ]

In [None]:
def date_format(date):
    """Turn an ISO-like timestamp into a "<Spanish month name>/<year>" label.

    Args:
        date: String starting with ``YYYY-MM-DD``; anything from the first
            ``T`` onwards is ignored.

    Returns:
        For example ``"Enero/2024"`` for ``"2024-01-15T10:30:00"``.
    """
    month_names = (
        'Enero', 'Febrero', 'Marzo', 'Abril', 'Mayo', 'Junio',
        'Julio', 'Agosto', 'Septiembre', 'Octubre', 'Noviembre', 'Diciembre',
    )
    day_part = date.partition('T')[0]
    parsed = datetime.strptime(day_part, '%Y-%m-%d')
    return "{}/{}".format(month_names[parsed.month - 1], parsed.year)

In [None]:
def latlong(inst, address, retries=5, backoff_factor=0.5):
    """Geocode an institution with Nominatim and return "latitude,longitude".

    The institution name is tried first; the street address is used as a
    fallback when the name yields no result.

    Args:
        inst: Institution name to look up.
        address: Address to look up when ``inst`` gives no match.
        retries: Maximum number of attempts when the geocoder raises.
        backoff_factor: Base delay in seconds; the wait doubles on each retry.

    Returns:
        ``"lat,lon"`` for the first match, or ``"0,0"`` when nothing matches,
        when every attempt fails, or when ``retries`` is not positive.
    """
    geocoder = Nominatim(user_agent="localhost")
    # Skip empty/None queries: they cannot match and would only cost a request.
    queries = [query for query in (inst, address) if query]
    for attempt in range(retries):
        try:
            for query in queries:
                location = geocoder.geocode(query)
                if location:
                    return "{},{}".format(location.latitude, location.longitude)
            return "0,0"
        except Exception as e:
            # Deliberately broad: geocoding is best-effort and must not abort the crawl.
            if attempt < retries - 1:
                # Exponential backoff
                wait_time = backoff_factor * (2 ** attempt)
                print(f"Attempt {attempt + 1} failed: {e}. Retrying in {wait_time:.1f} seconds...")
                time.sleep(wait_time)
            else:
                # Final failure after retries exhausted
                print(f"Final attempt failed: {e}")
                return "0,0"
    # Reached only when retries <= 0; keep the return type consistent.
    return "0,0"



In [None]:
def find_keywords(name_oc, keywords, flag = False, kw_found = 0):
    """Tell whether a purchase-order name mentions at least two keywords.

    Args:
        name_oc: Purchase-order name; ``None`` is treated as an empty name.
        keywords: Iterable of substrings to look for. They are compared as-is
            against the lowercased, accent-stripped name, so they should be
            lowercase ASCII themselves.
        flag: Value returned when fewer than two keywords are found.
        kw_found: Starting value of the match counter.

    Returns:
        ``True`` once the counter reaches 2, otherwise ``flag`` unchanged.
    """
    # NFKD + ASCII encoding drops accents, so "Impresión" matches "impresion".
    normalized = unicodedata.normalize("NFKD", (name_oc or "").lower()).encode("ascii","ignore").decode("ascii")
    kw_found += sum(1 for w in keywords if w in normalized)
    if kw_found >= 2:
        flag = True
    return flag

In [None]:
################## Input data #######################
# Substrings searched for in purchase-order names. They are lowercase and
# accent-free because find_keywords compares them against a lowercased,
# ASCII-folded version of the name.
keywords = ["papel","resma","imprimir","impresion","fotocopia","carta","oficio","impresora"]
# CSV file that receives one row per matching purchase order.
csv_name = "dataset_2024_gastos_papel_bruto.csv"
# Inclusive search window; generate_dates parses both values as DDMMYYYY.
tini     = "01012024"
tend     = "15012024"
# API ticket sent as the `ticket` query parameter. Prefer supplying it through the
# MERCADOPUBLICO_TICKET environment variable so the credential does not have to
# live in the notebook; the literal below is kept only as a fallback.
ticket                = os.environ.get("MERCADOPUBLICO_TICKET") or "F8537A18-6766-4DEF-9E59-426B4FEE2844"
endpoint_institutions = "https://api.mercadopublico.cl/servicios/v1/Publico/Empresas/BuscarComprador?"
endpoint_oc           = "https://api.mercadopublico.cl/servicios/v1/Publico/ordenesDeCompra.json?"

In [None]:
################## Search purchase orders by each institution #######################
# For every buyer institution and every day in the [tini, tend] window, list the
# accepted purchase orders, keep those whose name matches the keywords, fetch the
# order detail, geocode the buyer and append one row to csv_name.
dates = generate_dates(tini, tend)
try:
    institutions = response(f"{endpoint_institutions}ticket={ticket}")
    # response() returns None once its retries are exhausted, so check before .get()
    if institutions and institutions.get('listaEmpresas'):#900
        print (institutions)
        for org in institutions.get('listaEmpresas'):
            code = org.get("CodigoEmpresa")
            print ("Institución: {} - Código: {}".format(org.get("NombreEmpresa"), code))
            for date in dates:
                url_oc = f"{endpoint_oc}fecha={date}&estado=aceptada&CodigoOrganismo={code}&ticket={ticket}"
                response_oc = response(url_oc)
                if not (response_oc and response_oc.get('Listado')):
                    continue
                print("Total OC ->", response_oc.get("Cantidad"))
                for oc in response_oc.get('Listado'):
                    name_oc = oc.get("Nombre")
                    # An order without a name cannot match any keyword.
                    if not name_oc or not find_keywords(name_oc, keywords):
                        continue
                    url_code  =  f"{endpoint_oc}codigo={oc.get('Codigo')}&ticket={ticket}"
                    info_code = response(url_code)
                    if not (info_code and info_code.get("Listado")):
                        # Report the order code (the value the request was made with).
                        print ("Request failed for code:", oc.get('Codigo'))
                        continue
                    data = info_code.get("Listado")[0]
                    print (data)
                    # Missing sub-objects become empty dicts so one incomplete
                    # order does not abort the whole crawl.
                    buyer    = data.get("Comprador") or {}
                    accepted = (data.get("Fechas") or {}).get("FechaAceptacion")
                    date_csv = date_format(accepted) if accepted else None
                    name_org = buyer.get("NombreOrganismo") or ""
                    # Spell out the "I" abbreviation before geocoding the name.
                    if name_org.startswith("I MUNICIPALIDAD"): name_org = ("ILUSTRE{}".format(name_org[1:]))
                    address = buyer.get("DireccionUnidad")
                    coord = latlong(name_org, address)
                    total = data.get("Total")
                    dataset_oc = {
                        "Institución Padre": buyer.get("NombreOrganismo"),
                        "Dirección": address,
                        "Comuna": buyer.get("ComunaUnidad"),
                        "Región": buyer.get("RegionUnidad"),
                        "Coordenadas Geográficas": coord,
                        "Rubro de gasto": name_oc,
                        "Periodo": date_csv,
                        "Gasto": int(total) if total is not None else None
                    }
                    csv(csv_name, dataset_oc)

except requests.exceptions.RequestException as e:
    print(e)
