In [1]:
from datetime import datetime
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import requests

# Root of the remote API; every endpoint template below is appended to it.
BASE_URL = "https://d3e6htiiul5ek9.cloudfront.net/prod/"

# Headers sent with every request.
HEADERS = {"User-Agent": "Mozilla/5.0"}

# Endpoint templates, completed with str.format():
#   sucursales_url -> (lat, lng)
#   lookup_url     -> (search term, comma-separated branch ids, category id)
#   product_url    -> (product id, comma-separated branch ids)
sucursales_url = "sucursales?lat={}&lng={}&limit=30"
lookup_url = (
    "productos?string={}&array_sucursales={}"
    "&offset=0&limit=100&id_categoria={}"
)
product_url = "producto?limit=50&id_producto={}&array_sucursales={}"

In [2]:
# The 8 coordinates used by the project, one per zone.
# Each row is (zone name, short code, latitude, longitude). The code ends up in
# the CSV file names; latitude/longitude are substituted into sucursales_url.
_ZONE_ROWS = (
    ("Noroeste", "NO", "-31.3417943", "-64.2538802"),
    ("Norte", "N", "-31.3613381", "-64.2066416"),
    ("Centro-oeste", "CO", "-31.4086063", "-64.2084814"),
    ("Centro-este", "CE", "-31.4054267", "-64.1956076"),
    ("Oeste", "O", "-31.4087208", "-64.2330298"),
    ("Sudoeste", "SO", "-31.4671374", "-64.2272544"),
    ("Sudeste", "SE", "-31.4696984", "-64.1715217"),
    ("Este", "E", "-31.4215951", "-64.1226057"),
)

locations = {
    zone: {"code": code, "lat": lat, "lon": lon}
    for zone, code, lat, lon in _ZONE_ROWS
}


In [3]:
# Search configuration for every product to price. The outer key is the label
# used in the output CSV file name. Per-product fields:
#   "term":       search text substituted into lookup_url.
#   "packagings": pattern handed to Series.str.contains on the "presentacion"
#                 column of the search results; "|" separates accepted sizes.
#   "category":   optional category id for the search; get_csvs_for_region
#                 falls back to "0" when it is absent.
lookup_products = {
    "Aceite Girasol": {
        "term": "aceite girasol 1.5 lt",
        "packagings": "1.5 lt",
    },
    "Agua Bidón": {
        "term": "agua 6 lt",
        "packagings": "6.0 lt|6.25 lt|6.3 lt|6.5 lt",
        "category": "05",
    },
    "Arroz Integral": {
        "term": "arroz integral 1 kg",
        "packagings": "1.0 kg",
    },
    "Arroz Largo Fino": {
        "term": "arroz largo fino 1 kg",
        "packagings": "1.0 kg",
    },
    "Arvejas Conserva": {
        "term": "arvejas 3",
        "packagings": "300.0 gr|320.0 gr|340.0 gr|350.0 gr",
    },
    "Arvejas Secas": {
        "term": "arvejas",
        "packagings": "500.0 gr",
    },
    "Avena Instántanea": {
        "term": "avena instantanea",
        "packagings": "350.0 gr|400.0 gr",
    },
    "Azúcar": {
        "term": "azucar 1 kg",
        "packagings": "1.0 kg",
    },
    "Café Tostado": {
        "term": "cafe molido 250",
        "packagings": "250.0 gr",
    },
    "Choclo Grano": {
        "term": "choclo grano lata",
        "packagings": "300.0 gr|350.0 gr",
    },
    "Dulce de Leche": {
        "term": "dulce de leche 400",
        "packagings": "400.0 gr",
    },
    "Edulcorante Líquido": {
        "term": "edulcorante liquido 2",
        "packagings": "200.0 cc|200.0 ml|250.0 cc|250.0 ml",
    },
    "Fideos Largos": {
        "term": "tallarin",
        "packagings": "500.0 gr",
    },
    "Garbanzos Secos": {
        "term": "garbanzos 500",
        "packagings": "500.0 gr",
    },
    "Harina 000": {
        "term": "harina 000",
        "packagings": "1.0 kg",
    },
    "Harina Maiz": {
        "term": "harina maiz",
        "packagings": "500.0 gr",
    },
    "Harina Integral": {
        "term": "harina integral 1",
        "packagings": "1.0 kg",
    },
    "Leche Descremada (SachetCaja)": {
        "term": "leche descremada 1",
        "packagings": "1.0 lt",
    },
    "Leche en Polvo Descremada": {
        "term": "leche en polvo descremada 400",
        "packagings": "400.0 gr",
    },
    "Lentejas Secas": {
        "term": "lentejas 400",
        "packagings": "400.0 gr",
    },
    "Levadura Seca": {
        "term": "levadura 20",
        "packagings": "20.0 gr",
    },
    "Manteca": {
        "term": "manteca 200",
        "packagings": "200.0 gr",
        "category": "06",
    },
    "Poroto Alubia": {
        "term": "poroto alubia",
        "packagings": "500.0 gr",
    },
    "Queso Untable Light": {
        "term": "queso untable light 300",
        "packagings": "300.0 gr",
    },
    "Té en Saquitos": {
        "term": "te saquitos 25",
        "packagings": "25.0 un",
    },
    "Tomate Perita en Lata": {
        "term": "tomate perita 400",
        "packagings": "400.0 gr",
    },
    "Yerba": {
        "term": "yerba 1 kg",
        "packagings": "1.0 kg",
    },
    "Yogur Bebible Descremado": {
        "term": "yogur bebible descremado 1",
        "packagings": "1.0 lt|1.0 kg",
    },
}

In [4]:
def get_precio_lista(x):
    """Return the "precioLista" entry of ``x["preciosProducto"]`` (None if absent)."""
    return x["preciosProducto"].get("precioLista")


# Current local date as YYYYMMDD; also the name of the folder the CSVs go into.
today_string = "{:%Y%m%d}".format(datetime.today())

def _fetch_records(url, key):
    """GET ``url`` and return the list stored under ``key`` as a DataFrame.

    The frame has no rows and no columns when the payload lacks ``key`` or
    the list is empty. Raises ``requests.HTTPError`` on a 4xx/5xx answer and
    ``requests.Timeout`` when the server does not respond within 30 seconds.
    """
    response = requests.get(url, headers=HEADERS, timeout=30)
    response.raise_for_status()
    # The body is decoded as latin-1 before parsing, exactly as before.
    # NOTE(review): JSON is normally UTF-8 -- confirm the API's real encoding.
    payload = json.loads(response.content.decode("latin-1"))
    return pd.DataFrame.from_records(payload.get(key, []))


def get_csvs_for_region(locate):
    """Write one price CSV per entry of ``lookup_products`` for one region.

    Steps:
      1. Look up the branches around ``locate["lat"]`` / ``locate["lon"]``.
      2. For each product, search it in those branches, keep the results whose
         "presentacion" matches the product's "packagings" pattern and take
         the 4 with the lowest "precioMin".
      3. Fetch the per-branch prices of those candidates (only rows that have
         "actualizadoHoy" set) and save them, sorted by price, to
         ``<today_string>/<product> - <locate["code"]>.csv``.

    Products with no search results, or with no price row at all, are
    reported and skipped instead of aborting the whole run.

    Parameters
    ----------
    locate : dict
        Needs the keys "lat", "lon" and "code". It is mutated: the
        comma-separated branch ids are stored under ``locate["array"]``.

    Raises
    ------
    requests.RequestException
        On network failures, timeouts or HTTP error statuses.
    """
    sucursales = _fetch_records(
        BASE_URL + sucursales_url.format(locate["lat"], locate["lon"]),
        "sucursales",
    )
    if "id" not in sucursales.columns:
        # Nothing to query against; keep the "array" key populated for callers.
        locate["array"] = ""
        print("No branches found for {}".format(locate["code"]))
        return

    locate["array"] = ",".join(sucursales["id"].astype(str))
    sucsarray = locate["array"]

    # Make sure the output folder exists even when the function is used alone.
    os.makedirs(today_string, exist_ok=True)

    for productstr, lookupstr in lookup_products.items():
        lookingurl = BASE_URL + lookup_url.format(
            lookupstr["term"], sucsarray, lookupstr.get("category", "0")
        )
        productos = _fetch_records(lookingurl, "productos")
        if not {"id", "nombre", "presentacion", "precioMin"}.issubset(productos.columns):
            print("No results for {} - {}".format(productstr, locate["code"]))
            continue

        # "packagings" is used as a regular expression and matched as a
        # substring; na=False keeps rows without "presentacion" out of the
        # mask instead of raising.
        matches = productos["presentacion"].str.contains(lookupstr["packagings"], na=False)
        productos = productos[matches].sort_values("precioMin").iloc[:4]

        # Collect the per-candidate frames and concatenate once at the end.
        frames = []
        for candidate in productos["id"].to_list():
            candidate_url = BASE_URL + product_url.format(candidate, sucsarray)
            productdf = _fetch_records(candidate_url, "sucursales")
            if "actualizadoHoy" not in productdf.columns:
                continue
            productdf = productdf.dropna(subset=["actualizadoHoy"])
            if productdf.empty:
                continue
            productdf = productdf.assign(
                precio=productdf.apply(get_precio_lista, axis=1),
                id=candidate,
            )
            frames.append(productdf[["id", "banderaDescripcion", "direccion", "precio"]])

        if not frames:
            print("No prices for {} - {}".format(productstr, locate["code"]))
            continue

        # how="right" keeps every selected candidate, even those without prices.
        finalproductdf = pd.concat(frames).merge(
            productos[["id", "nombre"]], on="id", how="right"
        )
        finalproductdf = finalproductdf.assign(
            comercio=finalproductdf["banderaDescripcion"] + " - " + finalproductdf["direccion"],
            precio=pd.to_numeric(finalproductdf["precio"]),
        ).sort_values("precio")

        csv_string = today_string + "/" + productstr + " - " + locate["code"] + ".csv"
        finalproductdf[["nombre", "comercio", "precio"]].reset_index(drop=True).to_csv(csv_string)

        print("Saved {}".format(csv_string))

In [None]:
# Create today's output folder; exist_ok lets the cell be re-run on the same
# day without failing with FileExistsError.
os.makedirs(today_string, exist_ok=True)

# Generate the CSVs for every configured region.
for region in locations.values():
    get_csvs_for_region(region)

Saved 20191125/Aceite Girasol - NO.csv
Saved 20191125/Agua Bidón - NO.csv
Saved 20191125/Arroz Integral - NO.csv
Saved 20191125/Arroz Largo Fino - NO.csv
Saved 20191125/Arvejas Conserva - NO.csv
Saved 20191125/Arvejas Secas - NO.csv
Saved 20191125/Avena Instántanea - NO.csv
Saved 20191125/Azúcar - NO.csv
Saved 20191125/Café Tostado - NO.csv
Saved 20191125/Choclo Grano - NO.csv
Saved 20191125/Dulce de Leche - NO.csv
Saved 20191125/Edulcorante Líquido - NO.csv
Saved 20191125/Fideos Largos - NO.csv
Saved 20191125/Garbanzos Secos - NO.csv
Saved 20191125/Harina 000 - NO.csv
Saved 20191125/Harina Maiz - NO.csv
Saved 20191125/Harina Integral - NO.csv
Saved 20191125/Leche Descremada (SachetCaja) - NO.csv
Saved 20191125/Leche en Polvo Descremada - NO.csv
Saved 20191125/Lentejas Secas - NO.csv
Saved 20191125/Levadura Seca - NO.csv
Saved 20191125/Manteca - NO.csv
