In [34]:
from datetime import datetime
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import requests
from zipfile import ZipFile

# Root of the remote price API; every endpoint template below is appended to it.
BASE_URL = "https://d3e6htiiul5ek9.cloudfront.net/prod/"

# Headers sent with every request made in this notebook.
HEADERS = {"User-Agent": "Mozilla/5.0"}

# Endpoint templates, filled positionally with str.format.
# Placeholders: latitude, longitude.
sucursales_url = "sucursales?lat={}&lng={}&limit=30"
# Placeholders: search term, comma-separated branch ids, category id.
lookup_url = (
    "productos?string={}"
    "&array_sucursales={}"
    "&offset=0&limit=100"
    "&id_categoria={}"
)
# Placeholders: product id, comma-separated branch ids.
product_url = "producto?limit=50&id_producto={}&array_sucursales={}"
# Placeholders: category id, search term, comma-separated branch ids, latitude, longitude.
grouped_url = (
    "categoria?id_categoria={}"
    "&string={}"
    "&array_sucursales={}"
    "&lat={}&lng={}"
)
# Placeholders: search term, category id, branch id.
suc_grouped_url = (
    "sucursal?string={}"
    "&id_categoria={}"
    "&id_sucursal={}"
    "&sort=precio_lista"
)

In [2]:
# Eight reference coordinates for the project, keyed by zone name.
# Each row is (zone name, short code, latitude, longitude). The short code
# names the per-zone output folder; coordinates are kept as strings because
# they are only ever pasted into request URLs.
locations = {
    zone: {"code": code, "lat": lat, "lon": lon}
    for zone, code, lat, lon in (
        ("Noroeste", "NO", "-31.3417943", "-64.2538802"),
        ("Norte", "N", "-31.3613381", "-64.2066416"),
        ("Centro-oeste", "CO", "-31.4086063", "-64.2084814"),
        ("Centro-este", "CE", "-31.4054267", "-64.1956076"),
        ("Oeste", "O", "-31.4068982", "-64.2413965"),
        ("Sudoeste", "SO", "-31.4671374", "-64.2272544"),
        ("Sudeste", "SE", "-31.4696984", "-64.1715217"),
        ("Este", "E", "-31.4215951", "-64.1226057"),
    )
}


In [3]:
# Products to price-check, keyed by the label that becomes the CSV file name.
# Each entry holds:
#   "term"       - search text inserted into the product lookup URL.
#   "packagings" - pattern matched against the "presentacion" column with
#                  str.contains; "|" separates the accepted package sizes.
#   "category"   - optional category id for the lookup URL; "0" is sent when
#                  the key is absent.
lookup_products = {
    "Aceite Girasol": {
        "term": "aceite girasol 1.5 lt",
        "packagings": "1.5 lt",
    },
    "Agua Bidón": {
        "term": "agua 6 lt",
        "packagings": "6.0 lt|6.25 lt|6.3 lt|6.5 lt",
        "category": "05",
    },
    "Arroz Integral": {
        "term": "arroz integral 1 kg",
        "packagings": "1.0 kg",
    },
    "Arroz Largo Fino": {
        "term": "arroz largo fino 1 kg",
        "packagings": "1.0 kg",
    },
    "Arvejas Conserva": {
        "term": "arvejas 3",
        "packagings": "300.0 gr|320.0 gr|340.0 gr|350.0 gr",
    },
    "Arvejas Secas": {
        "term": "arvejas",
        "packagings": "500.0 gr",
    },
    "Avena Instántanea": {
        "term": "avena instantanea",
        "packagings": "350.0 gr|400.0 gr|500.0 gr",
    },
    "Azúcar": {
        "term": "azucar 1 kg",
        "packagings": "1.0 kg",
    },
    "Café Tostado": {
        "term": "cafe molido 250",
        "packagings": "250.0 gr",
    },
    "Choclo Grano": {
        "term": "choclo grano lata",
        "packagings": "300.0 gr|350.0 gr",
    },
    "Dulce de Leche": {
        "term": "dulce de leche 400",
        "packagings": "400.0 gr",
    },
    "Edulcorante Líquido": {
        "term": "edulcorante liquido 2",
        "packagings": "200.0 cc|200.0 ml|250.0 cc|250.0 ml",
    },
    "Fideos Largos": {
        "term": "tallarin",
        "packagings": "500.0 gr",
    },
    "Garbanzos Secos": {
        "term": "garbanzos 500",
        "packagings": "500.0 gr",
    },
    "Harina 000": {
        "term": "harina 000",
        "packagings": "1.0 kg",
    },
    "Harina Maiz": {
        "term": "harina maiz",
        "packagings": "500.0 gr",
    },
    "Harina Integral": {
        "term": "harina integral 1",
        "packagings": "1.0 kg",
    },
    "Leche Descremada (SachetCaja)": {
        "term": "leche descremada 1",
        "packagings": "1.0 lt",
    },
    "Leche en Polvo Descremada": {
        "term": "leche en polvo descremada 400",
        "packagings": "400.0 gr",
    },
    "Lentejas Secas": {
        "term": "lentejas 400",
        "packagings": "400.0 gr",
    },
    "Levadura Seca": {
        "term": "levadura 20",
        "packagings": "20.0 gr",
    },
    "Manteca": {
        "term": "manteca 200",
        "packagings": "200.0 gr",
        "category": "06",
    },
    "Poroto Alubia": {
        "term": "poroto alubia",
        "packagings": "500.0 gr",
    },
    "Queso Untable Light": {
        "term": "queso untable light",
        "packagings": "290.0 gr|300.0 gr",
    },
    "Té en Saquitos": {
        "term": "te saquitos 25",
        "packagings": "25.0 un",
    },
    "Tomate Perita en Lata": {
        "term": "tomate perita 400",
        "packagings": "400.0 gr",
    },
    "Yerba": {
        "term": "yerba 1 kg",
        "packagings": "1.0 kg",
    },
    "Yogur Bebible Descremado": {
        "term": "yogur bebible descremado 1",
        "packagings": "1.0 lt|1.0 kg",
    },
}

In [5]:
def get_precio_lista(x):
    """Return the "precioLista" entry of ``x["preciosProducto"]``, or None when it is absent."""
    precios = x["preciosProducto"]
    return precios.get("precioLista")


# Date stamp (YYYYMMDD) taken once, when this cell runs; it names the output
# folder and the zip archive.
today_string = "{:%Y%m%d}".format(datetime.today())

def _fetch_json(url, timeout=30):
    """GET ``url`` with the shared HEADERS and return the parsed JSON payload."""
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    # The body is decoded as latin-1 before parsing, exactly as before.
    return json.loads(response.content.decode("latin-1"))


def _matches_packaging(presentaciones, packagings):
    """Return a boolean mask: True where the text contains any "|"-separated option."""
    # Options are compared as literal substrings, so the "." in "1.5 lt" no
    # longer acts as a regex wildcard; missing values simply do not match.
    text = presentaciones.fillna("").astype(str)
    mask = pd.Series(False, index=presentaciones.index)
    for option in packagings.split("|"):
        mask |= text.str.contains(option, regex=False)
    return mask


def get_csvs_for_region(locate):
    """Download prices for every entry of ``lookup_products`` around one location
    and write one CSV per product.

    For each product, the (up to) four matching items with the lowest
    "precioMin" are kept, their per-branch list prices are fetched (only rows
    with a non-null "actualizadoHoy"), and the result is written sorted by
    price to ``<today_string>/<code>/<product>.csv``.

    Parameters
    ----------
    locate : dict
        Region entry with "lat", "lon" and "code" keys. The comma-separated
        branch ids found for the coordinates are stored back into
        ``locate["array"]``.

    Side effects
    ------------
    Creates the output folder when missing, writes the CSV files and prints one
    line per product (either "Saved ..." or the reason it was skipped).

    Raises
    ------
    requests.HTTPError
        If any API call answers with an error status.
    """
    branches_payload = _fetch_json(
        BASE_URL + sucursales_url.format(locate["lat"], locate["lon"])
    )
    sucursales = pd.DataFrame.from_records(branches_payload.get("sucursales", []))
    if sucursales.empty:
        locate["array"] = ""
        print("No branches found for {}; nothing saved".format(locate["code"]))
        return
    # Join the ids directly rather than massaging the repr of a Python list.
    locate["array"] = ",".join(map(str, sucursales["id"]))
    sucsarray = locate["array"]

    # Harmless when the caller already created the folder.
    os.makedirs("{}/{}".format(today_string, locate["code"]), exist_ok=True)

    for productstr, lookupstr in lookup_products.items():
        search_payload = _fetch_json(
            BASE_URL
            + lookup_url.format(lookupstr["term"], sucsarray, lookupstr.get("category", "0"))
        )
        productos = pd.DataFrame.from_records(search_payload.get("productos", []))
        if productos.empty:
            print("No products found for {}; skipped".format(productstr))
            continue

        wanted = _matches_packaging(productos["presentacion"], lookupstr["packagings"])
        productos = productos[wanted].sort_values("precioMin").iloc[:4]

        # Collect the per-candidate frames and concatenate once after the loop.
        price_frames = []
        for candidate in productos["id"].to_list():
            detail_payload = _fetch_json(BASE_URL + product_url.format(candidate, sucsarray))
            productdf = pd.DataFrame.from_records(detail_payload.get("sucursales", []))
            if "actualizadoHoy" not in productdf.columns:
                continue
            productdf = productdf.dropna(subset=["actualizadoHoy"])
            if productdf.empty:
                # row-wise apply on an empty frame would not yield a price column
                continue
            productdf = productdf.assign(
                precio=productdf.apply(get_precio_lista, axis=1),
                id=candidate,
            )
            price_frames.append(productdf[["id", "banderaDescripcion", "direccion", "precio"]])

        if not price_frames:
            # Previously this situation ended in a KeyError on the merge below.
            print("No prices found for {}; skipped".format(productstr))
            continue

        # Right merge keeps candidates without prices as rows with empty price data.
        finalproductdf = pd.concat(price_frames).merge(
            productos[["id", "nombre"]], on="id", how="right"
        )
        finalproductdf["comercio"] = finalproductdf["banderaDescripcion"] + " - " + finalproductdf["direccion"]
        finalproductdf["precio"] = pd.to_numeric(finalproductdf["precio"])
        finalproductdf = finalproductdf.sort_values("precio")

        csv_string = "{}/{}/{}.csv".format(today_string, locate["code"], productstr)
        finalproductdf[["nombre", "comercio", "precio"]].reset_index(drop=True).to_csv(csv_string)

        print("Saved {}".format(csv_string))

In [6]:
# Create today's output folder and one subfolder per region, then download the
# CSVs for that region. makedirs(exist_ok=True) lets this cell be re-run on the
# same day without failing on the folders that already exist.
os.makedirs(today_string, exist_ok=True)
# The loop variables stay in the notebook namespace after the loop; a later
# cell reads `region`, so the names are kept.
for locate, region in locations.items():
    os.makedirs("{}/{}".format(today_string, region.get("code")), exist_ok=True)
    get_csvs_for_region(region)

Saved 20191126/NO/Aceite Girasol.csv
Saved 20191126/NO/Agua Bidón.csv
Saved 20191126/NO/Arroz Integral.csv
Saved 20191126/NO/Arroz Largo Fino.csv
Saved 20191126/NO/Arvejas Conserva.csv
Saved 20191126/NO/Arvejas Secas.csv
Saved 20191126/NO/Avena Instántanea.csv
Saved 20191126/NO/Azúcar.csv
Saved 20191126/NO/Café Tostado.csv
Saved 20191126/NO/Choclo Grano.csv
Saved 20191126/NO/Dulce de Leche.csv
Saved 20191126/NO/Edulcorante Líquido.csv
Saved 20191126/NO/Fideos Largos.csv
Saved 20191126/NO/Garbanzos Secos.csv
Saved 20191126/NO/Harina 000.csv
Saved 20191126/NO/Harina Maiz.csv
Saved 20191126/NO/Harina Integral.csv
Saved 20191126/NO/Leche Descremada (SachetCaja).csv
Saved 20191126/NO/Leche en Polvo Descremada.csv
Saved 20191126/NO/Lentejas Secas.csv
Saved 20191126/NO/Levadura Seca.csv
Saved 20191126/NO/Manteca.csv
Saved 20191126/NO/Poroto Alubia.csv
Saved 20191126/NO/Queso Untable Light.csv
Saved 20191126/NO/Té en Saquitos.csv
Saved 20191126/NO/Tomate Perita en Lata.csv
Saved 20191126/NO/Ye

Saved 20191126/E/Té en Saquitos.csv
Saved 20191126/E/Tomate Perita en Lata.csv
Saved 20191126/E/Yerba.csv
Saved 20191126/E/Yogur Bebible Descremado.csv


In [11]:
# Pack every file found under today's output folder into "<today_string>.zip",
# storing each one under its path relative to the working directory.
archive_name = '{}.zip'.format(today_string)
with ZipFile(archive_name, 'w') as archive:
    for current_dir, _subdirs, file_names in os.walk(today_string):
        for file_name in file_names:
            archive.write(os.path.join(current_dir, file_name))

In [13]:
# Request the branch list around whatever `region` currently holds; that name
# is assigned by the download loop in an earlier cell, so this cell fails on a
# kernel where the loop has not run.
sucurl = "{}{}".format(BASE_URL, sucursales_url.format(region["lat"], region["lon"]))
response = requests.get(url=sucurl, headers=HEADERS)


In [4]:
# Pick one region and build the comma-separated list of branch ids around it.
locate = locations.get("Centro-oeste")
sucurl = BASE_URL + sucursales_url.format(locate["lat"], locate["lon"])
response = requests.get(sucurl, headers=HEADERS)
# Stop on an HTTP error instead of parsing an error body as JSON.
response.raise_for_status()
sucursales = pd.DataFrame.from_records(json.loads(response.content.decode("latin-1")).get("sucursales", []))
# Join the ids directly; the previous str(list(...)) clean-up depended on the
# exact repr of the list and left ", " separators for non-string ids.
locate["array"] = ",".join(map(str, sucursales["id"]))
sucsarray = locate["array"]

In [84]:
# Search parameters for the per-category lookup: search term, category id and
# the package size to keep.
lookupstr = dict(term="queso", category="060608004", packagings="1 Kg")

groupproduct_url = "".join([
    BASE_URL,
    grouped_url.format(
        lookupstr["category"],
        lookupstr["term"],
        sucsarray,
        locate["lat"],
        locate["lon"],
    ),
])

response = requests.get(groupproduct_url, headers=HEADERS)
# One row per branch, reduced to a display label ("comercio") and the composite
# branch key "<comercio_id>-<bandera_id>-<sucursal_id>" ("sucursal").
datos = pd.DataFrame.from_records(
    json.loads(response.content.decode("latin-1")).get("sucursales", [])
).assign(
    sucursal=lambda df: df["comercio_id"].map(str) + "-" + df["bandera_id"].map(str) + "-" + df["sucursal_id"],
    comercio=lambda df: df["bandera_descripcion"] + " - " + df["direccion"],
)[["comercio", "sucursal"]]

In [81]:
# Query every branch in `datos` and keep the products whose "presentacion"
# matches lookupstr["packagings"]; `quesos` ends up with one row per
# (branch, product).
quesos_columns = ["sucursal", "producto_descripcion", "precio_lista"]
matching_frames = []
for sucursal in datos["sucursal"]:
    # Use a dedicated name: assigning to `lookup_url` here would overwrite the
    # URL template of the same name that get_csvs_for_region() formats.
    sucursal_lookup_url = BASE_URL + suc_grouped_url.format(lookupstr["term"], lookupstr["category"], sucursal)
    response = requests.get(sucursal_lookup_url, headers=HEADERS)
    quesitos = pd.DataFrame.from_records(
        json.loads(response.content.decode("latin-1")).get("result", {}).get("productos", [])
    )
    if quesitos.empty:
        continue
    # na=False: rows without a presentation are treated as non-matching.
    size_matches = quesitos["presentacion"].str.contains(lookupstr["packagings"], na=False)
    # assign() returns a new frame, so no column is written into a filtered slice.
    quesitos = quesitos[size_matches].assign(sucursal=sucursal)[quesos_columns]
    matching_frames.append(quesitos)

# Concatenate once; with no matches keep the expected columns so that a merge
# on "sucursal" still works.
quesos = pd.concat(matching_frames) if matching_frames else pd.DataFrame(columns=quesos_columns)

In [83]:
# Attach the branch label to every matching product and list the cheapest first.
# precio_lista is converted to numbers before sorting so the order is numeric
# rather than lexicographic. NOTE(review): the saved output suggests the API
# delivers prices as text - confirm. Unparseable prices become NaN and sort last.
(
    datos.merge(quesos, on="sucursal")
    .assign(precio_lista=lambda df: pd.to_numeric(df["precio_lista"], errors="coerce"))
    .sort_values("precio_lista")
)

Unnamed: 0,local,sucursal,producto_descripcion,precio_lista
30,Vea - AVENIDA COLON 461,9-1-476,Queso Cremoso en Sobre Parmalat 1 Kg,139
22,Vea - BOULEVARD CHACABUCO 199,9-1-440,Queso Cremoso en Sobre Parmalat 1 Kg,139
31,Vea - AVENIDA COLON 461,9-1-476,Queso Cremoso Procesado en Sobre El Ribeño 1 Kg,139.9
23,Vea - BOULEVARD CHACABUCO 199,9-1-440,Queso Cremoso Procesado en Sobre El Ribeño 1 Kg,139.9
38,Disco - AVENIDA COLON 683,9-2-439,Queso Cremoso Procesado en Sobre El Ribeño 1 Kg,145
0,Disco - J. L. cabrera 493,9-2-33,Queso Cremoso Procesado en Sobre El Ribeño 1 Kg,145
39,Disco - AVENIDA COLON 683,9-2-439,Queso Cremoso en Sobre Parmalat 1 Kg,149
1,Disco - J. L. cabrera 493,9-2-33,Queso Cremoso en Sobre Parmalat 1 Kg,149
53,Supermercados Cordiez - AV. VELEZ SARSFIELD 334,7-1-40,Queso Cremoso Pilarcito 1 Kg,219.9
47,Supermercados Cordiez - AV. COLON 3265,7-1-26,Queso Cremoso Trozado Masterlac 1 Kg,219.9


In [71]:
# Show the frame left in `quesitos` by the most recent iteration of the
# per-branch loop (it only exists after that loop has run).
quesitos

Unnamed: 0,precio_bulto_con_iva,precio_bulto_sin_iva,precio_leyenda_promo1,precio_leyenda_promo2,precio_lista,precio_unitario_bulto_por_unidad_venta_con_iva,precio_unitario_bulto_por_unidad_venta_sin_iva,precio_unitario_promo1,precio_unitario_promo2,presentacion,producto_descripcion,producto_sepa_id,unidad_venta,sucursal
0,,,,,219.9,,,,,1 Kg,Queso Cremoso Villa Nueva 1 Kg,7-1-0023065600000,,7-1-8
1,,,,,219.9,,,,,1 Kg,Queso Cremoso 1 Kg,0023062300000,,7-1-8
3,,,,,219.9,,,,,1 Kg,Queso Cremoso Pilarcito 1 Kg,0023038400000,,7-1-8
4,,,,,355.9,,,,,1 Kg,Queso Cremoso Trozo Cremac 1 Kg,7-1-0023057100000,,7-1-8
5,,,,,501.99,,,,,1 Kg,Queso Cremon Horma con Vitaminas La Serenisima...,7-1-0023066900000,,7-1-8
6,,,,,540.79,,,,,1 Kg,Queso Cremon Fraccionado Vitamina La Serenisim...,7-1-0023080600000,,7-1-8
