In [1]:
#|echo: false
import pandas as pd, numpy as np, matplotlib.pyplot as plt, altair as alt, pytz
from fastcore.all import *
from datetime import datetime, timedelta

## Get data

In [2]:
#|echo: false
# Report when the notebook was last run, expressed in America/Lima local time.
# `%H:%M:%S` replaces `%T`: both render the same 24-hour clock, but `%T` is not among the
# C89 format codes that Python documents as working on every platform.
print(f'Last execution time: {datetime.now(pytz.timezone("America/Lima")).strftime("%d/%m/%Y %H:%M:%S")}')

Last execution time: 06/11/2023 05:52:22


In [3]:
#|code-summary: Products type filter
# Substrings searched for in each product's `category` value; products whose category
# contains none of them are left out of the data table.
explore_types = [
    'frutas',
    'lacteos',
    'verduras',
    'embutidos',
    'panaderia',
    'desayuno',
    'congelados',
    'abarrotes',
    'aves',
    'carnes',
    'pescados',
]

In [4]:
#|code-summary: Data table
# Build the master price table from every non-empty CSV found under `path`.
# Each file stem is expected to look like `<store>_<digits>`; the digits are parsed as the date.
path = Path('../../output')
csv_files = L(path.glob('*.csv')).filter(lambda o: o.stat().st_size > 0)
# Raw string literals: `\d` is a regex escape, not a Python string escape, and writing it (or the
# unnecessary `\_`) in a plain literal triggers an "invalid escape sequence" warning.
pat_store = re.compile(r'(.+)_\d+')
pat_date = re.compile(r'.+_(\d+)')
df = (
    pd.concat([pd.read_csv(o).assign(store=pat_store.match(o.stem)[1], date=pat_date.match(o.stem)[1])
               for o in csv_files], ignore_index=True)
    .pipe(lambda d: d.assign(
        # Lower-case the product name and suffix it with the store it was read from
        name=d.name.str.lower()+' ('+d.store+')',
        # Use `sku` where it is present, otherwise fall back to `id`
        sku=d.id.where(d.sku.isna(), d.sku).astype(int),
        date=pd.to_datetime(d.date)
    ))
    .drop('id', axis=1)
    # Keep only the categories of interest; na=False makes a missing category count as
    # "no match" instead of putting NaN into the boolean mask
    .loc[lambda d: d.category.str.contains('|'.join(explore_types), na=False)]
    # Filter products with recent data (currently disabled)
#     .loc[lambda d: d.name.isin(d.groupby('name').date.max().loc[ge(datetime.now()-timedelta(days=30))].index)]
    # Filter empty prices
    .loc[lambda d: d.price>0]
)
print(df.shape)
df.sample(3)

(782186, 8)


Unnamed: 0,brand,uri,name,price,category,store,date,sku
1135219,Metro,https://www.metro.pe/sancochado-de-pescuezo-na...,sancochado de pescuezo nacional x kg (metro),22.3,https://www.metro.pe/carnes-aves-y-pescados/re...,metro,2023-05-22,36149
1463195,.,https://www.metro.pe/fresa-americana-600g-2642...,fresa americana 600g (metro),6.99,https://www.metro.pe/frutas-y-verduras/frutas/...,metro,2022-12-29,33561
937225,PLAZA VEA,https://www.plazavea.com.pe/camote-morado/p,camote morado (plaza_vea),3.49,https://www.plazavea.com.pe/frutas-y-verduras,plaza_vea,2023-03-09,9517


## Top changes (ratio)

In [5]:
top_changes = (
    df
    # Only compare prices observed during the 30 days before this cell is executed
    .loc[lambda d: d.date.ge(datetime.now() - timedelta(days=30))]
    .sort_values('date')
    # Relative change of every observation versus the previous one of the same store/sku
    .assign(change=lambda d: d
        .groupby(['store', 'sku'], as_index=False)
        .price.transform(lambda prices: prices.diff() / prices.shift())
    )
    # One row per store/sku: latest price and date plus the mean relative change
    .groupby(['store', 'sku'], as_index=False)
    .agg(
        last_price=('price', 'last'),
        change=('change', 'mean'),
        last_date=('date', 'last'),
    )
    # Groups with a single observation have no change value and are dropped here
    .dropna()
    # Keep only products whose latest observation falls on the most recent date present
    .loc[lambda d: d.last_date.eq(d.last_date.max())]
    # Order rows by absolute change, largest first
    .loc[lambda d: d.change.abs().sort_values(ascending=False).index]
)
top_changes.head(3)

Unnamed: 0,store,sku,last_price,change,last_date
7727,plaza_vea,10234709,11.5,0.731481,2023-11-06
6781,plaza_vea,10020904,13.9,0.356,2023-11-06
3810,plaza_vea,915,32.9,0.218519,2023-11-06


In [6]:
def plot_changes(df_changes, title):
    """Draw the price history of the products listed in `df_changes`.

    `df_changes` (without its `change` column) is merged with the notebook-level `df`
    on `store` and `sku`, and the merged rows are drawn as one line per product name,
    with `date` on the x axis and `price` on the y axis.

    The legend is bound to a point selection on `name`, and the chart is filtered
    by that selection.

    Args:
        df_changes: frame with at least `store`, `sku`, `change` and `last_price` columns.
        title: chart title.

    Returns:
        The configured Altair chart.
    """
    history = df_changes.drop(columns='change').merge(df, on=['store', 'sku'])
    legend_pick = alt.selection_point(fields=['name'], bind='legend')
    # A fixed, alphabetically sorted colour domain built from the merged product names
    name_domain = sorted(history.name.unique().tolist())

    lines = alt.Chart(history).mark_line(point=True).encode(
        x='date',
        y='price',
        color=alt.Color('name').scale(domain=name_domain),
        tooltip=['name', 'price', 'last_price'],
    )
    filtered = lines.add_params(legend_pick).transform_filter(legend_pick).interactive()
    return (
        filtered
        .properties(width=650, title=title)
        .configure_legend(orient='top', columns=3)
    )

In [7]:
# First ten rows of `top_changes`, i.e. the largest changes by absolute value
plot_changes(top_changes.head(10), 'Top changes')

In [8]:
# Ten lowest `change` values
plot_changes(top_changes.sort_values('change').head(10), 'Top drops')

In [9]:
# Ten highest `change` values
plot_changes(top_changes.sort_values('change').tail(10), 'Top increases')

## Top changes (absolute values)

In [10]:
top_changes_abs = (
    df
    # Only compare prices observed during the 30 days before this cell is executed
    .loc[lambda d: d.date.ge(datetime.now() - timedelta(days=30))]
    .sort_values('date')
    # Absolute (not relative) change: the difference between the last two prices of each
    # store/sku, repeated on every row of that group
    .assign(change=lambda d: d
        .groupby(['store', 'sku'], as_index=False)
        .price.transform(lambda prices: prices.diff().iloc[-1])
    )
    # One row per store/sku: latest price and date plus the mean of the repeated change value
    .groupby(['store', 'sku'], as_index=False)
    .agg(
        last_price=('price', 'last'),
        change=('change', 'mean'),
        last_date=('date', 'last'),
    )
    # Groups with a single observation have no change value and are dropped here
    .dropna()
    # Keep only products whose latest observation falls on the most recent date present
    .loc[lambda d: d.last_date.eq(d.last_date.max())]
    # Order rows by absolute change, largest first
    .loc[lambda d: d.change.abs().sort_values(ascending=False).index]
)
top_changes_abs.head(3)

Unnamed: 0,store,sku,last_price,change,last_date
3721,plaza_vea,553,88.7,23.2,2023-11-06
6588,plaza_vea,83497,74.5,23.1,2023-11-06
8732,plaza_vea,10710450,39.9,-15.2,2023-11-06


In [11]:
# First ten rows of `top_changes_abs`, i.e. the largest changes by absolute value
plot_changes(top_changes_abs.head(10), 'Top changes')

In [12]:
# Ten lowest `change` values
plot_changes(top_changes_abs.sort_values('change').head(10), 'Top drops')

In [13]:
# Ten highest `change` values
plot_changes(top_changes_abs.sort_values('change').tail(10), 'Top increases')

## Search specific products

In [14]:
#|echo: false
#|output: false
# Distinct product names containing both "pollo" and "entero" (in any order),
# leaving out names that contain "marinado" or "aderezo"
names = (
    df.name
    .loc[lambda s: s.str.contains(r'(?=.*pollo)(?=.*entero).*')
                   & ~s.str.contains(r'marinado|aderezo')]
    .unique()
    .tolist()
)
names

['pollo entero light  x kg (wong)',
 'pollo entero con menudencia x kg (wong)',
 'pollo entero sin menudencia x kg (wong)',
 'pollo entero\xa0artisan\xa0libre de antibióticos x kg (plaza_vea)',
 'pollo entero fresco metro x kg (metro)',
 'pollo entero light  x kg (metro)',
 'pollo entero sin menudencia importado x kg (metro)',
 'pollo entero sin menudencia x kg (metro)',
 'pollo entero congelado perdix bolsa 1400g (plaza_vea)',
 'pollo entero sin menudencia congelado perdix 1300g (plaza_vea)']

In [15]:
# Price over time for the products selected in `names`, one line per product name
(alt.Chart(df[df.name.isin(names)])
 .mark_line(point=True)
 .encode(
     x='date',
     y='price',
     color='name',
     tooltip=['name', 'price'],
 )
 .properties(width=650, title='Pollo')
 .interactive()
 .configure_legend(orient='top', columns=3)
)

In [16]:
#|echo: false
#|output: false
# Distinct product names containing "palta", leaving out names that also contain
# any of the excluded words below
names = (
    df.name
    .loc[lambda s: s.str.contains(r'palta')
                   & ~s.str.contains(r'shampoo|humectante|vino|salsa|acondicionador|aceite')]
    .unique()
    .tolist()
)
names

['palta hass orgánica 1kg (wong)',
 'palta fuerte verde x kg (wong)',
 'palta fuerte madura empacada x kg (wong)',
 'palta fuerte (plaza_vea)',
 'palta hass natifrut x kg (metro)',
 'palta nava metro x kg (metro)',
 'palta madura cremosita x kg (metro)',
 'palta fuerte metro x kg (metro)',
 'palta hass madura la caserita empaque 500g (plaza_vea)',
 "palta fuerte bell's madura (plaza_vea)",
 'palta naval x kg (plaza_vea)',
 'palta hass x kg (metro)',
 'palta hass madura natifrut 500g (metro)',
 'palta hass (plaza_vea)',
 'palta nava x kg (wong)',
 'palta super fuerte x kg (metro)',
 'palta hass natifrut x kg (wong)',
 'palta fuerte x kg (plaza_vea)',
 'palta fuerte malla 1kg z (plaza_vea)']

In [17]:
# Price over time for the products selected in `names`, one line per product name
(alt.Chart(df[df.name.isin(names)])
 .mark_line(point=True)
 .encode(
     x='date',
     y='price',
     color='name',
     tooltip=['name', 'price'],
 )
 .properties(width=650, title='Palta')
 .interactive()
 .configure_legend(orient='top', columns=3)
)

In [18]:
#|echo: false
#|output: false
# Distinct product names containing "aceite", "vegetal" and "900" (in any order),
# leaving out names that also contain any of the excluded words below
names = (
    df.name
    .loc[lambda s: s.str.contains(r'(?=.*aceite)(?=.*vegetal)(?=.*900).*')
                   & ~s.str.contains(r'atun|atún|pack|filete|caballa|tacos|sardinas')]
    .unique()
    .tolist()
)
names

['aceite vegetal primor clásico 900ml (wong)',
 'aceite vegetal primor corazón 900ml (wong)',
 'aceite vegetal primor premium 900ml (wong)',
 'aceite vegetal primor clásico botella 900ml (plaza_vea)',
 'aceite vegetal deleite botella 900ml (plaza_vea)',
 "aceite vegetal bell's botella 900ml (plaza_vea)",
 'aceite vegetal cil botella 900ml (plaza_vea)',
 'aceite vegetal cocinero botella 900ml (plaza_vea)',
 'aceite vegetal nicolini botella 900ml (plaza_vea)',
 'aceite vegetal primor premium botella 900ml (plaza_vea)',
 'aceite vegetal máxima 900ml (metro)',
 'aceite vegetal primor clásico 900ml (metro)',
 'aceite vegetal nicolini 900ml (metro)',
 'aceite vegetal primor corazón 900ml (metro)',
 'aceite vegetal cocinero 900ml (metro)',
 'aceite vegetal primor premium 900ml (metro)',
 'aceite vegetal deleite premium 900ml (metro)',
 'aceite vegetal del cielo botella 900ml (plaza_vea)',
 'aceite vegetal metro 900ml (metro)',
 'aceite vegetal primor botella 900ml (plaza_vea)',
 'aceite veget

In [19]:
# Price over time for the products selected in `names`, one line per product name
(alt.Chart(df[df.name.isin(names)])
 .mark_line(point=True)
 .encode(
     x='date',
     y='price',
     color='name',
     tooltip=['name', 'price'],
 )
 .properties(width=650, title='Aceite')
 .interactive()
 .configure_legend(orient='top', columns=3)
)

In [20]:
#|echo: false
#|output: false
# Distinct product names containing both "harina" and "1kg" (in any order).
# The exclusion filter below is kept for reference but is not applied.
names = (
    df.name
    .loc[lambda s: s.str.contains(r'(?=.*harina)(?=.*1kg).*')]
#     & ~df.name.str.contains(r'atun|atún|pack|filete|caballa|tacos|sardinas')
    .unique()
    .tolist()
)
names

['harina sin preparar nicolini 1kg (wong)',
 'harina preparada blanca flor 1kg (wong)',
 'harina preparada nicolini 1kg (wong)',
 'harina sin preparar favorita 1kg (wong)',
 'harina sin preparar blanca flor 1kg (wong)',
 "harina de maíz bell's bolsa 1kg (plaza_vea)",
 'harina de arroz costeño bolsa 1kg (plaza_vea)',
 'harina de maíz amarillo p.a.n. precocida bolsa 1kg (plaza_vea)',
 'harina de trigo preparada blanca flor bolsa 1kg (plaza_vea)',
 'harina la casa marimel integral bolsa 1kg (plaza_vea)',
 'harina selecta molitalia bolsa 1kg (plaza_vea)',
 'harina de trigo nicolini preparada bolsa 1kg (plaza_vea)',
 "harina bell's de trigo preparada bolsa 1kg (plaza_vea)",
 "harina de trigo sin preparar bell's bolsa 1kg (plaza_vea)",
 'harina preparada favorita paquete 1kg (plaza_vea)',
 'harina preparada favorita 1kg (metro)',
 'harina sin preparar favorita 1kg (metro)',
 'harina preparada blanca flor 1kg (metro)',
 'harina sin preparar blanca flor 1kg (metro)',
 'harina preparada nicolin

In [21]:
# Price over time for the products selected in `names`, one line per product name.
# The title was a copy-paste leftover ('Aceite'); `names` holds harina products at this point.
(df
 .loc[df.name.isin(names)]
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Harina')
 .interactive()
 .configure_legend(orient='top', columns=3)
)

In [22]:
#|echo: false
#|output: false
# Distinct product names matched by the "limón" + "kg" pattern.
# The alternative pattern and the exclusion filter below are kept for reference but not applied.
names = (
    df.name
    .loc[lambda s: s.str.contains(r'(?=.*limón).*(?=.*kg).*')]
#     df.name.str.contains(r'limón')
#     & ~df.name.str.contains(r'atun|atún|pack|filete|caballa|tacos|sardinas')
    .unique()
    .tolist()
)
names

['limón tahíti x kg (wong)',
 'limón x kg (wong)',
 'limón tahiti metro x kg (metro)',
 'limón x kg (metro)',
 'limón dulce x kg (wong)']

In [23]:
# Price over time for the products selected in `names`, one line per product name.
# The title was a copy-paste leftover ('Aceite'); `names` holds limón products at this point.
(df
 .loc[df.name.isin(names)]
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Limón')
 .interactive()
 .configure_legend(orient='top', columns=3)
)