In [1]:
#|echo: false
import pandas as pd, numpy as np, matplotlib.pyplot as plt, altair as alt, pytz
from fastcore.all import *
from datetime import datetime, timedelta

## Get data

In [2]:
#|echo: false
print(f'Last execution time: {datetime.now(pytz.timezone("America/Lima")).strftime("%d/%m/%Y %T")}')

Last execution time: 31/01/2023 00:59:09


In [3]:
explore_types = ['frutas', 'lacteos', 'verduras', 'embutidos', 'panaderia', 'desayuno', 'congelados', 'abarrotes',
                 'aves', 'carnes', 'pescados']

In [4]:
#|tbl-cap: Data table
path = Path('../../output')
csv_files = L(path.glob('*.csv')).filter(lambda o: os.stat(o).st_size>0)
pat_store = re.compile('(.+)\_\d+')
pat_date = re.compile('.+\_(\d+)')
df = (
    pd.concat([pd.read_csv(o).assign(store=pat_store.match(o.stem)[1], date=pat_date.match(o.stem)[1])
               for o in csv_files], ignore_index=True)
    .pipe(lambda d: d.assign(
        name=d.name.str.lower()+' ('+d.store+')',
        sku=d.id.where(d.sku.isna(), d.sku).astype(int),
        date=pd.to_datetime(d.date)
    ))
    .drop('id', axis=1)
    .loc[lambda d: d.category.str.contains('|'.join(explore_types))]
    # Filter products with recent data
    .loc[lambda d: d.name.isin(d.groupby('name').date.max().loc[ge(datetime.now()-timedelta(days=30))].index)]
    # Filter empty prices
    .loc[lambda d: d.price>0]
)
print(df.shape)
df.sample(3)

(263734, 8)


Unnamed: 0,brand,uri,name,price,category,store,date,sku
709348,VIA,https://www.plazavea.com.pe/endulzante-via-con...,endulzante vía con fruto del monje display 300...,37.4,https://www.plazavea.com.pe/abarrotes,plaza_vea,2022-12-26,10247893
273422,BELL'S,https://www.plazavea.com.pe/pimienta-bells-neg...,pimienta bell's negra molida sobre 18gr (plaza...,1.89,https://www.plazavea.com.pe/abarrotes,plaza_vea,2022-09-19,1101
707886,Metro,https://www.metro.pe/mandarina-con-pepa-metro-...,mandarina con pepa metro x kg (metro),3.49,https://www.metro.pe/frutas-y-verduras/frutas/...,metro,2022-11-21,59395


In [5]:
top_changes = (df
 .loc[lambda d: ~d.name.isin([
     'sillau #06 kikko botella 350 ml (metro)',
     'sillau kikko botella 500 ml (metro)',
     'salsa de soya con ajo y kión kikko 350ml (metro)'
 ])]
 # Use last 90 days of data
 .loc[lambda d: d.date>=(datetime.now()-timedelta(days=90))]
 .sort_values('date')
 # Get percentage change
 .pipe(lambda d: d.assign(**d
     .groupby(['store','sku'], as_index=False)
     .price
     .transform(lambda d: (d-d.shift())/d.shift())
     .loc[:,['price']]
 ))
 .groupby(['store','sku'], as_index=False)
 .price.mean()
 .rename({'price': 'change'}, axis=1)
 .dropna()
 .loc[lambda d: d.change.abs().sort_values(ascending=False).index]
)
top_changes.head()

Unnamed: 0,store,sku,change
4126,metro,1017226,-0.739623
5947,plaza_vea,13447,0.704338
4119,metro,1017219,-0.439024
4121,metro,1017221,-0.415094
10168,plaza_vea,10765059,-0.389061


In [6]:
(top_changes
 .head(10)
 .drop('change', axis=1)
 .merge(df, on=['store','sku'])
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Top changes')
 .configure_legend(orient='top', columns=3)
)


iteritems is deprecated and will be removed in a future version. Use .items instead.



In [7]:
(top_changes
 .sort_values('change')
 .head(10)
 .loc[:, ['store','sku']]
 .merge(df, on=['store','sku'])
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Top drops')
 .configure_legend(orient='top', columns=3)
)


iteritems is deprecated and will be removed in a future version. Use .items instead.



In [8]:
(top_changes
 .sort_values('change')
 .tail(10)
 .loc[:, ['store','sku']]
 .merge(df, on=['store','sku'])
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Top increases')
 .configure_legend(orient='top', columns=3)
)


iteritems is deprecated and will be removed in a future version. Use .items instead.



In [9]:
#|echo: false
#|output: false
names = df.name[df.name.str.contains(r'(?=.*pollo)(?=.*entero).*') &
                ~df.name.str.contains(r'marinado|aderezo')].unique().tolist()
names

['pollo entero light  x kg (metro)',
 'pollo entero fresco metro x kg (metro)',
 'pollo entero\xa0artisan\xa0libre de antibióticos x kg (plaza_vea)']

In [10]:
(df
 .loc[df.name.isin(names)]
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Pollo')
 .configure_legend(orient='top', columns=3)
)

In [11]:
#|echo: false
#|output: false
names = df.name[df.name.str.contains(r'palta') &
                ~df.name.str.contains(r'shampoo|humectante|vino|salsa|acondicionador|aceite')].unique().tolist()
names

['palta hass natifrut x kg (metro)',
 'palta fuerte metro x kg (metro)',
 'palta madura cremosita x kg (metro)',
 'palta hass madura la caserita empaque 500g (plaza_vea)',
 'palta fuerte (plaza_vea)',
 "palta fuerte bell's madura (plaza_vea)",
 'palta fuerte x kg (plaza_vea)']

In [12]:
(df
 .loc[df.name.isin(names)]
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Palta')
 .configure_legend(orient='top', columns=3)
)

In [13]:
#|echo: false
#|output: false
names = df.name[df.name.str.contains(r'(?=.*aceite)(?=.*vegetal).*') &
                ~df.name.str.contains(r'atun|atún|pack|filete|caballa|tacos|sardinas')].unique().tolist()
names

['aceite vegetal nicolini 900ml (metro)',
 'aceite vegetal metro 900ml (metro)',
 'aceite vegetal deleite premium 900ml (metro)',
 'aceite vegetal máxima 900ml (metro)',
 'aceite vegetal primor clásico 900ml (metro)',
 'aceite vegetal cocinero 900ml (metro)',
 'aceite vegetal primor corazón 900ml (metro)',
 'aceite vegetal primor premium 900ml (metro)',
 'aceite vegetal de soya del cielo botella 1l (plaza_vea)',
 'aceite vegetal primor premium botella 900ml (plaza_vea)',
 'aceite vegetal primor botella 900ml (plaza_vea)',
 'aceite vegetal nicolini botella 900ml (plaza_vea)',
 "aceite vegetal bell's botella 900ml (plaza_vea)",
 'aceite vegetal cocinero botella 900ml (plaza_vea)',
 'aceite vegetal mazola canola y girasol spray 142g (plaza_vea)',
 'aceite vegetal deleite botella 900ml (plaza_vea)',
 'aceite vegetal primor botella 1.8l (plaza_vea)',
 'anchoveta en aceite vegetal a1 lata 125g (plaza_vea)',
 'aceite vegetal del cielo soya bidón 5l (plaza_vea)',
 'aceite vegetal cil botella 9

In [14]:
#|fig-cap: Aceite vegeta
(df
 .loc[df.name.isin(names)]
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Aceite')
 .configure_legend(orient='top', columns=3)
)