In [1]:
#|echo: false
import pandas as pd, numpy as np, matplotlib.pyplot as plt, altair as alt, pytz
from fastcore.all import *
from datetime import datetime, timedelta

## Get data

In [2]:
#|echo: false
# Stamp the rendered report with the current wall-clock time in the America/Lima zone.
# The time part is spelled `%H:%M:%S` rather than `%T`: `%T` is a platform extension
# that is not in the set of strftime codes Python documents as portable.
print(f'Last execution time: {datetime.now(pytz.timezone("America/Lima")):%d/%m/%Y %H:%M:%S}')

Last execution time: 09/03/2023 05:45:17


In [3]:
# Category keywords of interest; they are joined into a regex alternation and matched
# against the `category` column when the data table is filtered.
explore_types = [
    'frutas',
    'lacteos',
    'verduras',
    'embutidos',
    'panaderia',
    'desayuno',
    'congelados',
    'abarrotes',
    'aves',
    'carnes',
    'pescados',
]

In [4]:
#|tbl-cap: Data table
# Build the main price table `df` from every non-empty CSV under `path`.
path = Path('../../output')
# Keep only CSV files with a non-zero size.
csv_files = L(path.glob('*.csv')).filter(lambda o: os.stat(o).st_size>0)
# File stems are expected to look like `<store>_<digits>`: everything before the last
# `_<digits>` is the store, the digits are parsed as the snapshot date further down.
# Raw strings keep `\d` a regex escape instead of an invalid Python string escape
# (which triggers a DeprecationWarning/SyntaxWarning on recent Python versions).
pat_store = re.compile(r'(.+)_\d+')
pat_date = re.compile(r'.+_(\d+)')
df = (
    # One frame per file, tagged with the store and date taken from its file name.
    pd.concat([pd.read_csv(o).assign(store=pat_store.match(o.stem)[1], date=pat_date.match(o.stem)[1])
               for o in csv_files], ignore_index=True)
    .pipe(lambda d: d.assign(
        # Lower-cased product name suffixed with the store, e.g. "… (metro)".
        name=d.name.str.lower()+' ('+d.store+')',
        # Use `sku` when present, otherwise fall back to `id`.
        sku=d.id.where(d.sku.isna(), d.sku).astype(int),
        date=pd.to_datetime(d.date)
    ))
    .drop('id', axis=1)
    # Keep rows whose category matches any keyword in `explore_types` (regex alternation).
    .loc[lambda d: d.category.str.contains('|'.join(explore_types))]
    # Filter products with recent data: keep names whose latest observation is within
    # the last 30 days (`ge(x)` is fastcore's curried ">= x" predicate).
    .loc[lambda d: d.name.isin(d.groupby('name').date.max().loc[ge(datetime.now()-timedelta(days=30))].index)]
    # Filter empty prices
    .loc[lambda d: d.price>0]
)
print(df.shape)
df.sample(3)

(328015, 8)


Unnamed: 0,sku,name,brand,category,uri,price,store,date
665876,10532715,nonpareils wilton rosado (plaza_vea),WILTON,https://www.plazavea.com.pe/panaderia-y-pastel...,https://www.plazavea.com.pe/nonpareils-wilton-...,5.0,plaza_vea,2023-03-02
66002,1991,nuggets de pollo bell's bolsa 15un (plaza_vea),BELL'S,https://www.plazavea.com.pe/congelados,https://www.plazavea.com.pe/nuggets-de-pollo-p...,9.9,plaza_vea,2023-01-12
162883,3424,wafers nik costa vainilla pack 6un (metro),Costa,https://www.metro.pe/abarrotes/galletas-snacks...,https://www.metro.pe/wafers-nik-costa-vainilla...,4.2,metro,2022-09-12


In [5]:
# Rank products by their average relative price change over the last 90 days.
# Result columns: store, sku, change — ordered by |change|, largest first.
top_changes = (df
 # Products left out of the ranking (presumably known bad/outlier series — TODO confirm).
 .loc[lambda d: ~d.name.isin([
     'sillau #06 kikko botella 350 ml (metro)',
     'sillau kikko botella 500 ml (metro)',
     'salsa de soya con ajo y kión kikko 350ml (metro)'
 ])]
 # Use last 90 days of data
 .loc[lambda d: d.date>=(datetime.now()-timedelta(days=90))]
 # Chronological order so that `shift()` below refers to the previous observation.
 .sort_values('date')
 # Get percentage change: replace each price by its relative change versus the previous
 # observation of the same (store, sku). Selecting the column from a plain groupby gives
 # a Series aligned on the original index, so this does not depend on how
 # `as_index=False` column selection behaves in a given pandas version.
 .assign(price=lambda d: d
     .groupby(['store','sku'])
     .price
     .transform(lambda s: (s-s.shift())/s.shift())
 )
 # Average change per product; the first observation of each product is NaN and is
 # skipped by `mean`, so products with a single observation end up NaN and are dropped.
 .groupby(['store','sku'], as_index=False)
 .price.mean()
 .rename({'price': 'change'}, axis=1)
 .dropna()
 # Order rows by absolute change, descending.
 .loc[lambda d: d.change.abs().sort_values(ascending=False).index]
)
top_changes.head()

Unnamed: 0,store,sku,change
4033,metro,1017225,0.705882
4034,metro,1017226,0.525239
7004,plaza_vea,62784,0.263158
2586,metro,564388,-0.219298
6803,plaza_vea,50027,0.20499


In [6]:
# Line chart of the price history in `df` for the ten products at the top of
# `top_changes` (i.e. the largest absolute average changes).
alt.Chart(
    top_changes.head(10).drop(columns='change').merge(df, on=['store','sku'])
).mark_line(point=True).encode(
    x='date', y='price', color='name', tooltip=['name','price']
).properties(
    width=650, title='Top changes'
).configure_legend(orient='top', columns=3)

  for col_name, dtype in df.dtypes.iteritems():


In [7]:
# Line chart of the price history in `df` for the ten products with the lowest
# (most negative) average change.
alt.Chart(
    top_changes.sort_values('change').head(10)[['store','sku']].merge(df, on=['store','sku'])
).mark_line(point=True).encode(
    x='date', y='price', color='name', tooltip=['name','price']
).properties(
    width=650, title='Top drops'
).configure_legend(orient='top', columns=3)

  for col_name, dtype in df.dtypes.iteritems():


In [8]:
# Line chart of the price history in `df` for the ten products with the highest
# average change.
alt.Chart(
    top_changes.sort_values('change').tail(10)[['store','sku']].merge(df, on=['store','sku'])
).mark_line(point=True).encode(
    x='date', y='price', color='name', tooltip=['name','price']
).properties(
    width=650, title='Top increases'
).configure_legend(orient='top', columns=3)

  for col_name, dtype in df.dtypes.iteritems():


In [9]:
#|echo: false
#|output: false
# Unique product names that mention both "pollo" and "entero" (in either order),
# leaving out any name containing "marinado" or "aderezo".
names = (
    df.loc[
        df.name.str.contains(r'(?=.*pollo)(?=.*entero).*')
        & ~df.name.str.contains(r'marinado|aderezo'),
        'name',
    ]
    .unique()
    .tolist()
)
names

['pollo entero fresco metro x kg (metro)',
 'pollo entero\xa0artisan\xa0libre de antibióticos x kg (plaza_vea)',
 'pollo entero light  x kg (metro)']

In [10]:
# Price over time for every product currently listed in `names`, one line per product.
alt.Chart(
    df[df.name.isin(names)]
).mark_line(point=True).encode(
    x='date', y='price', color='name', tooltip=['name','price']
).properties(
    width=650, title='Pollo'
).configure_legend(orient='top', columns=3)

In [11]:
#|echo: false
#|output: false
# Unique product names that mention "palta", leaving out non-produce items whose name
# contains any of the excluded words (shampoo, vino, salsa, aceite, ...).
names = (
    df.loc[
        df.name.str.contains(r'palta')
        & ~df.name.str.contains(r'shampoo|humectante|vino|salsa|acondicionador|aceite'),
        'name',
    ]
    .unique()
    .tolist()
)
names

['palta fuerte (plaza_vea)',
 'palta fuerte metro x kg (metro)',
 'palta madura cremosita x kg (metro)',
 'palta hass natifrut x kg (metro)',
 "palta fuerte bell's madura (plaza_vea)",
 'palta hass madura la caserita empaque 500g (plaza_vea)',
 'palta super fuerte x kg (metro)']

In [12]:
# Price over time for every product currently listed in `names`, one line per product.
alt.Chart(
    df[df.name.isin(names)]
).mark_line(point=True).encode(
    x='date', y='price', color='name', tooltip=['name','price']
).properties(
    width=650, title='Palta'
).configure_legend(orient='top', columns=3)

In [13]:
#|echo: false
#|output: false
# Unique product names that mention both "aceite" and "vegetal" (in either order),
# leaving out names containing any of the excluded words (canned fish, packs, ...).
names = (
    df.loc[
        df.name.str.contains(r'(?=.*aceite)(?=.*vegetal).*')
        & ~df.name.str.contains(r'atun|atún|pack|filete|caballa|tacos|sardinas'),
        'name',
    ]
    .unique()
    .tolist()
)
names

['aceite vegetal nicolini botella 900ml (plaza_vea)',
 'aceite vegetal cocinero botella 1.8l (plaza_vea)',
 'aceite vegetal deleite botella 900ml (plaza_vea)',
 'aceite vegetal del cielo soya bidón 5l (plaza_vea)',
 'aceite vegetal primor premium botella 900ml (plaza_vea)',
 'aceite vegetal nicolini 900ml (metro)',
 'aceite vegetal cocinero botella 5 litros (metro)',
 'aceite vegetal primor clásico 900ml (metro)',
 'aceite vegetal máxima 900ml (metro)',
 'aceite vegetal primor corazón 900ml (metro)',
 'aceite vegetal deleite premium 900ml (metro)',
 'aceite vegetal metro 900ml (metro)',
 'aceite vegetal cocinero 900ml (metro)',
 'aceite vegetal primor premium 900ml (metro)',
 'aceite vegetal primor botella 1.8l (plaza_vea)',
 'aceite vegetal cil botella 900ml (plaza_vea)',
 "aceite vegetal bell's botella 900ml (plaza_vea)",
 'aceite vegetal de soya del cielo botella 1l (plaza_vea)',
 "aceite vegetal bell's galonera 5l (plaza_vea)",
 'aceite vegetal cocinero botella 900ml (plaza_vea)',


In [14]:
#|fig-cap: Aceite vegetal
# Price over time for every product currently listed in `names`, one line per product.
# The figure caption above previously read "Aceite vegeta" (typo).
(df
 .loc[df.name.isin(names)]
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Aceite')
 .configure_legend(orient='top', columns=3)
)