In [None]:
#|echo: false
import pandas as pd, numpy as np, matplotlib.pyplot as plt, altair as alt, pytz
from fastcore.all import *
from datetime import datetime

## Get data

In [None]:
#|echo: false
# Print the current wall-clock time in the America/Lima timezone.
# '%H:%M:%S' is the portable spelling of '%T', which is not supported by every platform's strftime.
print(f'Last execution time: {datetime.now(pytz.timezone("America/Lima")).strftime("%d/%m/%Y %H:%M:%S")}')

Last execution time: 18/01/2023 22:18:44


In [None]:
# Keywords matched against the `category` column to pick the rows kept for analysis.
explore_types = [
    'frutas',
    'lacteos',
    'verduras',
    'embutidos',
    'panaderia',
    'desayuno',
    'congelados',
    'abarrotes',
    'aves',
    'carnes',
    'pescados',
]

In [None]:
#|tbl-cap: Data table
# Load every non-empty CSV in the output folder into one frame.
# The file stem is expected to look like "<store>_<digits>"; the store and the
# trailing digits (used as the date) are extracted from it with the two patterns below.
path = Path('../../output')
csv_files = L(path.glob('*.csv')).filter(lambda o: os.stat(o).st_size>0)
# Raw strings: '\_' and '\d' inside a plain literal are invalid escape sequences
# (a warning today, an error in future Python versions). The matched text is unchanged.
pat_store = re.compile(r'(.+)_\d+')
pat_date = re.compile(r'.+_(\d+)')
df = (
    pd.concat([pd.read_csv(o).assign(store=pat_store.match(o.stem)[1], date=pat_date.match(o.stem)[1])
               for o in csv_files])
    .pipe(lambda d: d.assign(
        # lower-cased product name tagged with its store, e.g. "... (metro)"
        name=d.name.str.lower()+' ('+d.store+')',
        # take `sku` when present, otherwise fall back to `id`
        sku=d.id.where(d.sku.isna(), d.sku).astype(int),
        date=pd.to_datetime(d.date)
    ))
    .drop('id', axis=1)
    # na=False: rows with a missing category are filtered out instead of
    # producing a NaN mask that makes .loc raise.
    .loc[lambda d: d.category.str.contains('|'.join(explore_types), na=False)]
)
df.sample(3)

Unnamed: 0,brand,uri,name,price,category,store,date,sku
2961,Metro,https://www.metro.pe/pan-integral-metro-900-g-...,pan integral metro 900 g (metro),15.2,https://www.metro.pe/desayuno/panes-y-tortilla...,metro,2023-01-05,32113
4423,SUNKA,https://www.plazavea.com.pe/coca-sunka-caja-20...,coca sunka caja 20un (plaza_vea),5.59,https://www.plazavea.com.pe/desayunos,plaza_vea,2022-10-31,10701845
2919,La Panadería,https://www.metro.pe/pan-coliza-la-panaderia-x...,pan coliza la panadería x 5 unid (metro),3.9,https://www.metro.pe/panaderia-y-pasteleria/la...,metro,2022-12-12,4112


In [None]:
# Standard deviation of price per (store, sku); keep only pairs whose price
# actually varied and order them from most to least volatile.
top_changes = (
    df.groupby(['store', 'sku'], as_index=False)['price']
    .agg('std')
    .loc[lambda g: g['price'] > 0]
    .sort_values(by='price', ascending=False)
)
top_changes.head()

Unnamed: 0,store,sku,price
9634,plaza_vea,10089986,50.192285
4246,metro,959183,39.125
11631,plaza_vea,10734131,29.025782
11698,plaza_vea,10753106,25.819889
9062,plaza_vea,10039597,22.198705


In [None]:
# Price history for the first ten rows of `top_changes`, one line per product name.
alt.Chart(
    top_changes.head(10).drop(columns='price').merge(df, on=['store', 'sku'])
).mark_line(point=True).encode(
    x='date',
    y='price',
    color='name',
    tooltip=['name', 'price'],
).properties(
    width=650,
    title='Top changes',
).configure_legend(orient='top', columns=3)

  for col_name, dtype in df.dtypes.iteritems():


In [None]:
# Rank (store, sku) pairs by the difference between the last and the first
# smoothed price, in date order. The result is sorted ascending by `diff`,
# so the most negative differences (largest drops) come first.
top_drops = (df
 .sort_values('date')
 # Overwrite `price` with a 3-row rolling mean computed within each (store, sku) group.
 # NOTE(review): unpacking `**...loc[:, ['price']]` assumes transform returns a DataFrame
 # with a 'price' column under as_index=False — pandas-version dependent, confirm.
 .pipe(lambda d: d.assign(**d
     .groupby(['store','sku'], as_index=False).price
     .transform(lambda d: d.rolling(3).mean())
     .loc[:,['price']]
 ))
 # rolling(3) yields NaN for the first two rows of every group; dropna() removes those
 # rows (and any other row that has a NaN in any column).
 .dropna()
 .groupby(['store','sku'], as_index=False)
 .price
 # first/last follow the date ordering established by sort_values above
 .agg(['first','last'])
 .assign(diff=lambda d: d['last']-d['first'])
 .reset_index()
 .sort_values('diff')
)
top_drops.head()

Unnamed: 0,store,sku,first,last,diff
6326,plaza_vea,43433,159.0,103.363333,-55.636667
3956,metro,959183,156.5,104.333333,-52.166667
8665,plaza_vea,10734126,156.6,118.2,-38.4
8132,plaza_vea,10523087,79.9,53.293333,-26.606667
7078,plaza_vea,10043432,140.8,119.0,-21.8


In [None]:
# Price history for the first ten rows of `top_drops` (smallest `diff` values).
alt.Chart(
    top_drops.head(10)[['store', 'sku']].merge(df, on=['store', 'sku'])
).mark_line(point=True).encode(
    x='date',
    y='price',
    color='name',
    tooltip=['name', 'price'],
).properties(
    width=650,
    title='Top drops',
).configure_legend(orient='top', columns=3)

  for col_name, dtype in df.dtypes.iteritems():


In [None]:
# Price history for the last ten rows of `top_drops` (largest `diff` values).
alt.Chart(
    top_drops.tail(10)[['store', 'sku']].merge(df, on=['store', 'sku'])
).mark_line(point=True).encode(
    x='date',
    y='price',
    color='name',
    tooltip=['name', 'price'],
).properties(
    width=650,
    title='Top increases',
).configure_legend(orient='top', columns=3)

  for col_name, dtype in df.dtypes.iteritems():


In [None]:
#|echo: false
#|output: false
# Distinct product names containing both "pollo" and "entero" (in any order),
# excluding names that match "marinado" or "aderezo".
names = (
    df.loc[
        df.name.str.contains(r'(?=.*pollo)(?=.*entero).*')
        & ~df.name.str.contains(r'marinado|aderezo'),
        'name',
    ]
    .unique()
    .tolist()
)
names

['pollo entero light  x kg (metro)',
 'pollo entero fresco metro x kg (metro)',
 'pollo entero\xa0artisan\xa0libre de antibióticos x kg (plaza_vea)',
 'pollo entero light  x kg (wong)',
 'pollo entero con menudencia x kg (wong)',
 'pollo entero sin menudencia x kg (wong)']

In [None]:
# Price over time for the products currently listed in `names`.
alt.Chart(
    df[df['name'].isin(names)]
).mark_line(point=True).encode(
    x='date',
    y='price',
    color='name',
    tooltip=['name', 'price'],
).properties(
    width=650,
    title='Pollo',
).configure_legend(orient='top', columns=3)

In [None]:
#|echo: false
#|output: false
# Distinct product names containing "palta", excluding names that match any of
# the listed exclusion terms.
names = (
    df.loc[
        df.name.str.contains(r'palta')
        & ~df.name.str.contains(r'shampoo|humectante|vino|salsa|acondicionador|aceite'),
        'name',
    ]
    .unique()
    .tolist()
)
names

['palta nava metro x kg (metro)',
 'palta hass natifrut x kg (metro)',
 'palta fuerte metro x kg (metro)',
 'palta madura cremosita x kg (metro)',
 'palta hass madura la caserita empaque 500g (plaza_vea)',
 'palta fuerte (plaza_vea)',
 'palta naval x kg (plaza_vea)',
 "palta fuerte bell's madura (plaza_vea)",
 'palta fuerte madura empacada x kg (wong)',
 'palta hass orgánica 1kg (wong)',
 'palta hass natifrut x kg (wong)',
 'palta fuerte verde x kg (wong)',
 'palta fuerte x kg (plaza_vea)',
 'palta nava x kg (wong)',
 'palta fuerte malla 1kg z (plaza_vea)',
 'palta hass (plaza_vea)']

In [None]:
# Price over time for the products currently listed in `names`.
alt.Chart(
    df[df['name'].isin(names)]
).mark_line(point=True).encode(
    x='date',
    y='price',
    color='name',
    tooltip=['name', 'price'],
).properties(
    width=650,
    title='Palta',
).configure_legend(orient='top', columns=3)

In [None]:
#|echo: false
#|output: false
# Distinct product names containing both "aceite" and "vegetal" (in any order),
# excluding names that match any of the listed exclusion terms.
names = (
    df.loc[
        df.name.str.contains(r'(?=.*aceite)(?=.*vegetal).*')
        & ~df.name.str.contains(r'atun|atún|pack|filete|caballa|tacos|sardinas'),
        'name',
    ]
    .unique()
    .tolist()
)
names

['aceite vegetal cocinero botella 5 litros (metro)',
 'aceite vegetal nicolini 900ml (metro)',
 'aceite vegetal primor clásico 1.8l (metro)',
 'aceite vegetal metro 900ml (metro)',
 'aceite vegetal deleite premium 900ml (metro)',
 'aceite vegetal máxima 900ml (metro)',
 'aceite vegetal primor clásico 900ml (metro)',
 'aceite vegetal cocinero 900ml (metro)',
 'aceite vegetal primor corazón 900ml (metro)',
 'aceite vegetal primor premium 900ml (metro)',
 'aceite vegetal de soya del cielo botella 1l (plaza_vea)',
 'aceite vegetal primor premium botella 900ml (plaza_vea)',
 'aceite vegetal primor botella 900ml (plaza_vea)',
 'aceite vegetal nicolini botella 900ml (plaza_vea)',
 "aceite vegetal bell's botella 900ml (plaza_vea)",
 "aceite vegetal bell's botella 900ml paquete x 12un (plaza_vea)",
 'aceite vegetal cocinero botella 900ml (plaza_vea)',
 'aceite vegetal mazola canola y girasol spray 142g (plaza_vea)',
 'aceite vegetal deleite botella 900ml (plaza_vea)',
 'aceite vegetal primor co

In [None]:
#|fig-cap: Aceite vegetal
# Price over time for the products currently listed in `names`.
# The figure caption above was truncated ("Aceite vegeta"); it now reads "Aceite vegetal".
(df
 .loc[df.name.isin(names)]
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Aceite')
 .configure_legend(orient='top', columns=3)
)