In [None]:
#|echo: false
import pandas as pd, numpy as np, matplotlib.pyplot as plt, altair as alt, pytz
from fastcore.all import *
from datetime import datetime, timedelta

## Get data

In [None]:
#|echo: false
# Report when the notebook was rendered, in Lima local time.
# '%H:%M:%S' is the portable spelling of '%T': '%T' is a platform libc extension
# that is not among Python's documented strftime codes and is not supported everywhere.
print(f'Last execution time: {datetime.now(pytz.timezone("America/Lima")).strftime("%d/%m/%Y %H:%M:%S")}')

Last execution time: 02/09/2023 15:39:27


In [None]:
#|code-summary: Products type filter
# Category keywords to keep; they are joined with '|' into a single regex
# alternation when the price table is filtered by category.
explore_types = [
    'frutas',
    'lacteos',
    'verduras',
    'embutidos',
    'panaderia',
    'desayuno',
    'congelados',
    'abarrotes',
    'aves',
    'carnes',
    'pescados',
]

In [None]:
#|code-summary: Data table
# Build the price table `df` from the CSV files in the output directory.
# Each file stem has the form `<store>_<digits>`; both parts are parsed from the
# stem and attached to every row as the `store` and `date` columns.
path = Path('../../output')
# Zero-byte files are skipped (pd.read_csv raises EmptyDataError on an empty file).
csv_files = L(path.glob('*.csv')).filter(lambda o: o.stat().st_size > 0)
# Raw strings: in a normal literal '\_' and '\d' are invalid escape sequences,
# which newer Python versions report as a SyntaxWarning. '_' needs no escaping.
pat_store = re.compile(r'(.+)_\d+')
pat_date = re.compile(r'.+_(\d+)')
df = (
    pd.concat([pd.read_csv(o).assign(store=pat_store.match(o.stem)[1], date=pat_date.match(o.stem)[1])
               for o in csv_files], ignore_index=True)
    .pipe(lambda d: d.assign(
        # Product names are lower-cased and suffixed with the store, so the same
        # product sold by two stores gets two distinct labels.
        name=d.name.str.lower()+' ('+d.store+')',
        # Use `sku` when present, otherwise fall back to `id`.
        sku=d.id.where(d.sku.isna(), d.sku).astype(int),
        date=pd.to_datetime(d.date)
    ))
    .drop('id', axis=1)
    # Keep only the categories of interest. `na=False` treats a missing category as
    # "no match" instead of producing a NaN mask that `.loc` would reject.
    .loc[lambda d: d.category.str.contains('|'.join(explore_types), na=False)]
    # Filter products with recent data (currently disabled)
#     .loc[lambda d: d.name.isin(d.groupby('name').date.max().loc[ge(datetime.now()-timedelta(days=30))].index)]
    # Filter empty prices (also drops rows whose price is NaN)
    .loc[lambda d: d.price>0]
)
print(df.shape)
df.sample(3)

(679375, 8)


Unnamed: 0,sku,name,brand,category,uri,price,store,date
1338920,2118,filete de atún bell's en aceite vegetal lata 1...,BELL'S,https://www.plazavea.com.pe/abarrotes,,4.8,plaza_vea,2022-10-20
968286,26581,bombones bon o bon caja 270g (plaza_vea),ARCOR,https://www.plazavea.com.pe/abarrotes,https://www.plazavea.com.pe/bombones-arcor-bon...,27.2,plaza_vea,2023-07-10
1004953,953471,infusión orgánica sunka luxury special tea 15u...,Sunka,https://www.metro.pe/desayuno/cafe-e-infusione...,https://www.metro.pe/infusion-organica-sunka-l...,38.99,metro,2022-10-10


## Top changes (ratio)

In [None]:
# Mean relative price change per (store, sku) over the most recent 30 days of data.
#
# The window is anchored to the newest date present in `df` rather than to the wall
# clock: when the scraped data is older than 30 days, a `datetime.now()` window is
# empty and the per-group computation fails with "No objects to concatenate".
# The step-to-step change uses the built-in GroupBy.shift instead of a Python lambda
# passed to `transform`, which is vectorised and also works on an empty frame.
top_changes = (df
 # Use the last 30 days of available data to compare prices
 .loc[lambda d: d.date >= d.date.max() - timedelta(days=30)]
 .sort_values('date')
 # Previous observed price of the same product in the same store
 .assign(prev_price=lambda d: d.groupby(['store','sku']).price.shift())
 # Relative change versus the previous observation (NaN for the first one)
 .assign(change=lambda d: (d.price - d.prev_price) / d.prev_price)
 .groupby(['store','sku'], as_index=False)
 .agg({'price':'last', 'change':'mean', 'date':'last'})
 .rename({'price':'last_price', 'date':'last_date'}, axis=1)
 # Products with a single observation have no change to report
 .dropna()
 # Keep only products that were seen on the most recent date
 .loc[lambda d: d.last_date==d.last_date.max()]
 # Order by magnitude of the change, largest first
 .loc[lambda d: d.change.abs().sort_values(ascending=False).index]
)
top_changes.head(3)

ValueError: No objects to concatenate

In [None]:
def plot_changes(df_changes, title):
    """Plot the price history of the products listed in `df_changes`.

    Parameters
    ----------
    df_changes : DataFrame
        One row per product with at least `store`, `sku`, `change` and the columns
        used in the tooltip (`last_price`). The `change` column is dropped before
        the rows are joined to the global price table `df` on `store` and `sku`.
    title : str
        Chart title.

    Returns
    -------
    An Altair line chart (date vs. price, one colour per product name) whose
    legend entries can be clicked to filter the displayed product.
    """
    history = df_changes.drop('change', axis=1).merge(df, on=['store','sku'])
    legend_pick = alt.selection_point(fields=['name'], bind='legend')
    # Fix the colour domain to the sorted product names so the legend order is stable.
    product_names = sorted(history.name.unique().tolist())

    lines = alt.Chart(history).mark_line(point=True).encode(
        x='date',
        y='price',
        color=alt.Color('name').scale(domain=product_names),
        tooltip=['name','price','last_price'],
    )
    filtered = lines.add_params(legend_pick).transform_filter(legend_pick).interactive()
    sized = filtered.properties(width=650, title=title)
    return sized.configure_legend(orient='top', columns=3)

In [None]:
# Ten products with the largest relative change in either direction.
plot_changes(top_changes.head(10), 'Top changes')

In [None]:
# Ten most negative relative changes (ascending sort, first ten rows).
plot_changes(top_changes.sort_values('change').head(10), 'Top drops')

In [None]:
# Ten most positive relative changes (ascending sort, last ten rows).
plot_changes(top_changes.sort_values('change').tail(10), 'Top increases')

## Top changes (absolute values)

In [None]:
# Latest absolute price change per (store, sku) within the most recent 30 days of data.
#
# The window is anchored to the newest date present in `df` rather than to the wall
# clock, so stale data yields a report instead of an empty window that breaks the
# per-group computation. The difference to the previous observation is computed with
# the built-in GroupBy.shift, and the last difference of each product is kept.
top_changes_abs = (df
 # Use the last 30 days of available data to compare prices
 .loc[lambda d: d.date >= d.date.max() - timedelta(days=30)]
 .sort_values('date')
 # Absolute change (in price units) versus the previous observation of the product
 .assign(change=lambda d: d.price - d.groupby(['store','sku']).price.shift())
 .groupby(['store','sku'], as_index=False)
 # 'last' on `change` keeps the most recent step; it is NaN only when the product
 # has a single observation in the window
 .agg({'price':'last', 'change':'last', 'date':'last'})
 .rename({'price':'last_price', 'date':'last_date'}, axis=1)
 .dropna()
 # Keep only products that were seen on the most recent date
 .loc[lambda d: d.last_date==d.last_date.max()]
 # Order by magnitude of the change, largest first
 .loc[lambda d: d.change.abs().sort_values(ascending=False).index]
)
top_changes_abs.head(3)

In [None]:
# Ten products with the largest absolute change in either direction.
plot_changes(top_changes_abs.head(10), 'Top changes')

In [None]:
# Ten most negative absolute changes (ascending sort, first ten rows).
plot_changes(top_changes_abs.sort_values('change').head(10), 'Top drops')

In [None]:
# Ten most positive absolute changes (ascending sort, last ten rows).
plot_changes(top_changes_abs.sort_values('change').tail(10), 'Top increases')

## Search specific products

In [None]:
#|echo: false
#|output: false
# Product names containing both "pollo" and "entero" (in any order),
# excluding names that mention "marinado" or "aderezo".
names = (
    df.loc[
        df.name.str.contains(r'(?=.*pollo)(?=.*entero).*')
        & ~df.name.str.contains(r'marinado|aderezo'),
        'name',
    ]
    .unique()
    .tolist()
)
names

In [None]:
# Price history of the products selected in `names`, one line per product.
(
    alt.Chart(df[df.name.isin(names)])
    .mark_line(point=True)
    .encode(
        x='date',
        y='price',
        color='name',
        tooltip=['name','price'],
    )
    .properties(width=650, title='Pollo')
    .interactive()
    .configure_legend(orient='top', columns=3)
)

In [None]:
#|echo: false
#|output: false
# Product names containing "palta", excluding products whose name also mentions
# one of the listed unrelated terms.
names = (
    df.loc[
        df.name.str.contains(r'palta')
        & ~df.name.str.contains(r'shampoo|humectante|vino|salsa|acondicionador|aceite'),
        'name',
    ]
    .unique()
    .tolist()
)
names

In [None]:
# Price history of the products selected in `names`, one line per product.
(
    alt.Chart(df[df.name.isin(names)])
    .mark_line(point=True)
    .encode(
        x='date',
        y='price',
        color='name',
        tooltip=['name','price'],
    )
    .properties(width=650, title='Palta')
    .interactive()
    .configure_legend(orient='top', columns=3)
)

In [None]:
#|echo: false
#|output: false
# Product names containing "aceite", "vegetal" and "900" (in any order),
# excluding names that also mention one of the listed unrelated terms.
names = (
    df.loc[
        df.name.str.contains(r'(?=.*aceite)(?=.*vegetal)(?=.*900).*')
        & ~df.name.str.contains(r'atun|atún|pack|filete|caballa|tacos|sardinas'),
        'name',
    ]
    .unique()
    .tolist()
)
names

In [None]:
# Price history of the products selected in `names`, one line per product.
(
    alt.Chart(df[df.name.isin(names)])
    .mark_line(point=True)
    .encode(
        x='date',
        y='price',
        color='name',
        tooltip=['name','price'],
    )
    .properties(width=650, title='Aceite')
    .interactive()
    .configure_legend(orient='top', columns=3)
)

In [None]:
#|echo: false
#|output: false
# Product names containing both "harina" and "1kg" (in any order).
# No exclusion list is applied to this search.
names = (
    df.loc[df.name.str.contains(r'(?=.*harina)(?=.*1kg).*'), 'name']
    .unique()
    .tolist()
)
names

In [None]:
# Price history of the flour products selected in `names`, one line per product.
# The title was previously 'Aceite', copied over from the oil chart.
(df
 .loc[df.name.isin(names)]
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Harina')
 .interactive()
 .configure_legend(orient='top', columns=3)
)

In [None]:
#|echo: false
#|output: false
# Product names containing both "limón" and "kg".
# No exclusion list is applied to this search.
names = (
    df.loc[df.name.str.contains(r'(?=.*limón).*(?=.*kg).*'), 'name']
    .unique()
    .tolist()
)
names

['limón tahiti metro x kg (metro)',
 'limón dulce x kg (wong)',
 'limón x kg (wong)',
 'limón x kg (metro)',
 'limón tahíti x kg (wong)']

In [None]:
# Price history of the lime products selected in `names`, one line per product.
# The title was previously 'Aceite', copied over from the oil chart.
(df
 .loc[df.name.isin(names)]
 .pipe(alt.Chart)
 .mark_line(point=True)
 .encode(x='date', y='price', color='name', tooltip=['name','price'])
 .properties(width=650, title='Limón')
 .interactive()
 .configure_legend(orient='top', columns=3)
)