In [1]:
from items import candidates
import pandas as pd
import os
from shopscraping import scrape, get_view, balance_units

def track_item(item, offline=True, sorter=None):
    try:
        candidates
    except NameError:
        from items import candidates
        
    if not(offline):
        print(f'Tracking urls of item = {item.upper()} ...', end=' ')
        
    df = scrape(candidates[item], offline=offline, sorter=sorter)
    df = balance_units(df)
    
    if not(offline):
        print('Done')
        
    if sorter: 
        df = df.sort_values(**sorter)
    return df

def collect_catalogue(items=[], offline=False, balance_weights=True,
                      sorter={'by':['comparative_unit_discount_price', 'shop', 'discount'],
                              'ascending': [True, True, False]}):
    
    '''Return dictionary of scrape results for multiple items 
    Offline parameter is passed into scrape method 
    Input: list of items
    Output: dict
    ''' 
    
    try:
        candidates
    except NameError:
        from items import candidates
        
    item_dfs = dict()
    present_items = [item for item in items if item in candidates]
    missing_items = [item for item in items if item not in candidates]
    
    if missing_items:
        print('Please add urls for items:', missing_items)

    for item in present_items:
        item_dfs[item] = track_item(item, offline=offline, sorter=sorter)
        
    return item_dfs

fridge_dir, fridge_file = 'fridge', 'fridge.csv'
fridge_file_path = os.path.join(fridge_dir, fridge_file)

if fridge_file in os.listdir(fridge_dir):
    df = pd.read_csv(fridge_file_path)
    df = df.sort_values(by=['own', 'hunt', 'asap', 'items'], ascending=[True, False, False, True]).reset_index(drop=True)
    
    items_onwatch = sorted(candidates.keys())
    items_own = df['items'].loc[df['own']==1].tolist()
    items_hunt = df['items'].loc[df['hunt']==1].tolist()
    items_asap = df['items'].loc[df['asap']==1].tolist()
    
    print(f'onwatch:', items_onwatch, end='\n\n')
    print(f'own:', items_own, end='\n\n')
    print(f'hunt:', items_hunt, end='\n\n')
    print(f'asap:', items_asap, end='\n\n')

else:
    items_onwatch = sorted(candidates.keys())
    df = pd.DataFrame(dict(items=items_onwatch, 
                           own=[0 for n in items], 
                           hunt=[0 for n in items], 
                           asap=[0 for n in items]))
    df.to_csv(fridge_file_path, index=False)

onwatch: ['Avietės', 'Aviečių uogienė', 'Avižų gėrimas', 'Bananai', 'Bulvės', 'Burokėliai', 'Druska', 'Džiovinti krapai', 'Grietinė', 'Imbieras', 'Karis', 'Kefyras', 'Kiaušiniai', 'Kokosų gėrimas', 'Kopūstai', 'Laurų lapai', 'Maltos aitriosios paprikos', 'Morkos', 'Paprikos', 'Pienas UAT', 'Pomidorų pasta', 'Rausvieji svogūnai', 'Ridikėliai', 'Skumbrė', 'Sviestas', 'Sviestinis moliūgas', 'Svogūnai', 'Svogūnų laiškai', 'Trumpavaisiai agurkai', 'Viščiukų blauzdelės', 'Viščiukų filė', 'Šilauogės', 'Špinatai']

own: ['Bulvės', 'Grietinė', 'Kiaušiniai', 'Kokosų gėrimas', 'Morkos', 'Paprikos', 'Pomidorų pasta', 'Špinatai', 'Burokėliai', 'Druska', 'Imbieras', 'Karis', 'Kefyras', 'Kopūstai', 'Laurų lapai', 'Maltos aitriosios paprikos', 'Pienas UAT', 'Rausvieji svogūnai', 'Sviestinis moliūgas', 'Svogūnai', 'Trumpavaisiai agurkai']

hunt: ['Avietės', 'Aviečių uogienė', 'Džiovinti krapai', 'Svogūnų laiškai', 'Avižų gėrimas', 'Bananai', 'Ridikėliai', 'Skumbrė', 'Sviestas', 'Viščiukų blauzdelės', '

## Check and update your fridge
Select items that you own in your fridge, want to buy asap or just hunting for a good offer. Your choice will be instantly recorded in a file `fridge\fridge.csv`

In [2]:
import dash
from dash import dcc, html, Input, Output, State

# Initialize the Dash app
app = dash.Dash(__name__)

items = df['items']
columns = ['own', 'hunt', 'asap']

def mapper_th(name):
    return html.Th(name)

def mapper_td(name, i):
    return html.Td(dcc.Checklist(
                id={'type': f'check_{name}', 'index': i},
                options=[{'label': '', 'value': 'checked'}],
                value=['checked'] if df[name][i] else [],
                inline=True))

# Layout of the app
app.layout = html.Div([
    html.Table([html.Tr([html.Th("item"), *map(mapper_th, columns)]),
               *[html.Tr([html.Td(item), *map(lambda c: mapper_td(c, i), columns)]) 
                 for i, item in enumerate(items)]]),
    html.Div(id='output')
])

# Callback to handle checkbox updates
@app.callback(
    Output('output', 'children'),
    [Input({'type': 'check_own', 'index': dash.dependencies.ALL}, 'value'),
     Input({'type': 'check_hunt', 'index': dash.dependencies.ALL}, 'value'),
     Input({'type': 'check_asap', 'index': dash.dependencies.ALL}, 'value')]
)
def update_checks(own_values, hunt_values, asap_values):
    updated_own = [1 if v else 0 for v in own_values]
    updated_hunt = [1 if v else 0 for v in hunt_values]
    updated_asap = [1 if v else 0 for v in asap_values]
    
    #if item is needed asap that means it's also needed for hunt
    #but too stupid to update checkmark, sorry...
    #updated_hunt = [v1 if v2==0 else 1 for v1, v2 in zip(updated_hunt, updated_asap)]
    
    df['own'], df['hunt'], df['asap'] = updated_own, updated_hunt, updated_asap
    df.to_csv(fridge_file_path, index=False)
    

# Run the app
if __name__ == '__main__':
    app.run(debug=True, jupyter_mode="external")

Dash app running on http://127.0.0.1:8050/


## Run scan of every item on watch
You rn it at the start of a week, takes ~20min

In [4]:
collect_catalogue(items=items_onwatch, offline=False)

Could not parse url: https://www.rimi.lt/e-parduotuve/lt/produktai/vaisiai-darzoves-ir-geles/vaisiai-ir-uogos/uogos/lietuviskos-avietes-1-kl-125g/p/903227
Done
Doneking urls of item = AVIEČIŲ UOGIENĖ ... 
Doneking urls of item = AVIŽŲ GĖRIMAS ... 
Doneking urls of item = BANANAI ... 
Could not parse url: https://www.rimi.lt/e-parduotuve/lt/produktai/vaisiai-darzoves-ir-geles/darzoves-ir-grybai/bulves/sviezios-lietuviskos-fasuotos-bulves-45-1kg/p/244697
Done
Doneking urls of item = BUROKĖLIAI ... 
Doneking urls of item = DRUSKA ... 
Doneking urls of item = DŽIOVINTI KRAPAI ... 
Doneking urls of item = GRIETINĖ ... 
Doneking urls of item = IMBIERAS ... 
Doneking urls of item = KARIS ... 
Doneking urls of item = KEFYRAS ... 
Doneking urls of item = KIAUŠINIAI ... 
Doneking urls of item = KOKOSŲ GĖRIMAS ... 
Doneking urls of item = KOPŪSTAI ... 
Doneking urls of item = LAURŲ LAPAI ... 
Could not parse url: https://www.lastmile.lt/chain/IKI/product/Maltos-aitriosios-paprikos-IKI-40-g-259571

{'Avietės':                                                  url  \
 2  https://barbora.lt/produktai/sald-avietes-well...   
 7  https://www.rimi.lt/e-parduotuve/lt/produktai/...   
 3  https://www.lastmile.lt/chain/IKI/product/Avie...   
 4  https://www.lastmile.lt/chain/IKI/product/Avie...   
 0   https://barbora.lt/produktai/avietes-250-g-13955   
 5  https://www.rimi.lt/e-parduotuve/lt/produktai/...   
 1  https://barbora.lt/produktai/avietes-well-done...   
 6  https://www.rimi.lt/e-parduotuve/lt/produktai/...   
 
                                                image  \
 2  https://cdn.barbora.lt/products/248e6af1-850b-...   
 7  https://rimibaltic-res.cloudinary.com/image/up...   
 3  https://storage.googleapis.com/download/storag...   
 4  https://storage.googleapis.com/download/storag...   
 0  https://cdn.barbora.lt/products/80e91609-f148-...   
 5  https://rimibaltic-res.cloudinary.com/image/up...   
 1  https://cdn.barbora.lt/products/217e0050-d566-...   
 6  https://rimiba

## Collect your cart
### Review what's on stock (in jupyter view)
Record initial preview of `hunt list`

In [5]:
#items = ('Mangų kisielius', 'Druska', 'Burokėliai', 'Obuolių pudingas')
catalogue = collect_catalogue(items=items_hunt, offline=True)
preview_mds = []
for item in catalogue:
    df = get_view(catalogue[item])
    preview_mds.append(f'## {item}\n{df.to_markdown()}')
preview = '\n'.join(preview_mds)
with open('hunt.md', 'w', encoding='utf-8') as f:
    f.write(preview)

* Run [hunt_it.ipynb](hunt_it.ipynb) to preview recording of `hunt list` ready to push on GitHub

### Collect what to look for in a shop
Choose from `hunt list` what you want to collect

In [7]:
import dash
from dash import Dash, dash_table, html, Output, Input, callback
from itertools import chain

app = Dash()
items = items_hunt
def to_table(df, item):
    columns = []
    for i, name in enumerate(df.columns):
        if name in ('title', 'image'):
            columns.append({"name": name, "id": name, 'presentation': 'markdown'})
        else:
            columns.append({"name": name, "id": name, 'presentation': 'markdown'})
    return dash_table.DataTable(data = df.to_dict('records'), row_selectable="multi", selected_rows=[],
                                columns = columns, id=f'table-{item}', 
                                markdown_options={"html": True},
                                fill_width=False)


app.layout = html.Div([*list(chain(*[[html.Td(item), to_table(get_view(catalogue[item]), item)] for item in items])),
                       html.Div(id='selected-rows-output')])

#app.layout = html.Div([to_table(df), html.Div(id='selected-rows-output')])

# Callback to handle row selection
@app.callback(
    Output('selected-rows-output', 'children'),
    [Input(f'table-{item}', 'selected_rows') for item in items]
)
def update_output(*selected_rows):    
    # Retrieve the IDs of the selected rows
    selection = {items[i]: val for i, val in enumerate(selected_rows)}
    df = pd.concat([catalogue[item].iloc[idx] for item, idx in selection.items()])
    df.to_csv('cart.csv', index=False)
    return str(selection)
    
if __name__ == '__main__':
    app.run(debug=True, jupyter_mode="external")

Dash app running on http://127.0.0.1:8050/


Record your choice in a `collect list`

In [8]:
df = pd.read_csv('cart.csv')
sorter = {'by':['shop'], 'ascending': [True]}
df = df.sort_values(**sorter)
preview = get_view(df).to_markdown()
with open('collect.md', 'w', encoding='utf-8') as f:
    f.write(preview)

* Run [collect_it.ipynb](collect_it.ipynb) to preview recording of `collect list` ready to push on GitHub

### Push both `hunt list` and `collect list`
You'll need them in shop you visit

In [None]:
from items import candidates
from shopscraping import scrape, get_view
from IPython.display import Markdown, display
import os
import pandas as pd
from recipe_formatter import normalise_recipe, parse_recipe

def investigate_recipe(recipe_data, offline=True):
    recipe_df = normalise_recipe(recipe_data)
    #display(recipe_df)
    catalogue = collect_catalogue(items=recipe_df['name'], offline=offline)
    if catalogue:
        prices, discount_prices, discounts = [], [], []
        for weight, unit, title in recipe_df.values:
            price = None
            discount_price = None
            discount = None
            
            if title not in catalogue:
                pass
                #print('Please update candidate file with:', title)

            elif unit is None:
                pass
                
            elif not(all(catalogue[title]['comparative_unit'] == unit)):
                print(f'Inconsistent weight units: [{title}] is in {unit} but catalogue contains:', 
                      catalogue[title]['comparative_unit'].tolist())
            else:
                #display_items(catalogue[title])
                price = weight * catalogue[title]['comparative_unit_price'].min()
                discount_price = weight * catalogue[title]['comparative_unit_discount_price'].min()
                discount = price - discount_price
            prices.append(price)
            discount_prices.append(discount_price)
            discounts.append(discount)
            
        recipe_df['price'] = [round(n, 2) if n else n for n in prices]
        recipe_df['discount_price'] = [round(n, 2) if n else n for n in discount_prices]
        recipe_df['discount'] = [round(n, 2) if n else n for n in discounts]
        return recipe_df