# Grocery Recommender Demo
by Pepe Nunez

### 1. Ingredient list

In [50]:
# Shopping list used by the demo cells below. Names are free text; the
# matching functions compare them case-insensitively.
ingredients = [
    'cherry tomatoes',
    'zucchini',
    'watermelon',
    'carrots',
    'peppers',
    'grapes',
    'green apples',
    'lemonade',
    'bread',
    'instant coffee',
    'Spaghetti',
    'meatballs',
]

### 2. Which supermarket should we choose? 

In [51]:
# Print the per-supermarket overview, prompt for a supermarket, and show its rows.
# Requires the setup cells (imports, data, function definitions) to have been run first.
demo(ingredients)

  Supermarket  Price  Coverage  Avge Price / Ingredient
0   aldinorth 11.690     0.917                    1.063
1   aldisouth 14.130     0.833                    1.413
2        rewe 19.290     1.000                    1.607
3        lidl 20.240     1.000                    1.687
4       edeka 19.310     0.750                    2.146
5    kaufland 25.510     0.917                    2.319

Where would you like to go shopping?
rewe


Unnamed: 0,Ingredient,Supermarket,Product,Price,Coverage
60,cherry tomatoes,rewe,REWE cherry groats,3.99,1
61,zucchini,rewe,zucchini,0.89,1
62,watermelon,rewe,Watermelon,1.99,1
63,carrots,rewe,Carrots,0.99,1
64,peppers,rewe,Red pepper,1.19,1
65,grapes,rewe,Grapefruit,0.59,1
66,green apples,rewe,Red apples,1.49,1
67,lemonade,rewe,Yes! lemonade,2.34,1
68,bread,rewe,Crusty bread,1.15,1
69,instant coffee,rewe,Lindes grain coffee,1.99,1


### 3. Customers who bought those items also bought:

In [52]:
# Print recommendations for the full shopping list defined above.
get_top10_recommendations(ingredients)

If you are buying ['pint cherry tomatoes', 'large zucchini', 'kernel corn', 'carrot', 'pepper', 'capers', 'green peas', 'lemonade', 'bread', 'instant white rice', 'spaghetti', 'frozen meatballs'] you may also want to buy:
---------------------------------------------------------

1.large tomatoes chopped
2.head lettuce shredded
3.crushed tortilla chips (optional)
4.diced tomatoes with green chilies undrained
5.shredded Cheddar cheese (optional)
6.taco seasoning
7.sour cream (optional)
8.cream of celery soup
9.black beans


### 4. Ingredient recommender for recipes:

In [53]:
# Starting ingredients of a recipe; names are free text and are fuzzy-matched
# against the known ingredients before the recommender is queried.
burrito = ['hot sauce', 'Black beans', 'Diced tomato']

# Alternative input to try: ['spaghetti', 'parmesan']
get_top10_recommendations(burrito)

If you are buying ['hot sauce', 'black beans', 'diced tomatoes'] you may also want to buy:
---------------------------------------------------------

1.crushed tortilla chips (optional)
2.diced tomatoes with green chilies undrained
3.shredded Cheddar cheese (optional)
4.sour cream (optional)
5.taco seasoning
6.fluid ounce) can
7.kernel corn
8.chopped fresh ginger root
9.chili beans


# Thank you! Questions?


-----


github.com/pepenunez

linkedin.com/in/pepenunez/

---------

In [29]:
import pandas as pd
import pickle
import numpy as np
from fuzzywuzzy import fuzz
from sklearn.neighbors import NearestNeighbors
from IPython.display import display, HTML

pd.set_option('display.float_format', lambda x: '%.3f' % x)

## 0. Setup

### 0.1 Import datasets

In [30]:
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
# Feature table the KNN model is fitted on; rows are looked up by ingredient name
# and the row labels are what get_top10_recommendations prints.
ingredient_features = pd.read_pickle('data/demo/ingredient_features.pkl')
# Table whose 'ingredients' column lists the known ingredient names used by get_translation.
unique_ingredients = pd.read_pickle('data/demo/unique_ingredients.pkl')

In [31]:
# Load the cleaned product catalogue of every supermarket into one dict,
# keyed by supermarket name. The functions below read the 'Name' and
# 'Price' columns of each catalogue.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
SUPERMARKET_NAMES = ['aldinorth', 'aldisouth', 'edeka', 'kaufland', 'lidl', 'rewe']

supermarkets = {
    name: pd.read_pickle(f'data/products-clean/{name}-products-clean.pkl')
    for name in SUPERMARKET_NAMES
}

### 0.2 Define Functions

#### 04_supermarkets-modeling

In [32]:
def get_products(ingredients:list, min_ratio:int=50):
    """Pick, per supermarket, the product whose name best matches each ingredient.

    Names are compared case-insensitively with ``fuzz.ratio``. A product only
    counts as a match when its ratio is strictly greater than ``min_ratio``.
    When several products share the best ratio, the one that appears first in
    the supermarket's ``'Name'`` column wins.

    Parameters
    ----------
    ingredients : list
        Ingredient names (strings) to look up.
    min_ratio : int, default 50
        Similarity threshold a product must exceed to be considered a match.

    Returns
    -------
    dict
        Maps every key of the module-level ``supermarkets`` dict to a list
        aligned with ``ingredients``: the best-matching product name, or
        ``np.nan`` where no product cleared the threshold.
    """
    # Lower-case each ingredient once instead of once per product comparison
    targets = [ing.lower() for ing in ingredients]

    # Derive the keys from `supermarkets` so the two can never drift apart
    supermarkets_lists = {market: [] for market in supermarkets}

    for market, catalogue in supermarkets.items():
        product_names = catalogue['Name']

        for target in targets:
            # Track the running best instead of re-sorting a list on every hit
            best_product, best_ratio = np.nan, min_ratio

            for product in product_names:
                ratio = fuzz.ratio(product.lower(), target)

                # Strict '>' keeps the first product seen among equal ratios
                if ratio > best_ratio:
                    best_product, best_ratio = product, ratio

            supermarkets_lists[market].append(best_product)

    return supermarkets_lists

In [33]:
def get_list_products_price(ingredients:list):
    """Build the detailed ingredient / product / price table for all supermarkets.

    Parameters
    ----------
    ingredients : list
        Ingredient names (strings) to look up.

    Returns
    -------
    pandas.DataFrame
        One row per (supermarket, ingredient) pair with the columns
        'Ingredient', 'Supermarket', 'Product', 'Price' and 'Coverage'.
        'Coverage' is 1 when a product row was found for the ingredient and 0
        otherwise; in the latter case 'Product' and 'Price' are NaN.

    Raises
    ------
    KeyError
        If a supermarket table lacks the 'Name' or 'Price' column.
    """
    columns = ['Ingredient', 'Supermarket', 'Product', 'Price', 'Coverage']

    # Run the (expensive) fuzzy matching once; it already covers every supermarket
    supermarkets_p = get_products(ingredients)

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and row-by-row growth is quadratic anyway.
    records = []
    for market, products in supermarkets_p.items():

        s = supermarkets[market]  # Detailed product info of this supermarket

        for prod, ing in zip(products, ingredients):
            # An unmatched ingredient arrives as NaN; comparing with NaN never
            # matches, so it naturally ends up in the "not found" branch.
            matching_prices = s.loc[s['Name'] == prod, 'Price']
            found = not matching_prices.empty

            records.append({
                'Ingredient': ing,
                'Supermarket': market,
                'Product': prod if found else np.nan,
                'Price': matching_prices.iloc[0] if found else np.nan,
                'Coverage': 1 if found else 0,
            })

    result = pd.DataFrame(records, columns=columns)

    # Make sure both columns are numeric even when the frame is empty
    result['Price'] = pd.to_numeric(result['Price'], errors='coerce')
    result['Coverage'] = pd.to_numeric(result['Coverage'], errors='coerce')

    return result

In [34]:
def get_supermarket_choice(ingredients:list):
    """Summarise the shopping list per supermarket, cheapest per ingredient first.

    Parameters
    ----------
    ingredients : list
        Ingredient names (strings) to look up.

    Returns
    -------
    pandas.DataFrame
        One row per supermarket with the summed 'Price', the mean 'Coverage'
        and 'Avge Price / Ingredient', sorted ascending by the latter.
    """
    detailed = get_list_products_price(ingredients)

    # Total price and share of covered ingredients for each supermarket
    per_market = (
        detailed
        .groupby('Supermarket')
        .agg({'Price': 'sum', 'Coverage': 'mean'})
        .reset_index()
    )

    # Number of ingredients times mean coverage = number of ingredients found
    found_count = len(ingredients) * per_market['Coverage']
    per_market['Avge Price / Ingredient'] = per_market['Price'] / found_count

    return per_market.sort_values('Avge Price / Ingredient').reset_index(drop=True)

In [35]:
# Example call with a small shopping list; shows the per-supermarket summary table.
get_supermarket_choice(['Fish fingers', 'garlic', 'beer', 'vodka', 'pasta', 'zuchini'])

Unnamed: 0,Supermarket,Price,Coverage,Avge Price / Ingredient
0,aldisouth,2.93,0.5,0.977
1,lidl,5.67,0.5,1.89
2,rewe,10.63,0.833,2.126
3,kaufland,6.67,0.5,2.223
4,aldinorth,8.48,0.333,4.24
5,edeka,14.88,0.5,4.96


In [36]:
def get_shopping_list(ingredients:list):
    """Ask the user for a supermarket and display its shopping list.

    Parameters
    ----------
    ingredients : list
        Ingredient names (strings) to look up.

    Returns
    -------
    None
        The matching rows are rendered with ``display``; nothing is returned.
    """
    # The per-supermarket summary is not needed here, so it is not computed:
    # it would repeat the whole fuzzy-matching pass just to be thrown away.
    result = get_list_products_price(ingredients)

    # Ask the user where they would like to do the grocery shopping
    user_input = input('Where would you like to go shopping?\n')

    # Ignore surrounding whitespace and letter case so that e.g. "REWE "
    # still selects the 'rewe' rows instead of an empty table.
    choice = user_input.strip().lower()
    is_chosen = result['Supermarket'].str.lower() == choice

    return display(result[is_chosen])


In [37]:
def get_translation(ingredients:list, min_ratio:int=50):
    """Map free-text ingredient names onto the known ingredient vocabulary.

    Every input name is compared case-insensitively, using ``fuzz.ratio``,
    with each entry of ``unique_ingredients['ingredients']``. The entry with
    the highest ratio strictly above ``min_ratio`` is selected; among equal
    ratios the entry that appears first wins.

    Parameters
    ----------
    ingredients : list
        Ingredient names (strings) as typed by the user.
    min_ratio : int, default 50
        Similarity threshold an entry must exceed to be considered a match.

    Returns
    -------
    list
        Aligned with ``ingredients``: the matched known ingredient, or
        ``np.nan`` where nothing cleared the threshold.
    """
    known_ingredients = unique_ingredients['ingredients']
    clean_list = []

    for ingredient in ingredients:
        target = ingredient.lower()

        # Track the running best instead of re-sorting a list on every hit
        best_match, best_ratio = np.nan, min_ratio

        for ing in known_ingredients:
            ratio = fuzz.ratio(ing.lower(), target)

            # Strict '>' keeps the first entry seen among equal ratios
            if ratio > best_ratio:
                best_match, best_ratio = ing, ratio

        clean_list.append(best_match)

    return clean_list

In [38]:
def get_top10_recommendations(ingredients:list):
    """Print up to ten ingredients that go well with the given ones.

    The input names are first mapped to known ingredients with
    ``get_translation``; names without a match are dropped. For every
    remaining ingredient the 10 nearest neighbours are fetched from
    ``model_knn``. Candidates are then ranked by their mean distance over the
    input ingredients that returned them, closest first.

    Fewer than ten lines are printed when fewer candidates remain after the
    input ingredients themselves have been removed.

    Parameters
    ----------
    ingredients : list
        Ingredient names (strings) as typed by the user.

    Returns
    -------
    None
        The recommendations are printed.
    """
    top_n = 10

    ingredients = get_translation(ingredients)
    ingredients = [ing for ing in ingredients if str(ing) != 'nan']

    # Without any recognised ingredient there is nothing to query the model with
    if not ingredients:
        print('None of the ingredients could be matched, so there is nothing to recommend.')
        return

    input_arr = np.array(ingredients)

    # One row of neighbour distances / positions per input ingredient
    distances, positions = model_knn.kneighbors(ingredient_features.loc[input_arr].values, n_neighbors=10)

    # Flatten to one (position, distance) row per neighbour in a single frame
    model = pd.DataFrame({
        'Ingredient Position': positions.ravel(),
        'Distance': distances.ravel(),
    })

    # Drop the query ingredients themselves (distance of practically zero)
    model = model[model['Distance'] > 0.001]

    # Also drop neighbours that are other ingredients already in the list
    neighbour_names = ingredient_features.index[model['Ingredient Position'].to_numpy()]
    model = model[~neighbour_names.isin(ingredients)]

    # Group by ingredient position and sort by distance
    model = model.groupby('Ingredient Position').agg({'Distance': 'mean'})
    model = model.sort_values(by='Distance', ascending=True).reset_index()

    # Print output header
    print(f'If you are buying {ingredients} you may also want to buy:')
    print(f'---------------------------------------------------------\n')

    # head() caps the list at ten and copes with fewer available candidates
    for rank, position in enumerate(model['Ingredient Position'].head(top_n), start=1):
        print(f'{rank}.{ingredient_features.index[position]}')

    return

In [39]:
def demo(ingredients:list):
    """Run the interactive demo for a list of ingredients.

    Prints the per-supermarket overview, asks the user which supermarket to
    shop at, and returns the detailed rows of that supermarket.

    Parameters
    ----------
    ingredients : list
        Ingredient names (strings) to look up.

    Returns
    -------
    pandas.DataFrame
        Rows of the detailed table whose 'Supermarket' equals the user's
        answer (empty if the answer matches no supermarket).
    """
    # Detailed product / price rows for every supermarket
    detailed = get_list_products_price(ingredients)

    # Overview per supermarket, printed as plain text followed by a blank line
    overview = get_supermarket_choice(ingredients)
    print(str(overview) + '\n')

    # Ask the user where they would like to do the grocery shopping
    chosen_market = input("Where would you like to go shopping?\n")

    # Show the user their shopping list for the chosen supermarket
    is_chosen = detailed['Supermarket'] == chosen_market
    return detailed[is_chosen]

### 0.3 Train KNN model

In [40]:
# NOTE(review): NearestNeighbors is already imported in the notebook's main
# import cell, so the second import below is redundant.
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Sparse copy of the ingredient feature table; the model is fitted on it
mat_ingredient_features = csr_matrix(ingredient_features)

# Brute-force nearest-neighbour search using cosine distance
model_knn = NearestNeighbors(metric="cosine",
                             algorithm="brute",
                             n_jobs=-1)

# Fit the model; `model_knn` is the global that get_top10_recommendations queries
model_knn.fit(mat_ingredient_features)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=-1, n_neighbors=5, p=2, radius=1.0)