# Association Rule Mining 1.0

In [1]:
## setup

from collections import defaultdict, Counter
from getpass import getpass
import matplotlib.pyplot as plt
import matplotlib #as mpl
import matplotlib.style
%matplotlib inline
import numpy as np
import seaborn as sns
import os
import pandas as pd
import pickle
from PIL import Image
import pymysql
import regex as re
import seaborn as sb
from sklearn.cluster import KMeans
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from mlxtend.frequent_patterns import apriori, association_rules, fpmax, fpgrowth
from mlxtend.preprocessing import TransactionEncoder

## Run these imports to skip the data import connector and pre-processing sections
Uncomment as needed

In [11]:
## to save time and not re-run the established portions of this notebook

## import saved csv
ingredients = pd.read_csv(os.getcwd()+'/offline/ingredients.csv', dtype={'RECIPE_ID':'str',
                                                                        'MEASUREMENT':'str',
                                                                        'INGREDIENT': 'str'})
# display(ingredients.head(10))
# print(type(ingredients['MEASUREMENT'][4]))

## import the pickle files
ingredients_to_rids_df = pd.read_pickle(os.getcwd()+'/offline/ingredients_to_rids.pkl')
display(ingredients_to_rids_df.head(10))
rids_clean_ingredients_df = pd.read_pickle(os.getcwd()+'/offline/rids_clean_ingredients.pkl')
display(rids_clean_ingredients_df.head(10))

Unnamed: 0_level_0,RECIPE_ID
INGREDIENT,Unnamed: 1_level_1
sugar,"[10006, 10009, 10023, 10024, 10027, 100321, 10..."
salt,"[10028, 10059, 10066, 10072, 10085, 10109, 102..."
pepper,"[100056, 100289, 100493, 101475, 102910, 10440..."
butter,"[10003, 10008, 100084, 10010, 10011, 10015, 10..."
oil,"[100011, 100218, 100292, 100631, 100705, 10072..."
cheese,"[100033, 10082, 10147, 10155, 101955, 10231, 1..."
garlic,"[101653, 102020, 103733, 104115, 104182, 10424..."
onion,"[100714, 101475, 103178, 104846, 105106, 10512..."
flour,"[10000, 10005, 100057, 10012, 10013, 10021, 10..."
powder,"[10100, 10198, 10338, 105820, 106294, 106297, ..."


Unnamed: 0_level_0,INGREDIENT
RECIPE_ID,Unnamed: 1_level_1
10000,[milk]
100008,[sugar]
10001,[flour]
100011,"[bay, leave, lea]"
10002,"[soy, use, seed, old, bean, see, soybean, hydr..."
10003,"[corn, cornstarch, starch, star, corns]"
100033,"[cream, heavy, pint]"
10004,"[corn, cornstarch, starch, star, corns]"
10005,"[seeds, sesame, seed, see, toast]"
100056,"[bacon, vegetarian, veg, strip, thaw, con, tri]"


## Connect to database for data import
Run this section if not importing the .csv from the offline folder or to get a new set from the db.

In [14]:
import_ingr = '''
    SELECT *
    FROM recipator_db.model_ingredients
'''

with pymysql.connect(
    user=input("Enter username: "),
    password=getpass("Enter password: "),
    host="recipatornew.mysql.database.azure.com",
    port=3306,
    db="recipator_db",
    ssl_ca="./DigiCertGlobalRootCA.crt.pem",
    ssl_disabled=False
    ) as connection:
        with connection.cursor() as cursor:
            cursor.execute(import_ingr)
            ingredients = pd.DataFrame(cursor.fetchall(), columns = ['RECIPE_ID','MEASUREMENT','INGREDIENT'])

Enter username: recipator
Enter password: ········


In [20]:
display(ingredients.head())
ingredients.info()

Unnamed: 0,RECIPE_ID,MEASUREMENT,INGREDIENT
0,10000,3 cups,all-purpose flour
1,100008,8 ounces,"fresh young ginger root, peeled"
2,10001,4,eggs
3,100011,2 tablespoons,vegetable oil
4,10002,½ teaspoon,"Salt, table"


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1232731 entries, 0 to 1232730
Data columns (total 3 columns):
 #   Column       Non-Null Count    Dtype 
---  ------       --------------    ----- 
 0   RECIPE_ID    1232731 non-null  object
 1   MEASUREMENT  1232731 non-null  object
 2   INGREDIENT   1232731 non-null  object
dtypes: object(3)
memory usage: 28.2+ MB


In [17]:
tenthou_df = ingredients[ingredients['RECIPE_ID'] == '10000']
display(tenthou_df)

Unnamed: 0,RECIPE_ID,MEASUREMENT,INGREDIENT
0,10000,3 cups,all-purpose flour
140657,10000,1 1/2 cups,white sugar
280249,10000,3/4 cup,unsweetened cocoa powder
418107,10000,3/4 teaspoon,salt
551045,10000,3 teaspoons,baking powder
675387,10000,1 1/2 cups,milk
787421,10000,2,eggs
885069,10000,1 1/2 teaspoons,vanilla extract
967564,10000,3/4 cup,shortening
1034998,10000,3/4 cup,"butter, softened"


## OFFLINE WORK
Uncomment as needed

In [18]:
## save to csv for working offline
ingredients.to_csv(os.getcwd()+'/offline/ingredients_jel.csv')
# main.to_csv(os.getcwd()+'/offline/main.csv', index=False)
# subs.to_csv(os.getcwd()+'/offline/subs.csv', index=False)

## Re-Import Dataframe

In [2]:
df_ing_1 = pd.read_csv('./offline/ingredients_jel.csv', dtype={'RECIPE_ID':'str','MEASUREMENT':'str','INGREDIENT': 'str'})
df_ing = df_ing_1.copy()
df_ing = df_ing[['RECIPE_ID','INGREDIENT']]
display(df_ing.head())
print(type(df_ing['INGREDIENT'][7]))

Unnamed: 0,RECIPE_ID,INGREDIENT
0,10000,all-purpose flour
1,100008,"fresh young ginger root, peeled"
2,10001,eggs
3,100011,vegetable oil
4,10002,"Salt, table"


<class 'str'>


## Get Ingredient Lists

In [3]:
groups = df_ing.groupby('RECIPE_ID')['INGREDIENT'].apply(list)
  
print(groups.head())
  
# groups store in a new 
# column called listvalues
df1 = groups.reset_index(name = 'INGREDIENTS_LIST')
# show the dataframe
display(df1.head())
print(df1['INGREDIENTS_LIST'][7])

RECIPE_ID
10000     [all-purpose flour, white sugar, unsweetened c...
100008    [fresh young ginger root, peeled, sea salt, ri...
10001     [eggs, cocoa powder, white sugar, ground cinna...
100011    [vegetable oil, minced garlic, large onion, ch...
10002     [Salt, table, Peanut butter, smooth style, wit...
Name: INGREDIENT, dtype: object


Unnamed: 0,RECIPE_ID,INGREDIENTS_LIST
0,10000,"[all-purpose flour, white sugar, unsweetened c..."
1,100008,"[fresh young ginger root, peeled, sea salt, ri..."
2,10001,"[eggs, cocoa powder, white sugar, ground cinna..."
3,100011,"[vegetable oil, minced garlic, large onion, ch..."
4,10002,"[Salt, table, Peanut butter, smooth style, wit..."


['egg whites', 'distilled white vinegar', 'cold water', 'vanilla extract', 'super fine sugar', 'cornstarch']


According to the directions found in this link: https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/, the next step will be to create a dataset that is a list of lists. The following code will treat the ingredient lists independently of their acoompanying recipe_ids in order to create association rules.

In [54]:
ing_dataset = []

print(df1['INGREDIENTS_LIST'][7])

for i in range(9000):
    ing_dataset.append(df1['INGREDIENTS_LIST'][i])

# print(ing_dataset)

['egg whites', 'distilled white vinegar', 'cold water', 'vanilla extract', 'super fine sugar', 'cornstarch']


In [57]:
egg_dataset = []

egg_df = df1[pd.DataFrame(df1.INGREDIENTS_LIST.tolist()).isin(['egg']).any(1).values].reset_index(drop=True)

# print(egg_df['INGREDIENTS_LIST'][7])

for i in range(900):
    egg_dataset.append(egg_df['INGREDIENTS_LIST'][i])

# print(egg_df['INGREDIENTS_LIST'].head(50))
# print(egg_dataset)

## Create Frequent Itemsets

In [58]:
te = TransactionEncoder()
te_ary = te.fit(ing_dataset).transform(ing_dataset)

ing_df1 = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = fpgrowth(ing_df1, min_support=0.1, use_colnames=True)

# frequent_itemsets

In [61]:
te = TransactionEncoder()
te_ary = te.fit(egg_dataset).transform(egg_dataset)

ing_df1 = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = apriori(ing_df1, min_support=0.3, use_colnames=True)

frequent_itemsets

Unnamed: 0,support,itemsets
0,0.706667,(all-purpose flour)
1,0.333333,(baking powder)
2,0.372222,(baking soda)
3,1.0,(egg)
4,0.654444,(salt)
5,0.42,(vanilla extract)
6,0.392222,(white sugar)
7,0.302222,"(all-purpose flour, baking powder)"
8,0.34,"(baking soda, all-purpose flour)"
9,0.706667,"(egg, all-purpose flour)"


## Clean and count the ingredients

In [5]:
## make a custom stopwords list
## removing status of ingredient words, example (frozen, diced)
## removing adjectives, example (color, dry)
## only put in words that are 3 or more characters, as the code
## removes smaller words (2 chars or less) later
custom_stopwords = ['chopped','and','fresh','ground','white',
                    'sliced','minced','dried','optional','table',
                    'green','diced','shredded','red','drained',
                    'into','peeled','cut','brown','black',
                    'large','softened','frozen','grated','dry',
                    'house','firmly','skim','whole','all-purpose',
                    'dry','mix','melted','beaten','finely','yellow',
                    'cubed','cooked','medium','small','sauce',
                    'crushed','divided','thinly','boneless',
                    'bell','thawed','pieces','cubes','crust',
                    'light','save','follows','hearty','layered',
                    'pan-frying','good-quality','express','skinless',
                    'inch','heirloom','1/2"','rectangular','solid-pack',
                    'fiber','julienne-cut','2-1/2','colossal','tsp',
                    'equivalent','teaspoons','tub-style','southwest',
                    'flanken','outer','least','secret','yields',
                    'well-beaten','dente','','multi-colored','major',
                    'mrs','simply','racks','aisle','working',
                    'untrimmed','excuding','packed','circles','pick',
                    'carb','parboiled','pfs','pad','family-sized',
                    'lbs','meet','digestion','tbsp','leaves',
                    'bdg','national','arm','fairly','microwaved',
                    'diluted','starbucks','mist','lots','full-bodied',
                    'shape','shred','-in','shape','corkscrew','sun-dried',
                    'grate','-inch','per','washing','mist','toothpick',
                    'colour','canister','grillers','kerrygold','wish-bone',
                    'unripe','tempura','pre-packaged','tasting','hugs',
                    'except','fast-rising','length-wise','delightfulls',
                    'handfull','frying','<sup>&reg<sup>otel<sup>&reg<sup>','loose-pack',
                    'tight','dissolve','purpose',
                    'young', 'table', 'whites', 'halved', 'tenderloin', # PL start
                    'seedless', 'toasted', 'lean', 'self-rising', 'plain',
                    'sifted', 'skinned', 'unsalted', 'pre-washed', 'box',
                    'boxes', 'squares', 'unsweetened', 'irish', 'flavored',
                    'pink', 'ripe', 'pureed', 'spears', 'bottle', 'colored',
                    'bone-less', 'bob', 'evans®', 'removed', 'tops', 'and', 'squeezed',
                    'italian', 'zesty', 'hot', 'canadian', 'slightly', 'taste',
                    'meat', 'prepared', 'candy-coated', 'slices', 'firm', 'smooth',
                    'flank', 'thinly', 'aged', 'chunks', 'dice', 'trimmed', 'dark',
                    'crumbs', 'pitted', 'room', 'temperature', 'deveined', 'portobello',
                    'caps', 'pre-baked', 'such', 'boboli', 'low-sodium', 'instant', 'active',
                    'dry', 'bittersweet', 'rolled', 'semisweet', 'chips', 'canned', 'make',
                    'ahead', 'mix', 'jigger', 'split', 'roast', 'thick', 'chops', 'center',
                    'loin', 'cutlets', 'about', 'butt', 'bone-in', 'freshly', 'picked',
                    'gluten-free', 'vanilla-flavored', 'buckwheat', 'kraft', 'uncooked',
                    'russet', 'rinsed', 'serving', 'style', 'cans', 'halves', 'jumbo',
                    'confectioners', 'seeded', 'chuck', 'sweetened', 'casings', 'crumbled',
                    'quartered', 'soft', 'very', 'coarsely', 'cooled', 'pounded',
                    'even', 'thickness', 'whole-grain', 'head', 'broken', 'loosely',
                    'reduced-sodium', 'cored', 'florets', 'tails', 'containers',
                    'refrigerated', 'non-fat', 'cloves', 'hulled', 'salted', 'strips',
                    'shelled', 'shoulder', 'korean', 'chinese', 'gluten', 'free', 'round',
                    'raw', 'sirloin', 'asian', 'fillets', 'filet', 'basmati', 'warm', 'tips',
                    'liquid', 'picnic', 'belly', 'stale', 'skewers', 'pressed', 'crispy',
                    'quick', 'quick-cooking', 'torn', 'bite', 'size', 'bite-size', 'washed',
                    'blanched'
                   ]

for w in custom_stopwords:
    STOPWORDS.add(w)
print('There are',len(STOPWORDS),'stopwords.')

There are 483 stopwords.


In [6]:
## strip the phrases and parse the words
words_sets = ingredients["INGREDIENT"].str.split(' ').apply(lambda x: [re.sub('[,™()®;\*:.!?\'0-9#\/]','',n.strip().lower()) for n in x]).tolist()
nwords = len(words_sets)
print('Sample of word sets:\n',words_sets[:5])

Sample of word sets:
 [['all-purpose', 'flour'], ['fresh', 'young', 'ginger', 'root', 'peeled'], ['eggs'], ['vegetable', 'oil'], ['salt', 'table']]


In [15]:
# for i, v in enumerate(words_sets):
#     try:
#         if words_sets[:][i][1] == 'oil':
#             words_sets[:][i] = words_sets[:][i][0] + words_sets[:][i][1]
#         #print(words_sets[:][i][1])
#     except IndexError as e:
#         pass

In [16]:
## make a set of cleaned words and count frequency
## full file count is millions of words before checking for duplicates
words = []
frequency = {}
word_count = 0
supressed = 0
for n in words_sets:
    for w in n:
        word_count +=1
        if w not in words and w not in STOPWORDS and len(w) > 2:
            words.append(w)
            frequency[w] = 1
        elif w in STOPWORDS or len(w) <= 2:
            supressed +=1
            pass
        elif w in words:
            frequency[w] +=1
        else:
            supressed +=1

print('There are',nwords,'ingredients listed having a total of',word_count,'words.')
print(len(words), 'unique words and', supressed, 'words supressed. That is', round(100*(supressed)/word_count,2),'% reduction!')

There are 1232731 ingredients listed having a total of 3750774 words.
23734 unique words and 1626262 words supressed. That is 43.36 % reduction!


In [17]:
## sort the list by frequency and remove words with extremely low frequency
minfreq = 10

freq_df = pd.DataFrame(frequency,index=[0]).transpose().sort_values(0,ascending=False)
freq_df = freq_df.drop(freq_df[freq_df[0] <= minfreq].index)
freq_df.rename({0:'frequency'},axis=1,inplace=True)

print('There are',len(freq_df), 'words with at least', minfreq+1, 'connections, which is an overall', round(100*(nwords-len(freq_df))/nwords,2),'% reduction')
print('\n\ntop of df\n',freq_df.head(5))
print('\nbottom of df\n',freq_df.tail(5))

There are 3782 words with at least 11 connections, which is an overall 99.69 % reduction


top of df
         frequency
sugar       63373
salt        55926
pepper      50418
butter      49979
oil         46891

bottom of df
               frequency
spheres              11
originale            11
mug                  11
epicure              11
confectioner         11


In [18]:
## map the ingredients to sets of recipes that use them
ingredients_to_rids = {}

for i in freq_df.index:
    rids = []
    for row in ingredients.itertuples(index=False):
        if i in row[2]: # 'INGREDIENTS'
            rids.append(row[0]) # 'RECIPE_ID'
    ingredients_to_rids[i] = rids
ingredients_to_rids_df = pd.DataFrame(ingredients_to_rids.items(),columns=['INGREDIENT','RECIPE_ID'])
    
print('length of ingredients_to_rids',len(ingredients_to_rids))
print('example of this data: number of recipes that include cumin',len(ingredients_to_rids['cumin']))
display(ingredients_to_rids_df.head())

length of ingredients_to_rids 3782
example of this data: number of recipes that include cumin 6418


Unnamed: 0,INGREDIENT,RECIPE_ID
0,sugar,"[10006, 10009, 10023, 10024, 10027, 100321, 10..."
1,salt,"[10028, 10059, 10066, 10072, 10085, 10109, 102..."
2,pepper,"[100056, 100289, 100493, 101475, 102910, 10440..."
3,butter,"[10003, 10008, 100084, 10010, 10011, 10015, 10..."
4,oil,"[100011, 100218, 100292, 100631, 100705, 10072..."


In [19]:
## set the ingredient as the index for future iteration
ingredients_to_rids_df.set_index('INGREDIENT', inplace=True)
# ingredients_to_rids_df.drop('Unnamed: 0',axis=1,inplace=True)
display(ingredients_to_rids_df.head())

Unnamed: 0_level_0,RECIPE_ID
INGREDIENT,Unnamed: 1_level_1
sugar,"[10006, 10009, 10023, 10024, 10027, 100321, 10..."
salt,"[10028, 10059, 10066, 10072, 10085, 10109, 102..."
pepper,"[100056, 100289, 100493, 101475, 102910, 10440..."
butter,"[10003, 10008, 100084, 10010, 10011, 10015, 10..."
oil,"[100011, 100218, 100292, 100631, 100705, 10072..."


In [20]:
## find the recipes that contain these new cleaned words
rids_clean_ingredients = {}

for row in ingredients.itertuples(index=False):
    ings = []
    for i in freq_df.index:
        if i in row[2]: # 'INGREDIENTS'
            ings.append(i) # this is for the dictionary method
    rids_clean_ingredients[row[0]] = ings
rids_clean_ingredients_df = pd.DataFrame(rids_clean_ingredients.items(),columns=['RECIPE_ID','INGREDIENT'])
    
print('length of rids_clean_ingredients',len(rids_clean_ingredients))
display(rids_clean_ingredients_df.head())

length of rids_clean_ingredients 140657


Unnamed: 0,RECIPE_ID,INGREDIENT
0,10000,[milk]
1,100008,[sugar]
2,10001,[flour]
3,100011,"[bay, leave, lea]"
4,10002,"[soy, use, seed, old, bean, see, soybean, hydr..."


In [21]:
## set the recipe_id as the index for future iteration
rids_clean_ingredients_df.set_index('RECIPE_ID', inplace=True)
# rids_clean_ingredients_df.drop('Unnamed: 0',axis=1,inplace=True)
display(rids_clean_ingredients_df.head())

Unnamed: 0_level_0,INGREDIENT
RECIPE_ID,Unnamed: 1_level_1
10000,[milk]
100008,[sugar]
10001,[flour]
100011,"[bay, leave, lea]"
10002,"[soy, use, seed, old, bean, see, soybean, hydr..."


In [12]:
ing_df = ingredients_to_rids_df.copy()
display(ing_df.head())

Unnamed: 0_level_0,RECIPE_ID
INGREDIENT,Unnamed: 1_level_1
sugar,"[10006, 10009, 10023, 10024, 10027, 100321, 10..."
salt,"[10028, 10059, 10066, 10072, 10085, 10109, 102..."
pepper,"[100056, 100289, 100493, 101475, 102910, 10440..."
butter,"[10003, 10008, 100084, 10010, 10011, 10015, 10..."
oil,"[100011, 100218, 100292, 100631, 100705, 10072..."


## Save files for OFFLINE WORK
Uncomment as needed

In [22]:
## save to pickle so that the list saves as list, not string
ingredients_to_rids_df.to_pickle(os.getcwd()+'/offline/ingredients_to_rids.pkl')
rids_clean_ingredients_df.to_pickle(os.getcwd()+'/offline/rids_clean_ingredients.pkl')

## The Frequent Itemset
build from the recipe IDs to clean ingredients map

In [3]:
display(rids_clean_ingredients_df.head(10))

Unnamed: 0_level_0,INGREDIENT
RECIPE_ID,Unnamed: 1_level_1
10000,[milk]
100008,[sugar]
10001,[flour]
100011,"[bay, leave, lea]"
10002,"[soy, use, seed, old, bean, see, soybean, hydr..."
10003,"[corn, cornstarch, starch, star, corns]"
100033,"[cream, heavy, pint]"
10004,"[corn, cornstarch, starch, star, corns]"
10005,"[seeds, sesame, seed, see, toast]"
100056,"[bacon, vegetarian, veg, strip, thaw, con, tri]"


In [6]:
ci_df = rids_clean_ingredients_df.copy()
display(ci_df.head())

Unnamed: 0_level_0,INGREDIENT
RECIPE_ID,Unnamed: 1_level_1
10000,[milk]
100008,[sugar]
10001,[flour]
100011,"[bay, leave, lea]"
10002,"[soy, use, seed, old, bean, see, soybean, hydr..."


In [10]:
# for col in ci_df.columns:
#     print(col)

ten_thou_df = ci_df.filter(items=['10000'], axis=0)
display(ten_thou_df)

Unnamed: 0_level_0,INGREDIENT
RECIPE_ID,Unnamed: 1_level_1
10000,[milk]


In [None]:
## save to pickle files because why not have it be food related? :)

with open(os.getcwd()+'/offline/edges.pickle', 'wb') as handle:
    pickle.dump(edges, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open(os.getcwd()+'/offline/weights.pickle', 'wb') as handle:
    pickle.dump(weights, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open(os.getcwd()+'/offline/edges.pickle', 'rb') as handle:
    edges_pickle = pickle.load(handle)
with open(os.getcwd()+'/offline/weights.pickle', 'rb') as handle:
    weights_pickle = pickle.load(handle)

print(edges == edges_pickle, weights == weights_pickle)

## THE MODEL

In [None]:
# kmeans = KMeans(n_clusters = 10)
# kmeans.fit(rids_clean_ingredients)

In [None]:
## technique from https://towardsdatascience.com/recommendation-system-in-python-lightfm-61c85010ce17

user_book_interaction = pd.pivot_table(interactions_selected, index='user_id', columns='book_id', values='rating')
# fill missing values with 0
user_book_interaction = user_book_interaction.fillna(0)
user_id = list(user_book_interaction.index)
user_dict = {}
counter = 0 
for i in user_id:
    user_dict[i] = counter
    counter += 1
# convert to csr matrix
user_book_interaction_csr = csr_matrix(user_book_interaction.values)
user_book_interaction_csr

In [None]:
## technique from https://towardsdatascience.com/recommendation-system-in-python-lightfm-61c85010ce17

model = LightFM(loss='warp',
                random_state=2016,
                learning_rate=0.90,
                no_components=150,
                user_alpha=0.000005)
model = model.fit(user_book_interaction_csr,
                  epochs=100,
                  num_threads=16, verbose=False)

In [None]:
## technique from https://towardsdatascience.com/recommendation-system-in-python-lightfm-61c85010ce17

def sample_recommendation_user(model, interactions, user_id, user_dict, 
                               item_dict,threshold = 0,nrec_items = 5, show = True):
    
    n_users, n_items = interactions.shape
    user_x = user_dict[user_id]
    scores = pd.Series(model.predict(user_x,np.arange(n_items), item_features=books_metadata_csr))
    scores.index = interactions.columns
    scores = list(pd.Series(scores.sort_values(ascending=False).index))
    
    known_items = list(pd.Series(interactions.loc[user_id,:] \
                                 [interactions.loc[user_id,:] > threshold].index).sort_values(ascending=False))
    
    scores = [x for x in scores if x not in known_items]
    return_score_list = scores[0:nrec_items]
    known_items = list(pd.Series(known_items).apply(lambda x: item_dict[x]))
    scores = list(pd.Series(return_score_list).apply(lambda x: item_dict[x]))
    if show == True:
        print ("User: " + str(user_id))
        print("Known Likes:")
        counter = 1
        for i in known_items:
            print(str(counter) + '- ' + i)
            counter+=1
print("\n Recommended Items:")
        counter = 1
        for i in scores:
            print(str(counter) + '- ' + i)
            counter+=1

## Word clouds for fun

In [None]:
## ref: https://towardsdatascience.com/generate-meaningful-word-clouds-in-python-5b85f5668eeb

In [None]:
text = []
for i in ingredients["INGREDIENT"]:
    text.append(i)

text = ' '.join(text).lower()

wordcloud = WordCloud(stopwords = STOPWORDS, collocations = True).generate(text)

plt.imshow(wordcloud, interpolation='bilInear')
plt.axis('off')
plt.figure(figsize=(12,10))
plt.show()