# In [None]:
import csv
import pandas as pd
import unidecode
from bitarray import bitarray
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from ipywidgets import Layout, Button, Box, VBox, HBox, Output


#  Load alcoholic drink recipe CSV into dict
#  For every data row, column 1 is used as the recipe key/header (presumably the
#  drink name -- confirm against the CSV) and columns 17-31 are kept as that
#  recipe's ingredient slots.
array_headers = []          # column-1 value of every row, in file order
alcoholic_recipe_list = []  # ingredient slots of every row, in file order
alcoholic_recipe_dict = {}  # column-1 value -> ingredient slots (last row wins)
#  newline='' is what the csv module expects, so that line endings inside
#  quoted fields are handled by the reader instead of the text layer.
with open('alcoholic_full.csv', mode='r', encoding='utf-8', newline='') as infile:
    reader = csv.reader(infile)
    next(reader, None)  # skip the header row

    for row in reader:
        if not row:
            #  Blank lines come back as [] and would raise IndexError on row[1].
            continue
        array_headers.append(row[1])
        alcoholic_recipe_dict[row[1]] = row[17:32]
        alcoholic_recipe_list.append(row[17:32])
    #  print(str(len(alcoholic_recipe_dict)) + " recipes loaded!")

#  Load ingredient CSV into list
#  Only the first column of each data row is used. Names are lower-cased,
#  stripped of surrounding whitespace and transliterated to ASCII.
#  newline='' is what the csv module expects, so that line endings inside
#  quoted fields are handled by the reader instead of the text layer.
with open('ingredients.csv', mode='r', encoding='utf-8', newline='') as infile:
    reader = csv.reader(infile)
    next(reader, None)  # skip the header row
    ingredientlist = [
        unidecode.unidecode(row[0].lower().strip())
        for row in reader
        if row  # blank lines come back as [] and have no row[0]
    ]
    #  print(str(len(ingredientlist)) + " ingredients loaded! Processing...")

#  Create new dict with ingredients
#  Every ingredient maps to a bitarray holding one flag per recipe, all cleared.
#  The flags are later addressed by position in alcoholic_recipe_list, so the
#  length is taken from that list. alcoholic_recipe_dict is keyed by drink name
#  and ends up shorter whenever two rows share a name, which would make the
#  arrays too small for the last recipe positions.
recipe_count = len(alcoholic_recipe_list)
alcoholic_drink_dict = {}
for ingredient in ingredientlist:
    emptyarray = bitarray(recipe_count)
    #  Clear explicitly: some bitarray releases document bitarray(n) as
    #  uninitialised.
    emptyarray.setall(0)
    alcoholic_drink_dict[ingredient] = emptyarray

#  Set flags for each recipe per ingredient.
#  Bit N of an ingredient's bitarray is set when recipe N lists that ingredient.
#  A non-empty recipe ingredient that is missing from alcoholic_drink_dict
#  raises KeyError.
for recipe_index, recipe in enumerate(alcoholic_recipe_list):
    for raw_name in recipe:
        #  The exact spelling 'Bailey' is stored under its full product name;
        #  every other name is stripped, lower-cased and transliterated.
        ingredient = (
            'baileys irish cream'
            if raw_name == 'Bailey'
            else unidecode.unidecode(raw_name.strip().lower())
        )
        if not ingredient:
            continue  # empty ingredient slot
        #  The bitarray is mutated in place, so nothing has to be written back.
        alcoholic_drink_dict[ingredient][recipe_index] = 1

#  Build prune list for unused ingredients
#  An ingredient is unused when none of its recipe flags is set. The names are
#  collected first so the dict is not resized while it is being iterated.
unused_ingredients = [
    name
    for name, flags in alcoholic_drink_dict.items()
    if flags.count(1) == 0
]

#  Prune unused ingredients
for name in unused_ingredients:
    alcoholic_drink_dict.pop(name)

#  Build dataframe of recipes by ingredient
#  One row per remaining ingredient, one column per recipe, each cell holding
#  that ingredient's flag for that recipe.
#  The table is assembled row by row and the column labels are passed
#  positionally. Keying columns by drink name in a dict merges recipes that
#  share a name into one column and leaves the columns with unequal lengths.
indexes = list(alcoholic_drink_dict)
#  Iterating a bitarray yields the same items as indexing it position by position.
data = [list(alcoholic_drink_dict[name]) for name in indexes]
df = pd.DataFrame(data, index=indexes, columns=array_headers)

#  Pairwise cosine similarity between the rows of df (one row per ingredient),
#  labelled with the ingredient names on both axes.
ingredient_matrix = cosine_similarity(df)
df_sims = pd.DataFrame(ingredient_matrix, index=df.index, columns=df.index)

#  Draw the similarity matrix as a heat map.
heat_map = plt.imshow(
    df_sims,
    cmap='hot',
    interpolation='nearest',
)

#  Output widget used as the context in which the figure is shown.
out1 = Output()

with out1:
    plt.show()
    #  display(heat_map.figure)



#  width='auto' overrides the default button width so the button can grow.
items_layout = Layout(width='auto')

#  Shared container layout: bordered flex column at 50% width whose children
#  are stretched across it.
box_layout = Layout(
    display='flex',
    flex_flow='column',
    align_items='stretch',
    border='solid',
    width='50%',
)

#  One 'danger'-styled button per word.
words = ['correct', 'horse', 'battery', 'staple']
items = []
for label in words:
    items.append(
        Button(description=label, layout=items_layout, button_style='danger')
    )

#  Buttons in the first box, the heat-map output widget in the second.
box = Box(children=items, layout=box_layout)
box2 = Box(children=[out1], layout=box_layout)
#  Left as a bare trailing expression (presumably so a notebook renders it as
#  the cell result).
VBox([box, box2])