## popularity processing

In [1]:
# move file to backend folder first
import csv
from cocktailLab import CocktailLab
import editdistance
import numpy as np
import math

In [2]:
cocktailLab = CocktailLab()

In [3]:
def popfile_to_names(file):
    """ Returns a dictionary of format {'cocktail name' : 'ingred1,ingred2'}
    Parameters:
    file: name of file

    ***Note: CURRENTLY CONFIGURED FOR cocktail_flavors_ingreds_combined***
    """
    with open(file, encoding="utf8") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        out = {}
        for row in csv_reader:
            if line_count == 0:
                line_count += 1
            else:
                out[row[2].lower()] = ""
    return out

In [4]:
popdrinks = popfile_to_names("./data/popularity.csv")

In [5]:
ourdrinks = list(cocktailLab.cocktail_name_to_index.keys())

In [6]:
for popdrink in popdrinks:
    for drink in ourdrinks:
        if popdrink in drink or drink in popdrink:
            popdrinks[popdrink] = drink

In [7]:
[(i, popdrinks[i]) for i in popdrinks]

[("'57 chevy", "'57 chevy with a white license plate"),
 ('caipirinha', 'dark caipirinha'),
 ('long island icetea', ''),
 ('piña colada', ''),
 ('tall blonde', ''),
 ('wiki waki woo', ''),
 ('ghostbuster', ''),
 ('mai tai', 'kentucky mai tai'),
 ('77 sunset strip', ''),
 ('zombie', ''),
 ('bleeding heart', ''),
 ('time bomb', ''),
 ('sunny', ''),
 ('daiquiri slush', ''),
 ('cuba libre', ''),
 ('union jack', ''),
 ('lemon drop', 'lemon drop'),
 ('american beauty (aperitif)', ''),
 ('sex on the beach', ''),
 ('baby aspirin', ''),
 ('boston tea party', ''),
 ('mojito', 'pink mojito'),
 ('mojito original', 'mojito'),
 ('highfly', ''),
 ("veronika's birthday", ''),
 ('b52', ''),
 ('cuban special', ''),
 ('sputnik', ''),
 ('baby face', 'ace'),
 ('grasshopper', ''),
 ('gimlet', ''),
 ("planter'spunch (1)", ''),
 ('a day at the beach', ''),
 ('lumumba', ''),
 ('white russian (der echte!)', ''),
 ('absinth', ''),
 ('sex on the beach 3', ''),
 ('tequila sunrise', 'tequila sunrise'),
 ('---', '')

In [8]:
# 
popdrinks['pina colada "real"'] = 'pink colada'
popdrinks['piña colada'] = 'pink colada'
popdrinks['frensh 75 (cdrink)'] = 'french 75'

In [9]:
for popdrink in popdrinks:
    if popdrinks[popdrink] != "":
        continue
    distances = []
    for drink in ourdrinks:
        distances += [editdistance.eval(popdrink, drink)]
    a = np.array(distances)
    print((popdrink, ourdrinks[np.argmin(a)]))

('long island icetea', 'holland house')
('tall blonde', 'la la land')
('wiki waki woo', 'andy warhol')
('ghostbuster', 'stinger')
('77 sunset strip', 'le sang et sable')
('zombie', '3am')
('bleeding heart', 'blood & sand')
('time bomb', 'bird on')
('sunny', 'monet')
('daiquiri slush', 'apricot blush')
('cuba libre', 'culebra')
('union jack', 'applejack')
('american beauty (aperitif)', 'round pond aperitif')
('sex on the beach', 'redondo beach')
('baby aspirin', 'dark caipirinha')
('boston tea party', 'bird on a wire')
('highfly', 'bird on')
("veronika's birthday", 'fiona graham')
('b52', '3am')
('cuban special', 'army special')
('sputnik', 'martini')
('grasshopper', 'beachcomber')
('gimlet', 'monet')
("planter'spunch (1)", 'prata punch')
('a day at the beach', 'adam & eve')
('lumumba', 'club med')
('white russian (der echte!)', 'citrus quo (adapted)')
('absinth', 'a.nise')
('sex on the beach 3', 'redondo beach')
('---', '3am')
('---nazi_from_hell', 'bacio romana')
('erdbeerbowle', 'cre

In [10]:
pop_rawscores = {}

In [11]:
def popfile_pops(file):
    """ Returns a dictionary of format {'cocktail name' : 'ingred1,ingred2'}
    Parameters:
    file: name of file

    ***Note: CURRENTLY CONFIGURED FOR cocktail_flavors_ingreds_combined***
    """
    with open(file, encoding="utf8") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in csv_reader:
            if line_count == 0:
                line_count += 1
            else:
                if popdrinks[row[2].lower()] != "":
                    name = row[2].lower()
                    if name not in pop_rawscores:
                      pop_rawscores[row[2].lower()] = [int(row[1])]
                    else:
                        pop_rawscores[name] += [int(row[1])]

In [12]:
popfile_pops("./data/popularity.csv")

In [13]:
pop_sumscores = {}
for drink in pop_rawscores:
    pop_sumscores[drink] = math.log(11 * len(pop_rawscores[drink]) - sum(pop_rawscores[drink]) + 1)

In [14]:
popdrinks_short = dict((k, v) for k, v in popdrinks.items() if v)

In [23]:
from cocktailLab import CocktailLab
cl = CocktailLab()

In [16]:
popdrinks_short_inv = {v: k for k, v in popdrinks_short.items()}
short_name_to_index = {
    name: index for index, name in enumerate(popdrinks_short_inv)
}
short_index_to_name = {v:k for k, v in short_name_to_index.items()}

In [19]:
# want: to label each of our drinks with a popularity score, based on how close they are to a drink in the popular dataset * popularity factor
# make doc-vocab matrix out of popular drinks (popdrinks_short_inv)
# get the cos sim measure b/t each of our drinks and the closest popular drink using that doc-vocab matrix
# to get the popularity score, use the pop_sumscores dictionary

In [24]:
pop_ingreds = [cl.cocktail_names_to_ingreds[c] for c in popdrinks_short_inv]

In [55]:
pop_idxs = [cl.cocktail_name_to_index[i] for i in popdrinks_short_inv]
pop_idxs

[0, 154, 410, 271, 296, 411, 342, 29, 495, 321, 194, 277, 315, 205]

In [51]:
cl.ingreds_doc_by_vocab[pop_idxs].shape

(14, 949)

In [75]:
# process every drink that doesn't have a score
assigned_pop_scores = []
scores = []
cos_scores = []
for drink in ourdrinks:
    # skip drinks that alr have a score
    if drink in popdrinks_short_inv:
        # continue
        pass
    # print(drink, end="")
    ranks = (cl.cos_rank(
        cl.ingreds_doc_by_vocab[cl.cocktail_name_to_index[drink]],
        cl.ingreds_doc_by_vocab[pop_idxs]
    ))
    actual_idx = pop_idxs[ranks[0][0]]
    # print(list(popdrinks_short_inv.keys())[ranks[0][0]])
    assigned_pop_scores += [{
        "drink name" : drink,
        "closest popular drink" : cl.cocktail_index_to_name[actual_idx],
        "score" : ranks[0][1] * pop_sumscores[popdrinks_short_inv[cl.cocktail_index_to_name[actual_idx]]]
    }]
    scores += [ranks[0][1] * pop_sumscores[popdrinks_short_inv[cl.cocktail_index_to_name[actual_idx]]]]
    cos_scores += [ranks[0][1]]
    

In [77]:
field_names = ["drink name", "closest popular drink", "score"]
with open("all_popular_scores.csv", "w") as file:
    writer = csv.DictWriter(file, fieldnames=field_names)
    writer.writeheader()
    writer.writerows(assigned_pop_scores)

In [30]:
cl.ingreds

{"'57 chevy with a white license plate": 'creme de cacao,vodka',
 '10 to 7': '1 oz no. 209 gin, 1 oz luxardo maraschino liqueur, 1 oz st. germain, 1 oz lemon juice',
 '155 belmont': 'dark rum,light rum,vodka,orange juice',
 "1880's americana": '2 oz hirsch small batch reserve bourbon, 10 ml amer picon, 10 ml house cola syrup',
 '20th century': "1.5 oz no.3 london dry gin, .75 oz tempus fugit kina l'aero d'or, .75 oz tempus fugit creme de cacao, .75 oz fresh lemon juice",
 '3 & tree': '1.5 oz no.3 london dry gin,  top fever-tree tonic water, .25 oz maurin quina',
 '3 degrees': '1.75 oz no.3 london dry gin, .75 oz velvet falernum, .5 oz lime juice, .5 oz pear sage foam, 1 float prosecco',
 '3am': "1.5 oz no.3 london dry gin, .75 oz h by hine, .75 oz luxardo triplum (triple sec), .5 oz king's ginger liqueur, .5 oz fresh lime juice",
 '50/50 split': '1.5 oz junipero gin, .75 oz dry vermouth, .75 oz tempus fugit alessio vermouth bianco, 1 dash the bitter truth orange bitters',
 '7th heaven'

# fjdihg

In [None]:
import csv


def read_file(file):
    with open(file) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        cocktail_names = []
        tags = []
        out = {}
        for row in csv_reader:
            if line_count == 0:
                print(f'Column names are {", ".join(row)}')
                line_count += 1
                # out.append(row)
            else:
                # for idx, item in enumerate(row):
                #     if idx == 0:
                #         out.append([item])
                #     else:
                #         out[-1] += item.split(',')
                # print(out[-1])
                # cocktail_names += row[1]
                # tags += row[2].split(',')
                out[row[1]] = row[2]
                print(row[2])
    return out



data = read_file("cocktail_tags.csv")
# tags = [data[cocktail] for cocktail in data.keys()]
# [item for sublist in tags for item in sublist]
print(len(data))

In [None]:
import csv
import pandas as pd

input = pd.read_csv('cocktail_flavors.csv')
input['combined'] = input['Ingredients']
input['flavors'] = ''

flavors = ['floral', 'citrus', 'herbaceous', 'fruity', 'spicy', 'sweet', 'bitter']
for index, row in input.iterrows():
    for flavor in flavors:
      if row[flavor]:
          input.at[index, 'combined'] = input.at[index, 'combined'] + f",{flavor}"
          input.at[index, 'flavors'] = input.at[index, 'flavors'] + f",{flavor}"

input.to_csv("cocktail_flavors_ingreds_combined.csv", index=False)

In [None]:
import csv

file = 'popularity.csv'
drink_popularity = {}
with open(file) as csv_file:
    csv_reader = csv.reader(csv_file)

    for row in csv_reader:
        # if line_count == 0:
        #     print(f'Column names are {", ".join(row)}')
        #     line_count += 1
        # out.append(row)
        # for idx, item in enumerate(row):
        #     if idx == 0:
        #         out.append([item])
        #     else:
        #         out[-1] += item.split(',')
        # print(out[-1])
        # cocktail_names += row[1]
        # tags += row[2].split(',')
        # print(row[2])
        if not row[2] in drink_popularity:
            drink_popularity[row[2].lower()] = ''

    print(len(drink_popularity))


def read_file_ingreds(file):
    """ Returns a dictionary of format {'cocktail name' : 'ingred1,ingred2'}
    Parameters:
    file: name of file

    ***Note: CURRENTLY CONFIGURED FOR cocktail_flavors_ingreds_combined***
    """
    with open(file, encoding="utf8") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        out = {}
        for row in csv_reader:
            if line_count == 0:
                line_count += 1
            else:
                out[row[0].lower()] = row[3].lower()
    return out


drink_ingreds = read_file_ingreds('cocktail_flavors_ingreds_combined.csv')

count = 0
common_drinks = {}
for drink in drink_popularity:
    if drink in drink_ingreds:
        count += 1
        common_drinks[drink] = ''
    else:
        for drink2 in drink_ingreds:
            if drink in drink2 or drink2 in drink:
                count += 1
                common_drinks[drink2] = ''


# set(drink_popularity.keys()).intersection(drink_ingreds.keys())
print(common_drinks.keys())


101
dict_keys(["'57 chevy with a white license plate", 'caipirinha', 'brazilian mai tai', 'kentucky mai tai', 'lemon drop', 'mojito', 'ace', 'tequila sunrise', 'martini', 'flambeed brandy alexander', 'kir', 'manhattan'])


In [None]:
def read_file_popularity(file):
        """ Returns a dictionary of format {'cocktail name' : 'flavor1,flavor2'}
        Parameters:
        file: name of file

        """
        with open(file, encoding="utf8") as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            line_count = 0
            out = {}
            for row in csv_reader:
                if line_count == 0:
                    line_count += 1
                else:
                    out[row[0].lower()] = row[12]
        return out

read_file_popularity('cocktail_flavors_popularity.csv')