In [1]:
import json
from os import listdir

# Read the whole CSV export into memory; the context manager closes the
# file handle as soon as the read is done instead of leaking it.
with open('all_drinks.csv') as csv_file:
    raw = csv_file.read()

In [2]:
# Minimal CSV tokenizer: flatten the entire file into one list of fields.
# A comma or newline ends the current field unless we are inside double
# quotes; the quote characters themselves are dropped from the output.
terms = []
field_chars = []   # characters of the field currently being built
in_quote = False
for ch in raw:
    if ch == '"':
        in_quote = not in_quote
    elif ch in ',\n' and not in_quote:
        terms.append(''.join(field_chars))
        field_chars = []
    else:
        field_chars.append(ch)

# A field is only flushed when its delimiter is seen, so a file that does not
# end with a newline would otherwise lose its final field.
if raw and (in_quote or raw[-1] != '\n'):
    terms.append(''.join(field_chars))

# Show the first 41 terms (the header row) with their positions.
for i, header in enumerate(terms[:41]):
    print('%d. %s' % (i, header))

0. 
1. strDrink
2. dateModified
3. idDrink
4. strAlcoholic
5. strCategory
6. strDrinkThumb
7. strGlass
8. strIBA
9. strIngredient1
10. strIngredient10
11. strIngredient11
12. strIngredient12
13. strIngredient13
14. strIngredient14
15. strIngredient15
16. strIngredient2
17. strIngredient3
18. strIngredient4
19. strIngredient5
20. strIngredient6
21. strIngredient7
22. strIngredient8
23. strIngredient9
24. strInstructions
25. strMeasure1
26. strMeasure10
27. strMeasure11
28. strMeasure12
29. strMeasure13
30. strMeasure14
31. strMeasure15
32. strMeasure2
33. strMeasure3
34. strMeasure4
35. strMeasure5
36. strMeasure6
37. strMeasure7
38. strMeasure8
39. strMeasure9
40. strVideo


# Next steps

1. Convert terms into JSON objects with the following fields:
    - names: list of strings
    - ingredients: list of strings
    - method: list of strings
    - picture: string
2. Load existing drinks
3. Compare names
4. Remove duplicates
5. Save all drinks to drinks folder

In [10]:
class Drink:
    """A drink recipe with names, ingredient lines, method steps and a picture.

    Build one either from a single row of the flattened CSV (``terms``, the 41
    fields of one record) or from a previously saved JSON file (``filename``).
    If neither is given (or ``terms`` is empty and no ``filename`` is passed)
    the drink is created with empty fields.

    Attributes:
        names: list of strings; from CSV this is ``[terms[1]]``.
        ingredients: list of "<measure> <ingredient>" strings, blanks removed.
        method: list of strings; the glass first, then the instruction steps.
        picture: string; from CSV this is ``terms[6]``.
    """

    # Offsets inside each 15-column ingredient/measure group, listed in
    # numeric order 1..15. The CSV columns are sorted lexically
    # (1, 10, 11, ..., 15, 2, ..., 9), so number 2 sits at offset 7 and
    # number 10 at offset 1.
    _NUMERIC_ORDER = (0, 7, 8, 9, 10, 11, 12, 13, 14, 1, 2, 3, 4, 5, 6)

    def __init__(self, terms=None, filename=None):
        # Start from empty fields so iteration and save() work on any instance.
        self.names = []
        self.ingredients = []
        self.method = []
        self.picture = ''

        if terms:
            self.names = [terms[1]]
            # Pair each measure with its ingredient, then put the pairs back
            # into numeric order (see _NUMERIC_ORDER).
            pairs = list(zip(terms[25:40], terms[9:24]))
            ordered = [pairs[k] for k in self._NUMERIC_ORDER if k < len(pairs)]
            # Join measure and ingredient, trim whitespace, drop blanks.
            joined = [(measure + ' ' + ingredient).strip()
                      for measure, ingredient in ordered]
            self.ingredients = [line for line in joined if line != '']
            # Start with the glass, then add the instructions split on '.'.
            steps = [terms[7]] + terms[24].split('.')
            cleaned = [self._clean_step(step) for step in steps]
            # Remove any empty method strings.
            self.method = [step for step in cleaned if step != '']
            # All drinks come with a thumbnail.
            self.picture = terms[6]
        elif filename:
            # Close the file promptly instead of leaking the handle.
            with open(filename) as f:
                drink = json.load(f)
            self.names = drink['names']
            self.ingredients = drink['ingredients']
            self.method = drink['method']
            self.picture = drink['picture']

    @staticmethod
    def _clean_step(meth):
        """Normalize one instruction fragment.

        Some drinks have numbered instructions; splitting on '.' leaves the
        next step's number at the end of the previous fragment, so a single
        trailing digit 1-9 is removed. The result is stripped and capitalized.
        Note: only one digit is removed, so numbers of two or more digits are
        not fully cleaned.
        """
        if len(meth) > 0 and meth[-1] in '123456789':
            meth = meth[:-1]
        return meth.strip().capitalize()

    def __iter__(self):
        """Yield (key, value) pairs so that ``dict(drink)`` gives the JSON form."""
        yield 'names', self.names
        yield 'ingredients', self.ingredients
        yield 'method', self.method
        yield 'picture', self.picture

    def save(self, filename):
        """Write the drink to ``filename`` as JSON indented by 4 spaces."""
        with open(filename, 'w') as f:
            json.dump(dict(self), f, indent=4)

In [11]:
# Each record is 41 consecutive terms. Record 0 is the header row, so the
# drinks are built from records 1 through 546.
drinks = [
    Drink(terms=terms[row * 41:row * 41 + 41])
    for row in range(1, 547)
]

# Load every previously saved drink, skipping the macOS folder metadata file.
old_drinks = [
    Drink(filename='old/drinks/' + entry)
    for entry in listdir('old/drinks')
    if entry != '.DS_Store'
]

In [15]:
# find drinks we have that cocktail db doesn't (i.e. sweaty mexican) and keep those
# An old drink counts as found when any of its names matches, ignoring case,
# any name of any new drink.
known_names = {new_name.lower() for new_drink in drinks for new_name in new_drink.names}

missing = [
    old_drink
    for old_drink in old_drinks
    if not any(old_name.lower() in known_names for old_name in old_drink.names)
]

drinks += missing

In [18]:
# Write every drink to drinks/<position>.json, numbered by list position.
for index, entry in enumerate(drinks):
    entry.save('drinks/%d.json' % index)