In [82]:
# imports
import mwparserfromhell
import pywikibot
import tqdm
import pandas as pd


In [83]:
# extract list of cocktails from https://en.wikipedia.org/wiki/List_of_cocktails

def get_cocktails():
    e = pywikibot.Site('en','wikipedia')
    p = pywikibot.Page(e, 'List of cocktails')            
    wt = p.get()               
    wc = mwparserfromhell.parse(wt)
    c = [ x.title for x in wc.filter_wikilinks()]
    return c

cocktails = get_cocktails()

print(f"Total links: {len(cocktails)}")

Total links: 602


In [84]:
# get everything from infobox, certain pages have more listed.
# Like legendary Joe Gilmore's page which is linked
def get_cocktails_drinkware(cocktail):
    c_d = {}
    e = pywikibot.Site('en','wikipedia')
    p = pywikibot.Page(e, cocktail)            
    try:
        wt = p.get(get_redirect=True)               
        wc = mwparserfromhell.parse(wt)
    except Exception as e:
        print(f"Error fetching page {p}: {e}")
        return c_d
    
    # find the infobox templates
    for t in wc.filter_templates():
        if t.name.strip().lower() == 'infobox cocktail':
            try:
                infbx_params = {k.strip().lower():v.strip().lower() for (k,v) in [x.split('=', 1) for x in [y for y in t.params if '=' in y]]}
            except Exception as e:
                print(f"Failed to unpack {t}")
                print(f"Exception: {e}")

            try:
                n = infbx_params['name']
                d = infbx_params['drinkware']
                # skip empty values
                if n == '' or d == '':
                    print(f"Empty name or drinkware in {t}")
                    continue
                c_d[n] = [d] 
            except Exception as e:
                pass
    return c_d

c = "Jägerbomb"
print(f"drinkware for cocktail {c}: {get_cocktails_drinkware(c)}")


drinkware for cocktail Jägerbomb: {'jägerbomb': ['pubshot']}


In [85]:
# cocktail -> drink ware
cocktail_drinkware = {}
for c in tqdm.tqdm(cocktails):
    d = get_cocktails_drinkware(c)
    cocktail_drinkware = {**cocktail_drinkware, **d}

cocktail_drinkware

  7%|▋         | 44/602 [00:22<04:11,  2.22it/s]

Empty name or drinkware in {{Infobox cocktail 
| iba         = 
| name        = Savoy Corpse Reviver
| image       = 
| caption     = 
| type        = cocktail
| flaming     = 
| brandy = yes
| served      = straight
| garnish     = 
| drinkware   = 
| ingredients = *1 part [[Brandy]]
*1 part [[Fernet Branca]]
*1 part White [[Crème de menthe]]
| prep        = Shake ingredients together with ice, and strain into a glass.
| notes       = 
| footnotes   = 
}}


 16%|█▌        | 97/602 [00:53<04:40,  1.80it/s]

Empty name or drinkware in {{Infobox cocktail 
| iba         = 
| name        = Mickey Slim
| image       = 
| caption     = 
| type        = cocktail
| flaming     = 
| gin = yes
| served      = 
| garnish     = 
| drinkware   = 
| ingredients = *One part [[gin]]
*A pinch of [[DDT]]
| prep        = Stir the DDT into the gin and serve
| notes       = DDT is not very soluble in gin so only a small quantity will dissolve.  DDT has been [[DDT#Effects on human health|linked]] to numerous health problems in humans.<ref>{{
cite journal
 |last=Eskenazi 
 |first=Brenda 
 |date=May 4, 2009 
 |title=The Pine River Statement: Human Health Consequences of DDT Use 
 |journal=[[Environmental Health Perspectives]] 
 |volume=117 
 |issue=9 
 |pages=1359–1367 
 |url= |pmid=19750098|pmc=2737010|doi=10.1289/ehp.11748
}}</ref>
| footnotes   = 
}}


 25%|██▌       | 152/602 [01:19<03:33,  2.11it/s]

Empty name or drinkware in {{Infobox cocktail
| iba         =
| name        = Painkiller
| image       = File:Pussers Rum Painkiller.JPG
| caption     = A Painkiller and a bottle of Pussers
| type        = cocktail
| flaming     = no
| rum = yes
| served      = on the rocks
| garnish     = [[Nutmeg]]
| drinkware   = 
| ingredients = *2-4 part(s) Pusser's [[Rum]]
*4 parts [[pineapple]] juice
*1 part [[coconut milk#cream of coconut|cream of coconut]]
*1 part [[orange juice]]
*Sprinkle with [[nutmeg]]
| prep        = Shake and garnish.
| notes       =
| footnotes   =
}}


 28%|██▊       | 168/602 [01:27<03:28,  2.08it/s]

Empty name or drinkware in {{Infobox cocktail
| iba         = 
| name        = Tamagozake
| image       = 
| caption     = 
| type        = cocktail
| flaming     = 
| sake = yes
| served      = Hot
| garnish     = 
| drinkware   = 
| ingredients = 
*One raw egg (or egg yolk only)
*3/4 cup (200 cc) sake
*Honey or sugar to taste
| prep        = Whisk the raw egg yolk and honey/sugar into 3/4 cup hot sake (hot enough to lightly cook the egg – 1:30 in the microwave).
| notes       = An alternative preparation method is to heat the sake/egg/honey mixture together, while whisking occasionally.<br />Be careful to stop cooking before it turns into scrambled eggs.
| footnotes   = 
}}


 36%|███▌      | 217/602 [01:55<02:33,  2.51it/s]

Empty name or drinkware in {{infobox cocktail
| iba         = 
| source      = 
| sourcelink  = 
| name        = Astro pop
| image       = 
| caption     = 
| type        = [[Mixed drink]]
| flaming     = 
| ALCOHOLTYPE = 
| served      = Typically layered
| garnish     = 
| drinkware   = 
| ingredients = [[Vodka]], blue [[Curaçao (liqueur)|curaçao]], [[grenadine]], [[Sour mix|sweet and sour mix]] and others
| prep        = 
| notes       = Some versions are served as a [[cocktail]] or [[Shooter (mixed drink)|shooter]]
| footnotes   = 
}}


 36%|███▌      | 218/602 [01:55<02:29,  2.56it/s]

Empty name or drinkware in {{Infobox cocktail 
| iba         = 
| name        = Batida
| image       = 
| caption     = 
| type        = cocktail
| flaming     = 
| cachaça = yes
| served      = rocks
| garnish     = 
| drinkware   = 
| ingredients = *2 parts [[cachaça]]
*1 part [[fruit juice]]
*1 tbsp [[sugar]]
| prep        = Mix and pour into chilled glass.
| notes       = 
| footnotes   = 
}}


 37%|███▋      | 223/602 [01:57<02:31,  2.50it/s]

Empty name or drinkware in {{Infobox cocktail
| iba =
| name = BLT
| image =BLT cocktail.jpg
| caption = BLT cocktail with [[Bloody Mary (cocktail)|Bloody Mary]]
| type = vodka
|vodka=yes
| served = straight
| garnish =[[Bacon salt]]
| drinkware = 
| ingredients = [[Bacon]]<br />[[Lettuce]] or [[Liquor]]<br />[[Tomato]]<br />[[Vodka]] or [[Bacon vodka]]
| prep = 
| notes =
| footnotes =
}}


 40%|███▉      | 240/602 [02:06<02:58,  2.02it/s]

Empty name or drinkware in {{Infobox cocktail
| iba         = no
| name        = 
| sourcelink = 
| image       = Harvey_Wallbanger.jpg
| caption     =
| type        = mixed
| flaming     =
| vodka       = yes
| served      = rocks
| garnish     = [[Orange (fruit)|orange]] slice and [[maraschino cherry]]
| drinkware   = highball
| ingredients = * 4.5 cl (3 parts) [[Vodka]]
* 1.5 cl (1 part) [[Galliano (liqueur)|Galliano]]
* 9 cl (6 parts) fresh [[orange juice]]
| prep        = Stir the vodka and orange juice with ice in the glass, then float the Galliano on top.  Garnish and serve.
| timing      = All day
}}


 43%|████▎     | 256/602 [02:14<02:13,  2.58it/s]

Empty name or drinkware in {{Infobox cocktail
| iba         =
| source      =
| sourcelink  =
| name        = Salmiakki Koskenkorva
| image       = 
| caption     = 
| type        = cocktail
| flaming     =
| vodka = yes
| served      = neat
| garnish     =
| drinkware   =
| ingredients =
| prep        = Premixed cocktail
| notes       =
| footnotes   =
}}


 71%|███████   | 426/602 [03:36<01:14,  2.35it/s]

Empty name or drinkware in {{Infobox cocktail 
| iba         = 
| name        = Batida
| image       = 
| caption     = 
| type        = cocktail
| flaming     = 
| cachaça = yes
| served      = rocks
| garnish     = 
| drinkware   = 
| ingredients = *2 parts [[cachaça]]
*1 part [[fruit juice]]
*1 tbsp [[sugar]]
| prep        = Mix and pour into chilled glass.
| notes       = 
| footnotes   = 
}}


 71%|███████▏  | 429/602 [03:37<01:04,  2.67it/s]

Empty name or drinkware in {{Infobox cocktail 
| iba         = 
| name        = Batida
| image       = 
| caption     = 
| type        = cocktail
| flaming     = 
| cachaça = yes
| served      = rocks
| garnish     = 
| drinkware   = 
| ingredients = *2 parts [[cachaça]]
*1 part [[fruit juice]]
*1 tbsp [[sugar]]
| prep        = Mix and pour into chilled glass.
| notes       = 
| footnotes   = 
}}


 71%|███████▏  | 430/602 [03:37<01:03,  2.71it/s]

Empty name or drinkware in {{Infobox cocktail 
| iba         = 
| name        = Batida
| image       = 
| caption     = 
| type        = cocktail
| flaming     = 
| cachaça = yes
| served      = rocks
| garnish     = 
| drinkware   = 
| ingredients = *2 parts [[cachaça]]
*1 part [[fruit juice]]
*1 tbsp [[sugar]]
| prep        = Mix and pour into chilled glass.
| notes       = 
| footnotes   = 
}}


 73%|███████▎  | 440/602 [03:43<01:11,  2.27it/s]

Empty name or drinkware in {{ infobox cocktail
| source      =
| sourcelink  = 3514
| name        = Polar Bear
| image       = Polar Bear cocktail, Empire State South, Atlanta GA.jpg
| caption     = Polar Bear cocktail
| type        = shooter
| flaming     = 
| schnapps    = yes
| cacao       = yes
| served      = blended, on the rocks or neat
| garnish     = 
| drinkware   = 
| ingredients = 
| prep        = 
| notes       = 
| footnotes   = [http://www.drinksmixer.com/drink602.html Classic Polar Bear recipe] at DrinksMixer.com
}}


 76%|███████▌  | 457/602 [03:52<01:26,  1.69it/s]

Empty name or drinkware in {{Infobox cocktail
| iba         = 
| name        = Flips
| image       = [[File:Brandy flip in stemmed cocktail glass.png.jpg|frameless]]
| caption     = A '''flip''' made from [[brandy]], an egg, and simple syrup is shown served in a stemmed cocktail glass and garnished with grated [[nutmeg]].
| type        = family
| whiskey     = yes
| brandy      = yes
| rum         = yes
| served      = 
| garnish     = 
| drinkware   = 
| ingredients = Whole, raw egg
| prep        = 
| notes       = See the article for specifics.
| footnotes   = 
}}


 78%|███████▊  | 467/602 [03:59<01:36,  1.40it/s]

Empty name or drinkware in {{Infobox cocktail
| iba         = 
| name        = Sours
| image       = [[File:Whiskey sour.jpg|frameless]]
| caption     = A whiskey sour garnished with a wheel of lemon and maraschino cherries.
| type        = family
| amaretto    = yes
| bourbon     = yes
| brandy      = yes
| gin         = yes
| pisco       = yes
| rum         = yes
| served      = 
| garnish     = 
| drinkware   = 
| ingredients = 
| prep        = 
| notes       = See the article for specifics.
| footnotes   = 
}}


 80%|███████▉  | 479/602 [04:08<01:17,  1.58it/s]

Error fetching page [[en:Agave spirits]]: Page [[en:Agave spirits]] doesn't exist.


 81%|████████▏ | 490/602 [04:14<01:07,  1.66it/s]

Error fetching page [[en:Carrot top oil]]: Page [[en:Carrot top oil]] doesn't exist.


 82%|████████▏ | 494/602 [04:16<00:47,  2.28it/s]

Error fetching page [[en:Jungle Bird]]: Page [[en:Jungle Bird]] doesn't exist.


 83%|████████▎ | 499/602 [04:20<01:19,  1.30it/s]

Error fetching page [[en:Old Style Whiskey Smash]]: Page [[en:Old Style Whiskey Smash]] doesn't exist.


 83%|████████▎ | 502/602 [04:22<01:02,  1.60it/s]

Error fetching page [[en:Whiskey smash]]: Page [[en:Whiskey smash]] doesn't exist.


 84%|████████▍ | 506/602 [04:24<00:51,  1.87it/s]

Empty name or drinkware in {{ infobox cocktail
| iba         = no
| name        = John Daly
| image       = 
| caption     = 
| type        = mixed
| flaming     = 
| absinthe    = 
| amaretto    = 
| beer        = 
| bourbon     = 
| brandy      = 
| cacao       = 
| cachaça     = 
| campari     = 
| canadian    = 
| champagne   = 
| cider       = 
| coffee      = 
| cognac      = 
| curaçao     = 
| everclear   = 
| fortified   = 
| gin         = 
| irishc      = 
| irishw      = 
| jäger       = 
| menthe      = 
| moonshine   = 
| pastis      = 
| pisco       = 
| port        = 
| pucker      = 
| rum         = 
| sake        = 
| sambuca     = 
| schnapps    = 
| scotch      = 
| sparkling   = 
| tennessee   = 
| tequila     = 
| vermouth    = 
| vodka       = yes
| whiskey     = 
| whisky      = 
| wine        = 
| other       = 
| served      = 
| garnish     = 
| drinkware   = 
| ingredients = [[Lemonade]], [[Iced tea]]
| prep        = 
| timing      =
| notes       = A [[tongu

 85%|████████▌ | 512/602 [04:27<00:37,  2.43it/s]

Error fetching page [[en:Cooler (cocktail)]]: Page [[en:Cooler (cocktail)]] doesn't exist.


 85%|████████▌ | 513/602 [04:28<00:56,  1.58it/s]

Error fetching page [[en:Henry's Hard Soda]]: Page [[en:Henry's Hard Soda]] doesn't exist.


 87%|████████▋ | 521/602 [04:32<00:41,  1.95it/s]

Error fetching page [[en:Redd's Apple Ale]]: Page [[en:Redd's Apple Ale]] doesn't exist.


 87%|████████▋ | 522/602 [04:32<00:36,  2.17it/s]

Error fetching page [[en:Szarlotka (cocktail)]]: Page [[en:Szarlotka (cocktail)]] doesn't exist.


 88%|████████▊ | 527/602 [04:36<00:56,  1.33it/s]

Error fetching page [[en:Transfusion (cocktail)]]: Page [[en:Transfusion (cocktail)]] doesn't exist.


 89%|████████▊ | 533/602 [04:39<00:38,  1.79it/s]

Error fetching page [[en:Henry's Hard Soda]]: Page [[en:Henry's Hard Soda]] doesn't exist.


 89%|████████▉ | 537/602 [04:41<00:27,  2.39it/s]

Error fetching page [[en:Whiskey highball]]: Page [[en:Whiskey highball]] doesn't exist.


 91%|█████████ | 547/602 [04:46<00:34,  1.59it/s]

Error fetching page [[en:Henry's Hard Soda]]: Page [[en:Henry's Hard Soda]] doesn't exist.


 92%|█████████▏| 555/602 [04:49<00:17,  2.76it/s]

Error fetching page [[en:Vodka and Coke]]: Page [[en:Vodka and Coke]] doesn't exist.


 94%|█████████▎| 564/602 [04:54<00:22,  1.72it/s]

Error fetching page [[en:Vodka cocktail]]: Page [[en:Vodka cocktail]] doesn't exist.


 95%|█████████▍| 571/602 [05:00<00:27,  1.11it/s]

Error fetching page [[en:White port]]: Page [[en:White port]] doesn't exist.


 95%|█████████▌| 574/602 [05:02<00:16,  1.68it/s]

Error fetching page [[en:Cucumber cooler]]: Page [[en:Cucumber cooler]] doesn't exist.


100%|██████████| 602/602 [05:23<00:00,  1.86it/s]


{'dry martini': ['cocktail'],
 'death in the afternoon': ['flute'],
 'black and tan': ['[[pint glass]]'],
 'black velvet': ['pilsner glass'],
 'irish car bomb': ['pubshot'],
 'michelada': ['pint'],
 'queen mary': ['[[beer glassware]]'],
 'snakebite': ['pint'],
 'brandy alexander': ['cocktail'],
 'chicago cocktail': ['old'],
 'the blenheim': ['cocktail'],
 'churchill': ['cocktail'],
 'common market': ['cocktail'],
 'four score': ['cocktail'],
 'golden doublet': ['cocktail'],
 'kensington court special': ['cocktail'],
 'link up': ['cocktail'],
 'lorraine': ['cocktail'],
 'missouri mule': ['cocktail'],
 'moonwalk': ['cocktail'],
 'my fair lady': ['cocktail'],
 'nixon': ['cocktail'],
 'powerscourt': ['cocktail'],
 'the ed shelly': ['snifter'],
 'prince of wales': ['flute'],
 'royal arrival': ['cocktail'],
 'savoy affair': ['champagne cocktail glass'],
 'savoy royale': ['champagne glass'],
 'wolfram': ['cocktail'],
 'french connection': ['old'],
 "horse's neck": ['highball'],
 'jack rose': 

In [86]:
# perform cleanup on certain data name / values

# 'black and tan': '[[pint glass]]'
cocktail_drinkware['black and tan'] = ['pint']

#'queen mary': '[[beer glassware]]'
cocktail_drinkware['queen mary'] = ['beer glassware']

# 'savoy affair': ['champagne cocktail glass'
cocktail_drinkware['savoy affair'] = ['champagne glass']

# 'cloister': '[[cocktail glass]]'
cocktail_drinkware['cloister'] = ['cocktail']

# 'gin and tonic': '[[highball glass]] or [[rocks glass]]'
cocktail_drinkware['gin and tonic'] = ['highball', 'rocks']

# 'greyhound': ['old fashioned glass']
cocktail_drinkware['greyhound'] = ['old']

# 'bushwacker': '[[hurricane glass|hurricane glass]]
cocktail_drinkware['bushwacker'] = ['hurricane']

# "<big>cobra's fang</big>": 'tall clear glass'
cocktail_drinkware["cobra's fang"] = ['tall clear glass']
del(cocktail_drinkware["<big>cobra's fang</big>"])

# 'cuban sunset': '[[highball glass]]'
cocktail_drinkware['cuban sunset'] = ['highball']

# 'flaming volcano': 'special: [[volcano bowl]]'
cocktail_drinkware['flaming volcano'] = ['volcano bowl']

# 'hurricane': '[[hurricane lamp]]–shaped glass'
cocktail_drinkware['hurricane'] = ['hurricane lamp–shaped glass']

# '<big>mr. bali hai</big>': 'special: mr. bali hai mug'
cocktail_drinkware['mr. bali hai'] = ['mr. bali hai mug']
del(cocktail_drinkware['<big>mr. bali hai</big>'])

# 'bermuda rum swizzle': ['cocktail glass']
cocktail_drinkware['bermuda rum swizzle'] = ['cocktail']

# '151 swizzle': ['pilsner glass or tall glass']
cocktail_drinkware['151 swizzle'] = ['pilsner glass', 'tall glass']

# 'leite de onça': ['mug (preferably a non-transparent one)']
cocktail_drinkware['leite de onça'] = ['mug']

# 'platinum blonde': '[[cocktail glass]]'
cocktail_drinkware['platinum blonde'] = ['cocktail']

# 'up to date': '[[coupe glass|coupe]] or [[cocktail glass]]'
cocktail_drinkware['up to date'] = ['coupe', 'cocktail']

# 'baby guinness': ['[[shot glass|shot]]']
cocktail_drinkware['baby guinness'] = ['shot']
    
# 'jello shot': ['various (cup, glass, bowl, tray, etc)']
cocktail_drinkware['jello shot'] = ['various']

# 'punch': ['often served in a [[punch bowl]] with [[mug|punch glasses]].']
cocktail_drinkware['punch'] = ['punch']

cocktail_drinkware

{'dry martini': ['cocktail'],
 'death in the afternoon': ['flute'],
 'black and tan': ['pint'],
 'black velvet': ['pilsner glass'],
 'irish car bomb': ['pubshot'],
 'michelada': ['pint'],
 'queen mary': ['beer glassware'],
 'snakebite': ['pint'],
 'brandy alexander': ['cocktail'],
 'chicago cocktail': ['old'],
 'the blenheim': ['cocktail'],
 'churchill': ['cocktail'],
 'common market': ['cocktail'],
 'four score': ['cocktail'],
 'golden doublet': ['cocktail'],
 'kensington court special': ['cocktail'],
 'link up': ['cocktail'],
 'lorraine': ['cocktail'],
 'missouri mule': ['cocktail'],
 'moonwalk': ['cocktail'],
 'my fair lady': ['cocktail'],
 'nixon': ['cocktail'],
 'powerscourt': ['cocktail'],
 'the ed shelly': ['snifter'],
 'prince of wales': ['flute'],
 'royal arrival': ['cocktail'],
 'savoy affair': ['champagne glass'],
 'savoy royale': ['champagne glass'],
 'wolfram': ['cocktail'],
 'french connection': ['old'],
 "horse's neck": ['highball'],
 'jack rose': ['cocktail'],
 'paradis

In [87]:
cocktail_drinkware_records = []
for k,v in cocktail_drinkware.items():
    for v_ in v:
        cocktail_drinkware_records.append({'cocktail': k, 'glass': v_})
        
df = pd.DataFrame.from_records(cocktail_drinkware_records)
df.groupby(by="glass").count().sort_values(by="cocktail", ascending=False)

Unnamed: 0_level_0,cocktail
glass,Unnamed: 1_level_1
cocktail,71
highball,26
old,24
collins,11
shot,7
flute,6
pubshot,4
rocks,4
pint,3
mug,3


In [88]:
# generate glasses => cocktails
glass_cocktails = {}
for k,v in cocktail_drinkware.items():
    for v_ in v:
        c = glass_cocktails.get(v_, [])
        c.append(k)
        glass_cocktails[v_] = c
    
# Generate data for d3.
glass_cocktail_d3 = {}
glass_cocktail_d3["name"]= "glass"

# Add glasses and cocktails
glass_cocktail_d3["children"] = []
for g,cs in glass_cocktails.items():
    gc =  {}
    gc['name'] = g
    gc['children'] = []
    for c in cs:
        gc['children'].append({'name': c})
    glass_cocktail_d3['children'].append(gc)
    
glass_cocktail_d3


{'name': 'glass',
 'children': [{'name': 'cocktail',
   'children': [{'name': 'dry martini'},
    {'name': 'brandy alexander'},
    {'name': 'the blenheim'},
    {'name': 'churchill'},
    {'name': 'common market'},
    {'name': 'four score'},
    {'name': 'golden doublet'},
    {'name': 'kensington court special'},
    {'name': 'link up'},
    {'name': 'lorraine'},
    {'name': 'missouri mule'},
    {'name': 'moonwalk'},
    {'name': 'my fair lady'},
    {'name': 'nixon'},
    {'name': 'powerscourt'},
    {'name': 'royal arrival'},
    {'name': 'wolfram'},
    {'name': 'jack rose'},
    {'name': 'paradise'},
    {'name': 'porto flip'},
    {'name': 'sidecar'},
    {'name': 'stinger'},
    {'name': 'alexander'},
    {'name': 'angel face'},
    {'name': "bee's knees"},
    {'name': 'bijou cocktail'},
    {'name': 'blackthorn'},
    {'name': 'breakfast martini'},
    {'name': 'bronx'},
    {'name': 'casino'},
    {'name': 'cloister'},
    {'name': 'clover club cocktail'},
    {'name': 'c

In [89]:
# export as json
import json
with open("glass_cocktails_flare.json", "w") as f:
    json.dump(glass_cocktail_d3, f)