In [1]:
import sys
sys.path.insert(0, "/Users/artur/workspace/recipedb")

In [2]:
from recipedb.db import get_db
import functools

In [3]:
db = get_db()

In [4]:
exs = db.allrecipes.find({"error":{"$eq":None}})

In [5]:
NUM_EXAMPLES = 30000
exs_recipes = list(map(lambda x: next(exs), range(NUM_EXAMPLES))) # grab the next NUM_EXAMPLES from cursor above

In [6]:
def write_list_file(file, lst):
    with open(file, "w+") as f:
        f.writelines(map(lambda x: "%s\n"%x,list(lst)))

In [7]:
# Lets try simple counts first and then lets see if tf idf improves it, first step is finding common ingredients
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import pandas as pd

In [8]:
# add up all the list of list of ingredients into long corpus of ingredients
corpus = functools.reduce(lambda a,b: a+b, list(map(lambda x: x['data']['ingredients'], exs_recipes)))

In [9]:
vectorizer = CountVectorizer(analyzer='word',ngram_range=(1,5),)
tfidf = TfidfVectorizer(ngram_range=(1,5),)

In [10]:
vectorizer.fit(corpus)
tfidf.fit(corpus)

TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
                dtype=<class 'numpy.float64'>, encoding='utf-8',
                input='content', lowercase=True, max_df=1.0, max_features=None,
                min_df=1, ngram_range=(1, 5), norm='l2', preprocessor=None,
                smooth_idf=True, stop_words=None, strip_accents=None,
                sublinear_tf=False, token_pattern='(?u)\\b\\w\\w+\\b',
                tokenizer=None, use_idf=True, vocabulary=None)

In [11]:
write_list_file('gram.txt', vectorizer.get_feature_names())
write_list_file("tfidf.txt", tfidf.get_feature_names())

In [13]:
def read_list_file(file):
    with open(file, "r") as f:
        data = f.readlines()
        return list(map(lambda x: x.strip(), data))

In [14]:
data = read_list_file('gram.txt')

In [15]:
# run through the list and remove the ones that occur in other ones
def remove_occurences(lst):
    i = 0
    lst = list(lst)
    while (len(lst)-1-i) > 0:
        current = lst[i] 
        nxt = lst[i+1]
        if current in nxt:
            lst.remove(current)
        else:
            i += 1
    return lst

In [16]:
cleaned_data = remove_occurences(data)

In [85]:
print("AVERAGE NUMBER OF INGREDIENTS PER RECIPE AFTER PROCESSING %f" % (len(cleaned_data)/NUM_EXAMPLES))

AVERAGE NUMBER OF INGREDIENTS PER RECIPE AFTER PROCESSING 3.268933


In [86]:
def term_counts(ingredients_set):
    counts = {}

    for string in ingredients_set:
        for part in string.split():
            if counts.get(part) is None:
                counts[part] = 0
            counts[part] += 1
    return counts
    

In [87]:
counts = term_counts(cleaned_data)
counts_series = pd.Series(counts)
counts_series.sort_values(ascending=False).head(1000).to_csv('counts.csv')

In [88]:
# opening a manually annotated file of non ingredient words using counts
with open('allrecipes_noningredient_words.txt', "r+") as f:
    annotated_data = list(map(lambda x: x.strip(), f.readlines()))

In [89]:
def remove_common_words(data_series):
    output = []
    for x in data_series:
        original_words = list(x.split())
        for word in x.split():
            if word in annotated_data or word.isdigit():
                original_words.remove(word)
        output.append(' '.join(original_words))
    return output

In [90]:
ingredients_set = set(remove_common_words(cleaned_data))

In [91]:
print("NUMBER OF INGREDIENTS NO HAMMING %d" % len(ingredients_set))

NUMBER OF INGREDIENTS NO HAMMING 36918


In [92]:
with open("ingredient_set.txt", "w+") as f:
    f.writelines(map(lambda x: "%s\n"%x,list(ingredients_set)))


In [93]:
ingreds = list(ingredients_set)

In [94]:
ingreds.sort()

In [95]:
# https://en.wikipedia.org/wiki/Hamming_distance distance between two strings
# a more relative hamming distance
def relative_hamming_score(string1, string2):
    dist_counter = 0
    max_val = min(len(string1),len(string2))
    for n in range(max_val):
        if string1[n] != string2[n]:
            dist_counter += 1
    if max_val is 0:
        return 0
    return dist_counter/max_val

In [96]:
THRESHOLD = 0.3
ingreds = list(ingredients_set)
ingreds.sort()
duplicates = []
for i in range(len(ingreds)-1):    
    if relative_hamming_score(ingreds[i], ingreds[i+1]) < THRESHOLD:
        print('matched')
        print(ingreds[i])
        print(ingreds[i+1])
        duplicates.append(ingreds[i]) # add first match to duplicates

matched

10x12
matched
10x12
12x12 inches each reynolds wrap
matched
12x12 squares aluminum
12x12 squares aluminum foil
matched
12x18 parchment
12x20 parchment
matched
18x24 inches reynolds wrap aluminum
18x24 inches reynolds wrap heavy
matched
1x1
1x1 matchsticks
matched
2x1
2x1 cubes
matched
2x2
2x2 sourdough
matched
2x2 sourdough
2x2 sourdough bread
matched
7up
7up cherry
matched
9x9 pan cornbread
9x9 pan cornbread cooled
matched
a1
a1 steak sauce
matched
about
about degrees degrees
matched
about diameter
about diameter red
matched
about diameter red
about diameter red bliss
matched
about each
about each down
matched
about inches
about inches long
matched
absinthe
absinthe based bitters
matched
acai berry pulp
acai berry sorbet
matched
acai pulp
acai pulp sambazon
matched
accents organic sprouted quinoa trio
accents organic sprouted rice trio
matched
achiote annatto seeds
achiote goya
matched
achiote goya
achiote powder
matched
acini di
acini di pepe
matched
acini di pepe
acini di p

baby potatoes quarters
matched
baby ray
baby red
matched
baby red
baby red gold
matched
baby red gold
baby red gold beets
matched
baby red gold beets
baby red gold potatoes
matched
baby red yellow
baby red yellow potatoes
matched
baby reds mashed potatoes
baby reds peas
matched
baby romaine lettuce
baby romaine lettuce mixed
matched
baby spinach
baby spinach bite
matched
baby spinach bite
baby spinach leaves
matched
baby spinach leaves
baby spinach leaves stems
matched
baby spinach leaves stems
baby spinach stems
matched
baby spinach stems
baby spinach taste
matched
baby squid rings
baby squid tubes
matched
baby squid tubes
baby squid tubes tentacles
matched
bacardi coconut
bacardi gold
matched
back rib at room temperature
back ribs
matched
back ribs
back ribs individual
matched
backbone heavy duty
backbone heavy duty scissors
matched
backfin crabmeat
backfin crabmeat picked
matched
backstrap
backstrap chunks
matched
bacon
bacon bacon bits
matched
bacon bits
bacon bits hormel
matched
b

bass
matched
bass
bass about
matched
bass fillets
bass fillets skinned
matched
basting mix
basting oil
matched
basting oil
basting sauce
matched
bay
bay blackened seasoning
matched
bay leaf
bay leaf crumbled
matched
bay leaves
bay leaves broken half
matched
bay scallops
bay scallops cleaned
matched
bay scallops cleaned
bay scallops cleaned patted
matched
bay scallops cleaned patted
bay scallops if
matched
bay scallops if
bay scallops raw
matched
bay scallops raw
bay scallops tough muscle
matched
bay seasoning
bay seasoning tm
matched
bay seasoning tm
bay seasoning tm taste
matched
bbq sauce
bbq spice rub parmesan
matched
beam
bean
matched
bean
bean bacon soup
matched
bean dip
bean dip fritos
matched
bean flour bob
bean flour needed
matched
bean mix
bean mix soup
matched
bean paste
bean paste kochujang
matched
bean soup
bean soup mix
matched
bean split
bean split scraped
matched
bean split scraped
bean split seeds
matched
bean split seeds
bean split seeds scraped
matched
bean sprouts
be

bitter melon flesh
matched
bitter melon flesh
bitter melon jelly
matched
bittermens fee
bittermens fee brothers
matched
bittermens fee brothers
bitters
matched
bitters
bitters angostura
matched
bittersweet chocolate
bittersweet chocolate chips
matched
bittersweet chocolate chips
bittersweet chocolate grated
matched
bittersweet chocolate grated
bittersweet chocolate melted
matched
bittersweet white chocolate
bittersweet white chocolate chips
matched
black
black angus
matched
black angus
black angus ribeye
matched
black angus ribeye
black angus ribeye steaks
matched
black bean garlic sauce
black bean sauce
matched
black bean sauce
black bean soup
matched
black bean soup
black bean vegetable blend
matched
black bean vegetable blend
black beans
matched
black beans
black beans bush
matched
black beans bush
black beans canned
matched
black beans canned
black beans drain
matched
black beans drain
black beans drain reserve
matched
black beans drain reserve
black beans green
matched
black beans

bread cubes plain croutons
matched
bread cubes walnut
bread cubes without crusts
matched
bread flour
bread flour all
matched
bread flour all
bread flour all purpose
matched
bread flour all purpose
bread flour half
matched
bread flour half
bread flour if
matched
bread flour if
bread flour king
matched
bread flour king
bread flour needed
matched
bread flour needed
bread flour plus
matched
bread french baguette
bread french bread
matched
bread inches diameter
bread inches long
matched
bread loaf hollowed
bread loaf hollowed make
matched
bread machine
bread machine yeast
matched
bread machine yeast
bread mix
matched
bread mix
bread mix jiffy
matched
bread processed crumbs
bread processed fine crumbs
matched
bread roll mix yeast
bread rolls
matched
bread rolls
bread rolls split half
matched
bread rounds
bread rounds brown
matched
bread rounds brown
bread rounds half
matched
bread rounds half
bread rounds triangles
matched
bread split
bread split half
matched
bread stuffing
bread stuffing cu

campbell healthy
campbell healthy request
matched
campbell healthy request
campbell healthy request condensed
matched
campbell healthy request condensed
campbell healthy request condensed cream
matched
campbell tomato juice tomato
campbell tomato juice tomato juice
matched
campbell v8 vegetable
campbell v8 vegetable juice
matched
canada ginger
canada ginger ale
matched
canadian bacon
canadian beer
matched
canadian beer
canadian beer steam
matched
canadian whiskey
canadian whiskey crown
matched
candied cherries
candied citron
matched
candied citron
candied citron peel
matched
candied citron peel
candied citrus peel
matched
candied mixed citrus peel
candied mixed fruit
matched
candied mixed fruit
candied mixed fruit peel
matched
candied waxgourd
candies
matched
candies
candies available cake decorating
matched
candies colored sugars
candies desired
matched
candies hershey
candies hershey pumpkin
matched
candies unwrapped
candy
matched
candy
candy bar
matched
candy bar
candy bar broken
ma

celery stalks
celery stalks thirds
matched
celery stalks thirds
celery stick
matched
celery stick
celery sticks
matched
celery sticks
celery sticks bell pepper
matched
celery thirds
celery tops
matched
cellentani
cellentani cavatappi pasta
matched
cellentani corkscrew
cellentani corkscrew pasta
matched
cellophane noodles
cellophane noodles warm
matched
center
center bacon crumbled
matched
center beef tenderloin
center beef tenderloin roast
matched
center beef tenderloin roast
center bone
matched
center bone
center bone pork
matched
center boneless pork
center boneless pork loin
matched
center boneless pork loin
center boneless salmon
matched
center boneless salmon
center boneless salmon fillets
matched
center pork chops
center pork loin
matched
center pork loin
center pork loin chops
matched
center ribs discarded
center ribs discarded leaves
matched
center ribs discarded leaves
center ribs out
matched
center salmon
center salmon fillet
matched
center salmon fillet
center salmon fillet 

cherry cola
matched
cherry flavor gelatin
cherry flavor lemon
matched
cherry flavor lemon
cherry flavor lemon lime
matched
cherry flavor lemon lime
cherry flavor lemon lime carbonated
matched
cherry gelatin
cherry gelatin mix
matched
cherry grape
cherry grape tomatoes
matched
cherry jam
cherry jell
matched
cherry jell
cherry jell mix
matched
cherry juice
cherry juice concentrate
matched
cherry lemon
cherry lemon lime
matched
cherry lemon lime
cherry lemon lime soda
matched
cherry liqueur
cherry liqueur cherry
matched
cherry peppers
cherry peppers juice
matched
cherry peppers juice
cherry pie
matched
cherry pie
cherry pie filling
matched
cherry sauce
cherry soda
matched
cherry soda
cherry soft drink mix
matched
cherry soft drink mix
cherry stem
matched
cherry stem
cherry sweetened
matched
cherry sweetened
cherry sweetened soft
matched
cherry sweetened soft
cherry sweetened soft drink
matched
cherry tomato halves
cherry tomatoes
matched
cherry tomatoes
cherry tomatoes desired
matched
che

chunky red green
matched
chunky tomato
chunky tomato sauce
matched
chutney
chutney branston
matched
ciabatta
ciabatta bread
matched
ciabatta bread
ciabatta bread cubes
matched
ciabatta bread cubes
ciabatta bread dinner rolls
matched
cider
cider glaze
matched
cider vinegar
cider vinegar bragg
matched
cider vinegar bragg
cider vinegar needed
matched
cider vinegar needed
cider vinegar taste
matched
cilantro
cilantro garnish
matched
cilantro garnish
cilantro green
matched
cilantro green
cilantro green onion
matched
cilantro leaves
cilantro leaves garnish
matched
cilantro leaves lightly
cilantro leaves loosely
matched
cilantro leaves mccormick
cilantro leaves picked
matched
cilantro leaves picked
cilantro leaves picked stems
matched
cilantro leaves picked stems
cilantro leaves plus
matched
cilantro leaves plus
cilantro leaves stems
matched
cilantro leaves stems
cilantro leaves taste
matched
cilantro leaves taste
cilantro leaves thinner
matched
cilantro leaves thinner
cilantro leaves thinner

container blackberries mashed
container blackberry yogurt
matched
container blueberries
container blueberries stemmed
matched
container blueberry nonfat yogurt
container blueberry yogurt
matched
container breakstone knudsen
container breakstone knudsen sour
matched
container brown sugar
container brown sugar cinnamon
matched
container buitoni refrigerated
container buitoni refrigerated alfredo
matched
container buitoni refrigerated alfredo
container buitoni refrigerated alfredo sauce
matched
container buitoni refrigerated alfredo sauce
container buitoni refrigerated light
matched
container buitoni refrigerated light
container buitoni refrigerated light alfredo
matched
container butter flavor
container butter flavor popcorn
matched
container butter flavor popcorn
container butter flavor popcorn oil
matched
container cherry
container cherry grape
matched
container cherry grape
container cherry grape tomatoes
matched
container cherry grape tomatoes
container cherry yogurt
matched
containe

crab boil
crab boil seasoning
matched
crab claw
crab claw picked
matched
crab claw picked
crab claw picked over
matched
crab legs
crab legs if
matched
crab legs if
crab legs shell
matched
crab meat
crab meat excess
matched
crab shrimp
crab shrimp boil
matched
crab shrimp boil
crab shrimp boil seasoning
matched
crab shrimp seasoning
crab shrimp seasoning mix
matched
crabapples
crabapples cleaned
matched
crabapples cleaned
crabapples stems
matched
crabmeat
crabmeat bite sized
matched
crabmeat flaked
crabmeat flaked cartilage
matched
crabmeat picked free shell
crabmeat picked over
matched
crabmeat picked over
crabmeat picked over cartilage
matched
crabmeat well
crabmeat well picked
matched
crabs cleaned
crabs cleaned cracked shell
matched
crabs cleaned cracked shell
crabs cleaned their
matched
cracked
cracked black pepper
matched
cracked black pepper
cracked black pepper taste
matched
cracked black pepper taste
cracked black peppercorns
matched
cracked coriander seed
cracked ice
matched
c

matched
dark porter
dark porter beer
matched
dark raisins
dark red
matched
dark red
dark red kidney
matched
dark red kidney
dark red kidney beans
matched
dark roast coffee
dark roast coffee beans
matched
dark roast espresso coffee
dark rum
matched
dark rum
dark rum rum
matched
dark rum rum
dark rum rum flavoring
matched
dark rum taste
dark rum tres
matched
dark sweet
dark sweet cherries
matched
dark sweet cherries
dark sweet cherries heavy
matched
dark turkey
dark turkey meat
matched
dash almond extract
dash almond syrup
matched
dash bitters
dash bitters angostura
matched
dash celery salt
dash celery seed
matched
dash chili powder
dash chili sauce
matched
dash garlic powder
dash garlic salt
matched
dash grenadine
dash grenadine syrup
matched
dash hot pepper sauce
dash hot pepper sauce tabasco
matched
dash hot pepper sauce tabasco
dash hot sauce
matched
dash onion powder
dash onion salt
matched
dash orange bitters
dash orange flower water
matched
dash pepper sauce
dash peppermint extrac

dough separated
dough separated each portion
matched
dough sheets pillsbury
dough shells
matched
doughnut
doughnuts
matched
down backbone
down backbone heavy
matched
dr oetker whip
dr oetker whip it
matched
dr pepper
dr pepper soda
matched
dr pepper soda
dr pepper soft
matched
dr pepper soft
dr pepper soft drink
matched
dragees
dragees desired
matched
dragon
dragon eyes
matched
dragon fruit
dragon fruit pitaya
matched
drain reserve liquid
drains
matched
dream
dreamfields
matched
dressing
dressing annie lemon
matched
dressing bernstein
dressing bernstein restaurant
matched
dressing marie
dressing miracle whip
matched
dressing miracle whip
dressing mix
matched
dressing mix
dressing mix good
matched
dressing mix good
dressing mix hidden
matched
dressing mix hidden
dressing mix lipton
matched
dressing mix lipton
dressing mix taste
matched
dressing needed
dressing newman own
matched
dressing wish bone
dressing wishbone
matched
drink coke
drink concentrate
matched
drink mix
drink mix any fla

fat cheese mexican cheese
matched
fat cream celery soup
fat cream cheese
matched
fat cream cheese
fat cream cheese neufchatel softened
matched
fat cream cheese neufchatel softened
fat cream cheese onion
matched
fat cream cheese onion
fat cream cheese room temperature
matched
fat cream cheese room temperature
fat cream cheese softened
matched
fat cream cheese softened
fat cream cheese spread
matched
fat creamy ranch dressing
fat creamy salad dressing
matched
fat creamy salad dressing
fat creamy salad dressing miracle
matched
fat crescent roll dough
fat crescent rolls
matched
fat drippings any
fat drippings any type
matched
fat feta cheese
fat feta cheese crumbled
matched
fat free
fat free american cheese
matched
fat free blue cheese
fat free blue cheese salad
matched
fat free butter
fat free butter granules
matched
fat free butter granules
fat free buttermilk
matched
fat free catalina salad
fat free catalina salad dressing
matched
fat free cheddar
fat free cheddar cheese
matched
fat fre

french baguette half
french baguette inches
matched
french baguette inches
french baguette inches long
matched
french baguette inches long
french baguettes
matched
french blend
french bread
matched
french bread
french bread baguette
matched
french bread baguette
french bread baguettes split
matched
french bread baguettes split
french bread bite
matched
french bread bite
french bread broken
matched
french bread broken
french bread crumbled
matched
french bread crumbled
french bread crust
matched
french bread crust
french bread crusts
matched
french bread crusts
french bread cubes
matched
french bread cubes
french bread dipping
matched
french bread dipping
french bread half
matched
french bread half
french bread lightly
matched
french chili original
french chili original seasoning
matched
french chili original seasoning
french chili original seasoning mix
matched
french french fried onions
french fried
matched
french fried
french fried onion rings
matched
french fried onion rings
french 

garden herb tofu
garden herb tofu crumbled
matched
garden vegetable
garden vegetable pasta
matched
garden vegetable pasta
garden vegetable pasta sauce
matched
garden vegetable pasta sauce
garden vegetables
matched
garlic
garlic aioli
matched
garlic bagel
garlic bagel chips
matched
garlic barbecue sauce
garlic basil
matched
garlic basil
garlic basil spread
matched
garlic basil spread
garlic basil spread see
matched
garlic basil spread see
garlic basil spread see footnote
matched
garlic bread
garlic bruised
matched
garlic butter
garlic butter melted
matched
garlic cheese spread
garlic chicken
matched
garlic chicken
garlic chicken sausage
matched
garlic chile oil
garlic chile paste
matched
garlic chile paste
garlic chili sauce
matched
garlic chili sauce
garlic chives
matched
garlic chives
garlic clove
matched
garlic clove
garlic clove mashed
matched
garlic clove pressed
garlic cloves
matched
garlic cloves
garlic cloves lightly
matched
garlic cloves separated
garlic cloves smashed
matched


matched
goya blackeye
goya blackeye peas
matched
goya chick
goya chick peas
matched
goya chipotle
goya chipotle chile
matched
goya chipotle chile
goya chipotle chile adobo
matched
goya chipotle chile adobo
goya chipotle chiles
matched
goya chipotle chiles
goya chipotle chiles adobo
matched
goya chipotle chiles adobo
goya chipotle peppers
matched
goya chipotle peppers
goya chipotle peppers adobo
matched
goya chocolate maria
goya chocolate maria cookies
matched
goya coconut
goya coconut milk
matched
goya coriander annatto
goya corn
matched
goya corn
goya corn kernels
matched
goya corn kernels
goya corn oil
matched
goya corn oil
goya corn oil frying
matched
goya corn tortillas
goya corn tortillas warmed
matched
goya cream coconut
goya cream coconut stirred
matched
goya cumin
goya cumin taste
matched
goya discos
goya discos yellow
matched
goya discos yellow
goya discos yellow white
matched
goya evaporated
goya evaporated milk
matched
goya extra virgin olive
goya extra virgin olive oil
matc

green onions fine
green onions garnish
matched
green onions garnish
green onions green
matched
green onions green
green onions green parts
matched
green onions green parts
green onions green parts only
matched
green onions green parts only
green onions green portion
matched
green onions green portion
green onions green portion only
matched
green onions green portion only
green onions green tops
matched
green onions including
green onions on
matched
green onions on
green onions plus
matched
green onions plus
green onions pressed flat
matched
green onions scallions
green onions sesame
matched
green onions sesame
green onions slivered
matched
green onions slivered
green onions some
matched
green onions some
green onions taste
matched
green onions taste
green onions tops
matched
green onions tops
green onions tops few
matched
green onions tops few
green onions tops included
matched
green onions tops included
green onions white
matched
green onions white
green onions white green
matched
gre

hard rolls
hard rolls split
matched
hard salami
hard salami bite
matched
hard shell blue crabs
hard then
matched
hard white wheat
hard white wheat flour
matched
haribo
haricots verts french
matched
haricots verts french
haricots verts french green
matched
haricots verts french green
haricots verts french green beans
matched
harissa
harissa north african red
matched
harissa north african red
harissa north african red pepper
matched
harissa taste
harissa taste see
matched
harissa taste see
harp
matched
harp
harvest peach yogurt
matched
hash
hash brown
matched
hash brown
hash brown patties
matched
hash brown patties
hash brown potato cubes
matched
hash brown potato cubes
hash brown potatoes
matched
hash brown potatoes
hash brown potatoes onion
matched
hash brown potatoes onion
hash browns
matched
hass avocado
hass avocado mashed
matched
hass avocado mashed
hass avocados
matched
havarti
havarti cheese
matched
havarti cheese
havarti cheese cubes
matched
havarti cheese cubes
havarti cubes
ma

matched
ice slush
ice slush consistency
matched
ice water
ice water ice
matched
ice water ice
ice water ice cubes
matched
ice water ice cubes
ice water if
matched
ice water if
ice water if needed
matched
ice water needed
ice water plus
matched
ice water plus
ice water reduce
matched
ice water reduce
ice water reduce sharpness
matched
iceberg
iceberg lettuce
matched
iceberg lettuce
iceberg lettuce bite
matched
iceberg lettuce bite
iceberg lettuce cored
matched
iceberg lettuce cored
iceberg lettuce julienned
matched
iceberg lettuce leaves separated
iceberg lettuce wedges
matched
iced tea
iced tea mix
matched
iced tea mix
iced tea powder
matched
iced water
iced water needed
matched
icing
icing sugar
matched
idaho
idaho dice
matched
idaho potato
idaho potatoes
matched
idaho potatoes
idaho potatoes scrubbed but unpeeled
matched
idahoan applewood smoked
idahoan applewood smoked bacon
matched
idahoan baby reds
idahoan baby reds mashed
matched
idahoan baby reds mashed
idahoan baby reds peas
ma

matched
juice cocktail
juice cocktail clamato
matched
juice cocktail clamato
juice cocktail v8
matched
juice cocktail v8
juice cold
matched
juice cold
juice concentrate
matched
juice concentrate
juice concentrate any
matched
juice concentrate any
juice concentrate any flavor
matched
juice concentrate any flavor
juice concentrate any flavor except
matched
juice concentrate undiluted
juice concentrate water
matched
juice cranberry juice cocktail
juice crazy
matched
juice crazy
juice crazy steve
matched
juice crazy steve
juice crazy steve cajun
matched
juice drain juice
juice drain juice reserve
matched
juice jarred pepperoncini
juice jarred pepperoncini peppers
matched
juice jarred pepperoncini peppers
juice juice
matched
juice juice
juice juice reserved
matched
juice lemon
juice lemon juice
matched
juice lemon juice
juice lemons
matched
juice lightly
juice lightly hands
matched
juice lightly hands
juice lime
matched
juice lime
juice lime juice seasoned
matched
juice not concentrate
juic

leg confit
matched
leg lamb
leg lamb at
matched
leg lamb at
leg lamb at room
matched
leg lamb bone
leg lamb butterflied
matched
leg lamb deboned
leg lamb meat
matched
leg steaks
legs
matched
legs
legs if
matched
legs if
legs patted
matched
lemon
lemon balm leaves
matched
lemon balm leaves
lemon basil
matched
lemon basil
lemon basil dressing
matched
lemon basil dressing
lemon basil taste
matched
lemon cake
lemon cake mix
matched
lemon carbonated
lemon carbonated beverage
matched
lemon curd
lemon curd any flavor
matched
lemon gelatin
lemon gelatin mix
matched
lemon gelatin mix
lemon gin
matched
lemon gin
lemon glaze
matched
lemon glaze
lemon grass
matched
lemon grass
lemon grass bruised
matched
lemon grass bruised
lemon grass bulb
matched
lemon grass bulb
lemon grass smashed
matched
lemon grass smashed
lemon grass white
matched
lemon grass white
lemon grass white part
matched
lemon grass white part
lemon grass white part only
matched
lemon half
lemon halves reserved
matched
lemon ice
lem

matched
loma linda big
loma linda big franks
matched
london broil
london broil roast
matched
london broil roast
london broil steak
matched
london broil steak
london broil steak about inches
matched
london broil steak about inches
long
matched
long
long asparagus spears
matched
long beans
long beans bite
matched
long beans bite
long beans halves
matched
long chinese eggplants
long chives
matched
long chives
long chives tying
matched
long even
long french
matched
long french
long french fries
matched
long french fries
long french fry
matched
long french fry
long french fry shaped
matched
long french fry shaped
long french fry shaped strands
matched
long grain
long grain brown
matched
long grain brown
long grain brown rice
matched
long grain grain
long grain rice
matched
long grain rice
long grain rice minute
matched
long grain white
long grain white rice
matched
long grain white rice
long grain white rice cooled
matched
long grain white rice cooled
long grain white rice not
matched
long 

mayonnaise hellmann
matched
mayonnaise hellmann
mayonnaise hellmann best
matched
mayonnaise hellmann best
mayonnaise hellmann light
matched
mayonnaise hellmann light
mayonnaise hellmann low
matched
mayonnaise hellmann low
mayonnaise if
matched
mayonnaise if
mayonnaise if needed
matched
mayonnaise if needed
mayonnaise kewpie
matched
mayonnaise kewpie
mayonnaise kraft
matched
mayonnaise made olive
mayonnaise made olive oil
matched
mayonnaise needed
mayonnaise not salad dressing
matched
mayonnaise not salad dressing
mayonnaise plus
matched
mayonnaise plus
mayonnaise salad dressing
matched
mayonnaise taste
mayonnaise trader
matched
mayonnaise trader
mayonnaise trader joe
matched
mazola chicken flavor bouillon
mazola chicken flavor bouillon powder
matched
mazola corn oil
mazola corn oil deep
matched
mccormick
mccormick bay leaves
matched
mccormick black pepper
mccormick black pepper coarse
matched
mccormick black pepper coarse
mccormick black pepper coarse grind
matched
mccormick cajun seaf

matched
milk plus if
milk plus if necessary
matched
milk plus needed
milk plus taste
matched
milk powder
milk powder nesquik
matched
milk ricotta cheese
milk ricotta cheese at
matched
milk ricotta cheese at
milk ricotta cheese at room
matched
milk scalded
milk scalded cooled
matched
milk soy creamer
milk soy dream
matched
milkfish bangus
milkfish bangus taste
matched
milled flax seed
milled flax seeds
matched
miller high life
miller lite
matched
miller lite
millet
matched
millet
millet chia bread
matched
mincemeat
mincemeat crumbled
matched
mincemeat crumbled
mincemeat crumbled taste
matched
mincemeat filling topping crosse
mincemeat pie
matched
mincemeat pie
mincemeat pie filling
matched
minestrone soup
mini
matched
mini
mini bagels
matched
mini bagels
mini bagels split
matched
mini candy
mini candy coated
matched
mini candy coated
mini candy coated chocolate
matched
mini candy coated chocolate
mini candy coated chocolates
matched
mini cheese
mini cheese filled
matched
mini cheese fil

neck turkey excluding liver
necks
matched
nectar
nectar honey
matched
nectar kern
nectar needed
matched
nectarine
nectarine do not
matched
nectarine do not
nectarine do not peel
matched
nectarine do not peel
nectarine stone
matched
nectarine stone
nectarines
matched
nectarines
nectarines blueberries
matched
nectarines blueberries
nectarines blueberries strawberries
matched
nectarines blueberries strawberries
nectarines cubes
matched
needed
needed blending
matched
needed consistency
needed cover
matched
needed mazola pure no
needed mazola pure no stick
matched
needed mazola pure no stick
needed pan
matched
needed pan
needed pectin
matched
needed thicken
needed thicken sauce
matched
needed thicken sauce
needed thirds
matched
negi
negra modelo
matched
nesquick
nesquick taste
matched
nesquick taste
nesquik
matched
nestle
nestle abuelita
matched
nestle butterfinger bites
nestle butterfinger bites candy
matched
nestle carnation
nestle carnation evaporated
matched
nestle carnation evaporated


matched
old paso taco seasoning
old paso taco shells
matched
old paso zesty
old paso zesty sour
matched
old world traditional pasta
old world traditional sauce
matched
olive
olive bread
matched
olive bread
olive brine
matched
olive brine
olive brine needed
matched
olive garden
olive garnish
matched
olive half
olive halves
matched
olive juice
olive juice adjust
matched
olive juice adjust
olive juice adjust personal
matched
olive oil
olive oil aged balsamic vinaigrette
matched
olive oil bertolli
olive oil brush
matched
olive oil brush
olive oil brushing
matched
olive oil canola
olive oil canola oil
matched
olive oil cracked pepper
olive oil cracked pepper reduced
matched
olive oil cracked pepper reduced
olive oil desired
matched
olive oil drizzle over
olive oil drizzling
matched
olive oil greasing pan
olive oil herbs
matched
olive oil herbs
olive oil if
matched
olive oil if
olive oil marinating
matched
olive oil melted
olive oil melted butter
matched
olive oil melted butter
olive oil nee

matched
paprika plus
paprika plus garnish
matched
parboiled long grain rice
parboiled long grain white
matched
parboiled long grain white
parboiled long grain white rice
matched
parboiled long grain white rice
parboiled rice
matched
parboiled rice
parboiled rice uncle
matched
parchment paper
parchment paper this recipe
matched
pared
pared cored
matched
pareve margarine
pareve margarine melted
matched
parkay original spread tub
parkay original spread tub melted
matched
parmesan cheese
parmesan cheese croutons
matched
parmesan cheese garnish
parmesan cheese grated
matched
parmesan cheese grated
parmesan cheese if
matched
parmesan cheese if
parmesan cheese needed
matched
parmesan cheese needed
parmesan cheese plus
matched
parmesan cheese plus
parmesan cheese plus extra
matched
parmesan cheese plus extra
parmesan cheese rind
matched
parmesan cheese rind
parmesan cheese serving
matched
parmesan cheese serving
parmesan cheese shaved
matched
parmesan cheese shaved
parmesan cheese shavings
mat

matched
pesto taste
pesto three
matched
pesto three
pesto three bridges
matched
pet
pet evaporated
matched
pet evaporated
pet evaporated milk
matched
petals
petals taste
matched
petals taste
petals white base off
matched
peter pan creamy peanut
peter pan creamy peanut butter
matched
peter pan crunchy peanut
peter pan crunchy peanut butter
matched
peter pan crunchy peanut butter
petes
matched
petit ecolier european biscuits
petite
matched
petite
petite green peas
matched
petite tomatoes
petite tomatoes chipotle
matched
petite tomatoes chipotle
petite tomatoes green
matched
petite tomatoes green
petite tomatoes juice
matched
petite tomatoes juice
petite tomatoes onions
matched
petite tomatoes onions
petite tomatoes well
matched
pheasant
pheasant breast
matched
pheasant breast
pheasant breast halves
matched
pheasant breast halves
pheasant breast halves cleaned
matched
pheasant cleaned
pheasant deboned skinned
matched
pheasant deboned skinned
pheasant meat
matched
philadelphia
philadelphia

matched
pomegranate skin light colored
pomegranate syrup
matched
pomegranate syrup
pomegranate vodka
matched
pomegranate vodka
pomegranate vodka van
matched
ponzu sauce
ponzu sauce citrus soy
matched
ponzu sauce citrus soy
ponzu sauce citrus soy based
matched
popcorn microwave air popped
popcorn oil
matched
popcorn oil
popcorn popped
matched
popcorn salt
popcorn taste
matched
popped popcorn
popped popcorn garnish
matched
popped popcorn microwave
popped popcorn microwave air
matched
popped popcorn microwave air
popped popcorn microwave air popped
matched
poppy seed
poppy seed dressing
matched
poppy seed rolls
poppy seed salad
matched
poppy seed salad
poppy seed salad dressing
matched
poppy seed salad dressing
poppy seeds
matched
poppy seeds
poppy seeds taste
matched
porcini
porcini mushrooms
matched
porcini mushrooms
porcini mushrooms cleaned
matched
porcini mushrooms cleaned
pork
matched
pork
pork baby back ribs
matched
pork beans
pork beans tomato
matched
pork butt
pork butt roast
mat

quinces cored
quinces cored peels
matched
quinoa
quinoa elbow pasta
matched
quinoa taste
quinoa trio
matched
quorn
quorn chicken
matched
quorn chicken
quorn chicken recipe
matched
quorn chicken recipe
quorn chicken recipe tenders
matched
rabbit
rabbit cleaned
matched
rabbit meat
rabbit meat cleaned
matched
rabbit patted
rabbit up
matched
rabbits dressed deboned
rabbits dressed deboned bones
matched
rabbits dressed deboned bones
rabe
matched
rabe
rabe broccoli florets
matched
rack beef
rack beef back
matched
rack beef back
rack beef back rib
matched
rack beef back rib
rack beef ribs
matched
racks lamb
racks lamb fat
matched
racks lamb fat
racks lamb ribs
matched
racks smithfield extra tender pork
racks smithfield extra tender st
matched
radicchio
radicchio cored
matched
radish
radish grated
matched
radish matchstick
radish matchsticks
matched
radish sprouts
radish sprouts taste
matched
radish sticks
radishes
matched
radishes
radishes ends
matched
radishes ends
radishes ends radishes
mat

reduced fat cheese mexican
matched
reduced fat cheese mexican
reduced fat cheese mexican cheese
matched
reduced fat coconut
reduced fat coconut milk
matched
reduced fat coconut milk
reduced fat colby
matched
reduced fat colby
reduced fat colby cheese
matched
reduced fat cottage cheese
reduced fat cream
matched
reduced fat cream
reduced fat cream celery
matched
reduced fat cream celery
reduced fat cream cheese
matched
reduced fat cream cheese
reduced fat cream cheese neufchatel
matched
reduced fat cream cheese neufchatel
reduced fat cream cheese room
matched
reduced fat cream cheese room
reduced fat cream cheese softened
matched
reduced fat cream cheese softened
reduced fat cream cheese spread
matched
reduced fat cream cheese spread
reduced fat cream chicken
matched
reduced fat cream chicken
reduced fat cream mushroom
matched
reduced fat cream mushroom
reduced fat creamy ranch
matched
reduced fat creamy ranch
reduced fat creamy ranch dressing
matched
reduced fat crescent roll dough
redu

matched
roasted pecans
roasted peking duck meat
matched
roasted pistachio nuts
roasted pistachios
matched
roasted pumpkin
roasted pumpkin seed kernels
matched
roasted pumpkin seed kernels
roasted pumpkin seeds
matched
roasted pumpkin seeds
roasted red
matched
roasted red
roasted red bell
matched
roasted red bell
roasted red bell pepper
matched
roasted red bell pepper
roasted red bell peppers
matched
roasted red pepper
roasted red pepper garlic
matched
roasted red pepper garlic
roasted red pepper hummus
matched
roasted red pepper hummus
roasted red pepper tomato sauce
matched
roasted red pepper tomato sauce
roasted red peppers
matched
roasted red peppers
roasted red peppers liquid
matched
roasted red peppers liquid
roasted red peppers oil
matched
roasted red peppers oil
roasted red peppers patted
matched
roasted red potatoes
roasted red sweet
matched
roasted red sweet
roasted red sweet peppers
matched
roasted red yellow
roasted red yellow peppers
matched
roasted red yellow peppers
roast

matched
sargento fine traditional
sargento off
matched
sargento off
sargento off block
matched
sargento reduced fat
sargento reduced fat cheese
matched
sargento reduced fat cheese
sargento reduced fat mild
matched
sargento reduced fat mild
sargento reduced sodium
matched
sargento reduced sodium
sargento reduced sodium mild
matched
sargento reduced sodium mild
sargento reduced sodium mozzarella
matched
sargento state cheddar
sargento state cheddar cheese
matched
sashimi grade tuna
sashimi grade tuna steak
matched
sashimi grade tuna steak
sashimi grade tuna steaks
matched
satay sauce
satay sauce taste
matched
sauce
sauce a1
matched
sauce a1
sauce a1 steak
matched
sauce bull eye
sauce bush
matched
sauce bush
sauce bush best
matched
sauce canned chilies
sauce canned chipotle peppers
matched
sauce dei fratelli
sauce del monte
matched
sauce garlic onions
sauce garnish
matched
sauce golden curry
sauce golden dragon
matched
sauce golden dragon
sauce goya
matched
sauce goya
sauce goya salsa
mat

matched
shallot
shallot crisps
matched
shallot crisps
shallot crosswise
matched
shallot vinaigrette recipe below
shallots
matched
shallots
shallots julienned
matched
shank
shank bone
matched
shank osso buco
shanks
matched
shanks
shanks on bone
matched
shape strands
shaped
matched
shaped
shaped chewy fruit snacks
matched
shaped pasta
shaped pasta fusilli
matched
shaped pasta fusilli
shaped pasta gemilli
matched
shaped pretzels
shaped pretzels utz
matched
shaped strands
shapes
matched
shark steaks
shark steaks chunks
matched
shark steaks chunks
sharp
matched
sharp
sharp cheddar
matched
sharp cheddar
sharp cheddar cheese
matched
sharp cheddar cheese
sharp cheddar cheese grated
matched
sharp cheddar cheese grated
sharp cheddar cheese plus
matched
sharp cheddar cheese plus
sharp cheddar cheese singles
matched
sharp cheddar cheese singles
sharp cheddar cheese spread
matched
sharp new york
sharp new york cheddar
matched
sharp new york cheddar
sharp new york cheddar cheese
matched
sharp proces

sloppy joe
sloppy joe sauce
matched
sloppy joe sauce
sloppy joe seasoning mix
matched
slow cooker liner
slow cookers
matched
smart
smart balance brand
matched
smart balance brand
smart balance brand may
matched
smart balance brand may
smart balance brand may use
matched
smart balance brand may use
smart balance light
matched
smart balance light
smart balance melted
matched
smart balance melted
smart balance omega
matched
smashed
smashed apricots
matched
smith
smith apple
matched
smith apple
smith apple cored
matched
smith apple cored
smith apple matchstick
matched
smith apple unpeeled
smith apple unpeeled cored
matched
smith apple unpeeled cored
smith apples
matched
smith apples
smith apples bite
matched
smith apples bite
smith apples chunks
matched
smith apples chunks
smith apples cored
matched
smith apples cored
smith apples other firm
matched
smithfield extra tender pork back
smithfield extra tender pork spareribs
matched
smithfield extra tender pork spareribs
smithfield extra tende

matched
split down back
split down its length
matched
split half
split half then
matched
split heated
split hinged
matched
split hinged
split husked mung beans
matched
split pea flour
split peas
matched
split peas
split peas moong dal
matched
split peas moong dal
split peas tuvar dal
matched
split seeds scraped
split seeds scraped out
matched
split yellow lentils moong
split yellow lentils moong dal
matched
split yellow lentils moong dal
split yellow peas
matched
sponge
sponge biga
matched
sponge cake
sponge cake cubes
matched
sponge cake cubes
sponge cake shells
matched
spoon
spoon powdered sugar
matched
sport pepper sauce
sport peppers
matched
sports drink
sports drink gatorade
matched
spray
spray blueberries cleaned
matched
spread
spread almond butter peanut
matched
spread see footnote
spread see footnote recipe
matched
spreadable cheese alouette
spreadable cheese boursin
matched
spreadable cream cheese
spreadable fruit
matched
spreadables butter original melted
spreadables melted
m

sugar sugar raw
matched
sugar sugar raw
sugar sugar substitute honey
matched
sugar vanilla extract
sugars
matched
sukiyaki ingredients
sukiyaki sauce
matched
sumac
sumac powder
matched
summer savory
summer savory chubritsa
matched
summer squash
summer squash chunks
matched
summer squash zucchini
summer squashes
matched
sun
sun gold
matched
sun tomato
sun tomato halves
matched
sun tomato halves
sun tomato oil
matched
sun tomato oil
sun tomato oregano
matched
sun tomato paste
sun tomato pesto
matched
sun tomato spread
sun tomatoes
matched
sun tomatoes
sun tomatoes marinated
matched
sun tomatoes marinated
sun tomatoes not
matched
sun tomatoes not
sun tomatoes not oil
matched
sun tomatoes not oil
sun tomatoes oil
matched
sun tomatoes oil
sun tomatoes rehydrated
matched
sundried tomato mayonnaise see
sundried tomato mayonnaise see below
matched
sundried tomato mayonnaise see below
sundried tomatoes
matched
sunflower kernels
sunflower meal
matched
sunflower meal
sunflower oil
matched
sunflow

tart apple
tart apple cored
matched
tart apple cored
tart apple fugi
matched
tart apple fugi
tart apple granny
matched
tart apple granny
tart apples
matched
tart apples
tart apples cored
matched
tart cherries
tart cherries door
matched
tart cooking apples
tart cooking apples cored
matched
tart green apple
tart green apples
matched
tart green apples
tart green apples cored
matched
tart red
tart red apple
matched
tart shells
tart shells baked
matched
tartar
tartar sauce
matched
tartaric acid
tarts
matched
tasso ham
tasso ham matchsticks
matched
taste
taste broken
matched
taste flaked
taste grated
matched
taste leaves
taste leaves stems
matched
taste plus extra
taste plus extra dusting
matched
taste see ingredient
taste see ingredient note
matched
taste see ingredient note
taste serving
matched
taste thai
taste thai peanut
matched
taste thai peanut
taste thai peanut satay
matched
taste wear
taste wear gloves
matched
tater
tater tots
matched
tea
tea bags
matched
tea bags
tea bags black tea

turkey meat
turkey meat light
matched
turkey meat light
turkey meat light dark
matched
turkey meat pulled
turkey meat taste
matched
turkey meatballs
turkey neck
matched
turkey neck
turkey neck giblets
matched
turkey neck giblets
turkey neck giblets reserved
matched
turkey neck heart gizzard
turkey neck stock
matched
turkey neck stock
turkey necks
matched
turkey pan drippings
turkey pan drippings fat
matched
turkey pan drippings fat
turkey pan drippings fat skimmed
matched
turkey pan drippings fat skimmed
turkey pan drippings turkey
matched
turkey pepperoni
turkey pepperoni hormel
matched
turkey plain
turkey plain smoked
matched
turkey sausage
turkey sausage bite
matched
turkey sausage bite
turkey sausage casings
matched
turkey sausage casings
turkey sausage casings discarded
matched
turkey sausage casings discarded
turkey sausage chunks
matched
turkey sausage chunks
turkey sausage crumbles
matched
turkey sausage link without casing
turkey sausage links
matched
turkey sausage links
turk

vinaigrette salad dressing taste
vinaigrette taste
matched
vine ripe tomato
vine ripe tomatoes
matched
vine ripened tomato cored
vine ripened tomatoes
matched
vinegar
vinegar acidity
matched
vinegar based carolina
vinegar based carolina bbq
matched
vinegar based hot pepper
vinegar based hot pepper sauce
matched
vinegar plus
vinegar potato chips
matched
vinegar rice vinegar
vinegar rice wine vinegar
matched
vinegar taste
vinegar white wine vinegar
matched
virgin coconut oil
virgin coconut oil melted
matched
virgin coconut oil melted
virgin coconut oil room
matched
virgin coconut oil room
virgin coconut oil room temperature
matched
virgin olive oil
virgin olive oil aged balsamic
matched
virgin olive oil brushing
virgin olive oil canola
matched
virgin olive oil canola
virgin olive oil drizzling
matched
virgin olive oil drizzling
virgin olive oil pesto
matched
virgin olive oil pesto
virgin olive oil plus
matched
vital wheat gluten
vital wheat gluten flour
matched
vitar liquid
vitar liquid 

matched
without casing
without casings
matched
without flavor lightly
without oil
matched
without oil
without peanuts
matched
without peanuts
without peel
matched
without peel
without pulp
matched
without shells
without skin
matched
without skin
without tails
matched
without tails
without tails deveined
matched
won
won ton
matched
won ton
won ton wrappers
matched
wonton
wonton filling
matched
wonton wrappers
wonton wrappers needed
matched
wonton wrappers needed
wontons
matched
wood chips
wood chips needed
matched
wooden skewers
wooden skewers coffee
matched
wooden skewers coffee
wooden skewers coffee stirrers
matched
wooden skewers coffee stirrers
wooden skewers long
matched
wooden skewers long
wooden skewers needed
matched
wooden skewers needed
wooden skewers water
matched
wooden skewers water
wooden stake
matched
wooden stake
wooden sticks
matched
worcestershire
worcestershire sauce
matched
worcestershire sauce
worcestershire sauce lea
matched
worcestershire sauce lea
worcestershire 

In [97]:
cleaned_ingredients_set = list(ingredients_set)
for dupe in duplicates:
    cleaned_ingredients_set.remove(dupe)
cleaned_ingredients_set.sort()
print("NUM OF INGREDIENTS AFTER HAMMING %d" % len(cleaned_ingredients_set))

NUM OF INGREDIENTS AFTER HAMMING 19147


In [98]:
write_list_file('ingredients_set.txt', cleaned_ingredients_set)

In [99]:
counts = term_counts(cleaned_ingredients_set)
counts_series = pd.Series(counts)
counts_series.sort_values(ascending=False).head(1000).to_csv('counts.csv')

In [100]:
len(cleaned_ingredients_set)

19147

In [101]:
cleaned_ingredients_set

['12x12 inches each reynolds wrap',
 '12x12 squares aluminum foil',
 '12x16 parchment paper',
 '12x16 squares aluminum',
 '12x18 inches each reynolds wrap',
 '12x18 nylon',
 '12x20 parchment',
 '12x36 inches reynolds wrap non',
 '18x15 inches each reynolds wrap',
 '18x24 inches reynolds wrap heavy',
 '1x1 matchsticks',
 '24x8x1 untreated cedar plank',
 '2x1 cubes',
 '2x2 sourdough bread',
 '2x3',
 '3x6',
 '4x4',
 '60l grain bag',
 '6x1',
 '6x9 sheet puff',
 '7up cherry',
 '7up needed',
 '7up taste',
 '8x2 inches',
 '9x9 pan cornbread cooled',
 'a1 steak sauce',
 'a1 taste',
 'abalone without shell',
 'about degrees degrees',
 'about diameter red bliss',
 'about each down',
 'about inches long',
 'about lime',
 'about sea salt',
 'about thickness',
 'above',
 'absinthe based bitters',
 'absinthe substitute',
 'absolut citron',
 'absolut ruby red',
 'abuelita',
 'ac cent',
 'acai berry sorbet',
 'acai juice',
 'acai pulp sambazon',
 'accent flavor enhancer',
 'accents organic sprouted ri