## Notebook to start expirementing with NLP analysis of Judge description from Translated CSV


In [8]:
import spacy as sp

nlp = sp.load('en_core_web_sm')


In [3]:
#First Text output 
sample_text = "Bright red color and initial aromas dominated by a spicy that coupled with fruity notes were a very interesting set, in the mouth has a pleasant presence, is fresh and delicate, always dominated by fruity notes and with an average end of length but persistent flavor."

doc = nlp(sample_text)

In [4]:
#Extract all the Adjectives
descriptors = [token.text.lower() for token in doc if token.pos_ == "ADJ"]

print(descriptors)

['bright', 'red', 'initial', 'interesting', 'pleasant', 'fresh', 'delicate', 'average', 'persistent']


In [5]:
#Experimenting with Matcher

from spacy.matcher import Matcher

matcher = Matcher(nlp.vocab)

pattern = [{"POS": "ADJ"},
          {"IS_STOP": True, "OP": "?"},
          {"POS": "NOUN"}]

matcher.add("ADJ_NOUN_PATTERN", [pattern])

matches = matcher(doc)

for match_id, start, end in matches:
    span = doc[start:end]
    print(span.text)

red color
initial aromas
interesting set
pleasant presence
average end
persistent flavor


In [6]:
flavor_lexicon = {
    "spicy": ["spicy", "peppery", "cinnamon", "clove", "nutmeg"],
    "fruity": ["fruity", "apple", "pear", "cherry", "strawberry", "citrus"],
    "floral": ["floral", "rose", "violet", "lilac", "lavender"],
}

#need to add earthy, oaky, other common wine categories and descriptors


In [7]:
extracted_flavors = []

for token in doc:
    token_text_lower = token.text.lower()
    for category, words in flavor_lexicon.items():
        if token_text_lower in words:
            extracted_flavors.append(category)

print(extracted_flavors)

['spicy', 'fruity', 'fruity']


In [9]:
#Parse Color, Finish, etc

wine_color = None
finish_descriptors = []

for sent in doc.sents:
    if "color" in sent.text.lower():
        wine_color = " ".join([t.text for t in sent if t.dep_ == "amod" or t.pos_ == "ADJ"])
    if "finish" in sent.text.lower() or "end" in sent.text.lower():
        finish_descriptors.append(sent.text)

In [10]:
print(wine_color)
print()
print(finish_descriptors)

Bright red initial interesting pleasant fresh delicate average persistent

['Bright red color and initial aromas dominated by a spicy that coupled with fruity notes were a very interesting set, in the mouth has a pleasant presence, is fresh and delicate, always dominated by fruity notes and with an average end of length but persistent flavor.']


In [11]:
wine_profile = {
    "color": wine_color,
    "flavors": list(set(extracted_flavors)),
    "finish": finish_descriptors,
}

print(wine_profile)

{'color': 'Bright red initial interesting pleasant fresh delicate average persistent', 'flavors': ['fruity', 'spicy'], 'finish': ['Bright red color and initial aromas dominated by a spicy that coupled with fruity notes were a very interesting set, in the mouth has a pleasant presence, is fresh and delicate, always dominated by fruity notes and with an average end of length but persistent flavor.']}
