# morning drinks for existencial questions

by nicolás escarpentier

## Structure of this notebook
I want to be able to switch the source dialogs, but the recipe structure will always be the same. At the same time, the list of target questions and answers will be something I input. 

The flow of this notebook will be the following:
- load recipe structures
- create tracery grammar rules
- create dialog loader
  - this creates the lists of tokens and rest of the stuff
- construct functions that let me use all from above with the input of q&a targets

## import packages

In [1]:
# python packages
import random as rng
from collections import Counter
import numpy as np
from numpy import dot
from numpy.linalg import norm

In [2]:
# spacy
import spacy
nlp = spacy.load('en_core_web_md')

In [3]:
# tracery
import tracery
from tracery.modifiers import base_english

In [4]:
# recipe scraper package
from recipe_scrapers import scrape_me

### functions
I will also define some base functions that have been used in class and are not specific to this project

In [5]:
# list functions
def remove_all(bye, words):
    while(bye in words):
        words.remove(bye)

In [6]:
# vector addition
def addv(coord1, coord2):
    return [c1 + c2 for c1, c2 in zip(coord1, coord2)]

# vector subtraction
def subtractv(coord1, coord2):
    return [c1 - c2 for c1, c2 in zip(coord1, coord2)]

# vector average
def meanv(coords):
    # assumes every item in coords has same length as item 0
    sumv = [0] * len(coords[0])
    for item in coords:
        for i in range(len(item)):
            sumv[i] += item[i]
    mean = [0] * len(sumv)
    for i in range(len(sumv)):
        mean[i] = float(sumv[i]) / len(coords)
    return mean

In [7]:
# get spacy vector
def vec(s):
    return nlp.vocab[s].vector

# get spacy sentence vector
def sentvec(s):
    sent = nlp(s)
    return meanv([w.vector for w in sent])

# cosine similarity
def cosine(v1, v2):
    if norm(v1) > 0 and norm(v2) > 0:
        return dot(v1, v2) / (norm(v1) * norm(v2))
    else:
        return 0.0

# closest word to target vector from token list
def spacy_closest(token_list, vec_to_check, n=10):
    return sorted(token_list, key=lambda x: cosine(vec_to_check, vec(x)), reverse=True)[:n]

# closest sentence to target vector from token list
def spacy_closest_sent(token_list, vec_to_check, n=10):
    return sorted(token_list, key=lambda x: cosine(vec_to_check, sentvec(x)), reverse=True)[:n]

# recipe structures

### units and extra ingredients

First, the extra ingredients and ingredients unit list is just made by hand

In [8]:
ingr_units = ["fluid oz.",
             "tablespoons",
             "teaspoons",
             "oz."]

ingr_extra = ["fresh mint leaves",
             "1 lime, cut into wedges",
             "ice cubes",
             "rimming salt",
             "1 orange, sliced",
             "twist lime zest",
             "maraschino cherries",
             "pineapple wedges"]

### instructions

Now, onto creating the scraped instructions

In [9]:
def verb_tracer(s):
    # look for the verb on the sentence
    verb = ""
    for word in s:
        if word.tag_ == "VB":
            verb = word
    # if there's no verb, return ""
    if verb is "":
        return ""
    # if the verb has children, go through them looking for "prep" and "dobj"
    elif len(list(verb.children)) > 0:
        # get the prep
        prep_children = [ch for ch in list(verb.children) if ch.dep_ == "prep"]
        # join them for tracery
        prep_text = " ".join([ch.text+" #np#" for ch in prep_children])
        # get the dobj
        dobj_children = [ch for ch in list(verb.children) if ch.dep_ == "dobj"]
        # joint them as a string
        dobj_text = ""
        for ch in dobj_children:
            dobj_text += " ".join([word.text for word in ch.subtree])
        # get the noun_chunks from the sentence and replace them with
        # a tracery placeholder in the dobj_text
        chunks = s.noun_chunks
        for ch in chunks:
            dobj_text = dobj_text.replace(ch.text, "#np#")
        # return the beautiful phrase
        return verb.text + " " + dobj_text + " " + prep_text
    # else, just return the verb + a tracery placeholder
    else:
        return verb.text+" #np#"

In [10]:
# scrape the full recipes
drinks_sources = [line.strip() for line in open('./recipe_sources.txt').readlines()]
drinks_scraped = [scrape_me(item) for item in drinks_sources]

In [11]:
# extract the instructions from the recipes
drinks_instructions = []
[drinks_instructions.append(drink.instructions()) for drink in drinks_scraped]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [12]:
# use nlp to separate the sentences and have all the data we need
nlp_instructions = [list(nlp(inst).sents) for inst in drinks_instructions]

In [13]:
# get the finishing touches
instr_finish = [instr[-1].text.strip() for instr in nlp_instructions]

In [14]:
# separate the instruction bodies
instr_nlp_body = []
[instr_nlp_body.extend(instr[:-1]) for instr in nlp_instructions]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [15]:
# and create the instructions ready for tracery
instr_body = [verb_tracer(instr).strip() for instr in instr_nlp_body]
remove_all('', instr_body)

## tracery grammar

## dialog loader

In [27]:
# turn everything into a function!
def dialog_loader(file_name):
    # load the dialog and use nlp to get the sentences
    dialog = [line.strip() for line in open(file_name).readlines()]
    dialog_sents = [line.text.strip() for line in list( nlp(' '.join(dialog)).sents )]
    # get the questions and answers
    q_a_lines = [[q_line, a_line] for q_line,a_line in zip(dialog_sents, dialog_sents[1:]) if '?' in q_line]
    # return these pairs!
    return q_a_lines

## final functions

This is what puts everything together and are the only places where you'd have to enter inputs

In [65]:
def get_drink(q_target, a_target):
    # == DEFINE FROM DIALOG SOURCE
    # define a question according to the target
    all_questions = spacy_closest_sent( dialog_q, vec(q_target), 5 )
    question = rng.choice( all_questions )
    print(question)
    print('')
    # get the target noun_chunk to "solve" and create the solution vector (question to target) 
    question_chunks = [ch.text for ch in nlp(question).noun_chunks]
    selected_chunk = rng.choice(question_chunks)
    solution_vector = subtractv(vec(q_target), sentvec(selected_chunk))
    # get the answer noun_chunks > the ingredients
    a_vector = vec(a_target)
    all_ingredients = spacy_closest(ingr_tokens, addv(solution_vector, a_vector), 15)
    # == INGREDIENTS
    # make an ingredients list
    if len(all_ingredients) >= 6:
        ingredients = rng.sample(all_ingredients, rng.randrange(3,6))
    else:
        ingredients = all_ingredients
    # and write the list with the amounts
    for ingr in ingredients:
        amnt = rng.randrange(1, 4)
        unit = rng.choice(ingr_units)
        print( str(amnt) + ' ' + unit + ' of ' + ingr )
    # add random extra ingredient
    print( rng.choice(ingr_extra) )
    print('')
    # == INSTRUCTIONS
    # select the instructions
    instructions = ''
    for i in range(rng.randrange(3, 5)):
        # select a random instruction
        instructions += rng.choice(instr_body) + '\n'
    # replace the placeholders with ingredients in the order of the list, 
    # overflowing if the index goes out of bounds
    ingr_index = 0
    while "#np#" in instructions:
        instructions = instructions.replace("#np#", ingredients[ingr_index], 1)
        ingr_index = (ingr_index+1)%len(ingredients)
    # add the finishing instruction
    instructions += rng.choice( instr_finish )
    # and print the instructions
    print(instructions)
    print("\n==========\n")

## INPUT !!

In [51]:
# filename input
filename = "./hp7_dialog.txt"
# load dialog and create the lists for q&a + answer noun_chunks > the ingredients (global)
dialog = dialog_loader(filename)
dialog_q = [pair[0] for pair in dialog]
dialog_a = [pair[1] for pair in dialog]
ingr_tokens = list(set( [ch.text for ch in nlp( ' '.join(dialog_a) ).noun_chunks] ))

## EXECUTE !!

In [66]:
# set the targets: what to solve in the question and what you want to achieve in the answer
question_target = "tired"
answer_target = "happiness"
# and execute!
get_drink(question_target, answer_target)

"Didn't I tell you this nutter was just raving as usual?

2 fluid oz. of pain
2 teaspoons of life
3 fluid oz. of soul
3 teaspoons of sense
1 oz. of longing
pineapple wedges

add pain and life
Dip soul of sense in longing into pain
combine
serve  over life
Add ice and stir.




And now create :D


In [67]:
# 2, hp7, from stressed to rested
question_target = "stressed"
answer_target = "rested"
get_drink(question_target, answer_target)

Mind, who's to say they haven't already caught and killed him without publicizing it?"

2 fluid oz. of centuries
3 tablespoons of lips
2 oz. of Nerves
pineapple wedges

punch centuries
punch lips
fill Nerves with centuries
Garnish with a lime wedge.




In [70]:
# 3, hp7, from anxious to ecstatic
question_target = "anxious"
answer_target = "ecstatic"
get_drink(question_target, answer_target)

How are you feeling?

2 teaspoons of desperation
3 tablespoons of panic
2 tablespoons of excitement
ice cubes

Pour desperation of panic onto excitement
Mix desperation of your favorite sparkling white to panic
Pour  in excitement
Fill desperation
Garnish with maraschino cherry.




### making a more dialog function

In [73]:
def get_drink2(q_target, a_target):
    # == DEFINE FROM DIALOG SOURCE
    # define a question according to the target
    all_questions = spacy_closest_sent( dialog_q, vec(q_target), 5 )
    question = rng.choice( all_questions )
    all_questions.remove(question)
    print(question)
    for q in all_questions:
        print(q)
    print('')
    # get the target noun_chunk to "solve" and create the solution vector (question to target) 
    question_chunks = [ch.text for ch in nlp(question).noun_chunks]
    selected_chunk = rng.choice(question_chunks)
    solution_vector = subtractv(vec(q_target), sentvec(selected_chunk))
    # get the answer noun_chunks > the ingredients
    a_vector = vec(a_target)
    all_ingredients = spacy_closest(ingr_tokens, addv(solution_vector, a_vector), 15)
    # == INGREDIENTS
    # make an ingredients list
    if len(all_ingredients) >= 6:
        ingredients = rng.sample(all_ingredients, rng.randrange(3,6))
    else:
        ingredients = all_ingredients
    # and write the list with the amounts
    for ingr in ingredients:
        amnt = rng.randrange(1, 4)
        unit = rng.choice(ingr_units)
        print( str(amnt) + ' ' + unit + ' of ' + ingr )
    # add random extra ingredient
    print( rng.choice(ingr_extra) )
    print('')
    # == INSTRUCTIONS
    # select the instructions
    instructions = ''
    for i in range(rng.randrange(3, 5)):
        # select a random instruction
        instructions += rng.choice(instr_body) + '\n'
    # replace the placeholders with ingredients in the order of the list, 
    # overflowing if the index goes out of bounds
    ingr_index = 0
    while "#np#" in instructions:
        instructions = instructions.replace("#np#", ingredients[ingr_index], 1)
        ingr_index = (ingr_index+1)%len(ingredients)
    # add the finishing instruction
    instructions += rng.choice( instr_finish )
    # and print the instructions
    print(instructions)
    print("\n==========\n")

and test!

In [74]:
# 4, sw7, from lost to decided

# filename input
filename = "./sw7_dialog.txt"
# load dialog and create the lists for q&a + answer noun_chunks > the ingredients (global)
dialog = dialog_loader(filename)
dialog_q = [pair[0] for pair in dialog]
dialog_a = [pair[1] for pair in dialog]
ingr_tokens = list(set( [ch.text for ch in nlp( ' '.join(dialog_a) ).noun_chunks] ))

question_target = "lost"
answer_target = "decided"
get_drink2(question_target, answer_target)

It wasn't all bad, was it?
Who had it, Ducain?
Wasn't he a war hero?!
What happened -- did he hurt you?
I've had a pretty messed up day, alright?!

1 fluid oz. of They
2 tablespoons of him
3 tablespoons of I
1 teaspoons of them
pineapple wedges

release They oils and him juice
chill
Dip I of them in They into him
Makes 9 delicious drinks, perfect for any ladies night in.




In [None]:
# 5, hp5, from lost to decided

# filename input
filename = "./hp5_dialog.txt"
# load dialog and create the lists for q&a + answer noun_chunks > the ingredients (global)
dialog = dialog_loader(filename)
dialog_q = [pair[0] for pair in dialog]
dialog_a = [pair[1] for pair in dialog]
ingr_tokens = list(set( [ch.text for ch in nlp( ' '.join(dialog_a) ).noun_chunks] ))

question_target = "lost"
answer_target = "decided"
get_drink2(question_target, answer_target)