## Algorithmic Invention
### Mark Wolff<br>Hartwick College
#### ELO 2018

\>\>\> model.wv.most_similar(positive=['femme', 'roi'], negative=['homme'], topn=1)

[(u'reine', 0.8085041046142578)]

In [1]:
from IPython.display import IFrame
# Embed the external page at this URL in the cell output; 2100 and 800 are
# passed as IFrame's width and height arguments.
IFrame('http://www.ghostweather.com/files/word2vecpride/', 2100, 800)

![Word Vector Model for Flaubert](NCF_short_author_Flaubert_tsne_plot.svg "Word Vector Model for Flaubert")

In [2]:
import re
import pickle
import gensim
import spacy

In [3]:
discourse = 'Flaubert'
# Selects which vector space of words the cells below work with. Each vector
# space represents a different discourse, i.e. a way in which language is
# used. The value must be a key of the `params` dictionary defined below;
# in this notebook those keys are 'Flaubert' and 'RussianTrolls'.
# NOTE(review): an earlier version of this comment also listed Trump, Balzac
# and Sand, but `params` has no entries for them here.

In [4]:
# The asserted text, taken from Baudelaire's poem Enivrez-vous!. Its words
# will be swapped for similar words chosen through the analogy defined below.
assertion = (
    u"Il faut être toujours ivre. Tout est là : "
    u"c'est l'unique question. Pour ne pas sentir l'horrible "
    u"fardeau du Temps qui brise vos épaules et vous penche "
    u"vers la terre, il faut vous enivrer sans trêve."
)

# The analogy used to find similar words in the vector space: `positive` is
# passed on the positive side of the similarity query and `negative` on the
# negative side.
positive, negative = u'bien', u'mal'

In [5]:
# Settings for every available discourse. Each value is a 4-item list:
#   [0] file name of the saved vector space of words (the word2vec model)
#   [1] file name of a pickled dictionary of all words in the vector space
#       with their part-of-speech (POS) tags
#   [2] the language of the vector space
#   [3] POS tags for words that will not be replaced in the asserted text
params = {
    # vector space of words from 30 volumes by Flaubert
    'Flaubert': [
        'NCF_Flaubert_model',
        'NCF_pos_dict.pkl',
        'fr',
        ('DET', 'PUNCT'),
    ],
    # vector space of words from the Russian Troll tweets shared by
    # fivethirtyeight
    'RussianTrolls': [
        'RussianTrolls_model',
        'RussianTrolls_pos_dict.pkl',
        'en',
        ('DT', 'PUNCT', 'IN'),
    ],
}

# The maximum number of similar words requested from the vector space for
# each word in the asserted text.
number_of_options = 15

In [6]:
# Rewrite the asserted text: every word is looked up in the vector space
# selected by `discourse`, shifted by the positive/negative analogy, and
# replaced by the similar words that share its part of speech (POS).

model = gensim.models.Word2Vec.load(params[discourse][0])

# The `with` block closes the pickle file even if unpickling fails.
# NOTE(review): pickle.load can run arbitrary code while loading; only open
# POS dictionaries that come from a trusted source.
with open(params[discourse][1], 'rb') as pickleFile:
    posd = pickle.load(pickleFile)

nlp = spacy.load(params[discourse][2])
parsed = nlp(assertion)

# Build a list of 3-tuples for each word in the asserted text:
# (the word in the asserted text, its POS, its lemma)
words = [(w.text.lower(), w.tag_, w.lemma_.lower()) for w in parsed]

# POS tags for words that are never replaced (like determiners and
# punctuation), to maintain readability in the invented text.
excluded_pos = params[discourse][3]

# The analogy words do not change from one word to the next, so lower-case
# them once instead of on every iteration.
analogy_positive = positive.lower()
analogy_negative = negative.lower()

new_words = []

for word in words:
    # The POS tag for a word in the asserted text: the part of the tag that
    # precedes the first '__'.
    psw = word[1].split('__')[0]

    #print(word[0], word[1], word[2]) # for debugging

    if psw in excluded_pos:
        # Excluded words always keep their original form, so there is no
        # point in querying the vector space for them.
        new_words.append(word[0])
        continue

    # The vector space words that are similar to this word of the asserted
    # text and have the same POS.
    hits = []

    try:
        # Look for similar words in the vector space based on the analogy.
        for item in model.wv.most_similar(positive=[analogy_positive,
                                                    word[2]],
                                          negative=[analogy_negative],
                                          topn=number_of_options):

            #print('\t', item) # for debugging

            # Does the vector-space word have a POS tag? A candidate with no
            # entry is skipped on its own, so it no longer discards the hits
            # already collected for this word.
            tags = posd.get(item[0])
            if not tags:
                continue

            psd = next(iter(tags)).split('__')[0]

            #print('\t\t', psd) # for debugging

            # Keep only words that are the same POS as the original word in
            # the asserted text.
            if psw == psd:
                hits.append(item[0])

    except Exception:
        # If something weird happens (for instance the word is not in the
        # vector space), just use the original word. Unlike a bare `except:`
        # this does not swallow KeyboardInterrupt or SystemExit.
        hits = []

        #print('EXCEPTION', word[0]) # for debugging

    if hits:
        # At least one vector space word has the same POS: display the
        # candidates in parentheses in the invented text.
        new_words.append('(' + '|'.join(hits) + ')')
    else:
        # Nothing matched, so use the original word.
        new_words.append(word[0])

response = ' '.join(new_words)

IOError: [Errno 2] No such file or directory: 'NCF_short_author_Flaubert_model'

In [None]:
# Show the asserted text, a blank line, then the invented text.
# Written as single-argument print calls so the cell produces the same output
# whether print is a statement (Python 2) or a function (Python 3).
print(assertion + ' \n')
print(response)

On espère être complètement fougueux. L’impossible est là : c’est l’unique œuvre. Pour davantage (peut-être) exprimer l’inspiration tyrannique du moment qui roule vos épaules et vous penche vers le banc, on espère vous effaroucher sans trêve.

In [None]:
%%HTML
<blockquote class="twitter-tweet" data-lang="en">
<p lang="en" dir="ltr">&quot;No one is born hating another person because of the
color of his skin or his background or his religion...&quot;
<a href="https://t.co/InZ58zkoAm">pic.twitter.com/InZ58zkoAm</a>
</p>&mdash; Barack Obama (@BarackObama)
<a href="https://twitter.com/BarackObama/status/896523232098078720?ref_src=twsrc%5Etfw">August 13, 2017</a></blockquote>
<script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script> 


In [None]:
%%HTML
<blockquote class="twitter-tweet" data-conversation="none" data-lang="en">
<p lang="en" dir="ltr">&quot;People must learn to hate, and if they can learn to hate,
they can be taught to love...&quot;</p>&mdash; Barack Obama (@BarackObama)
<a href="https://twitter.com/BarackObama/status/896523304873238528?ref_src=twsrc%5Etfw">August 13, 2017</a></blockquote>
<script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>  


In [None]:
%%HTML
<blockquote class="twitter-tweet" data-conversation="none" data-lang="en">
<p lang="en" dir="ltr">&quot;...For love comes more naturally to the human heart
than its opposite.&quot; - Nelson Mandela</p>&mdash; Barack Obama (@BarackObama)
<a href="https://twitter.com/BarackObama/status/896523357272911872?ref_src=twsrc%5Etfw">August 13, 2017</a></blockquote>
<script async src="https://platform.twitter.com/widgets.js" charset="utf-8"></script>  


In [None]:
discourse = 'RussianTrolls'
# Selects which vector space of words the cells below work with. Each vector
# space represents a different discourse, i.e. a way in which language is
# used. The value must be a key of the `params` dictionary defined earlier;
# in this notebook those keys are 'Flaubert' and 'RussianTrolls'.
# NOTE(review): an earlier version of this comment listed Trump, Balzac,
# Sand and Flaubert, but `params` has no entries for the first three here.

In [None]:
# The asserted text: a series of tweets by Barack Obama (August 2017) quoting
# Nelson Mandela. Its words will be swapped for similar words chosen through
# the analogy defined below.
assertion = (
    u"No one is born hating another person because of the "
    u"color of his skin or his background or his religion. People "
    u"must learn to hate, and if they can learn to hate, they can "
    u"be taught to love. For love comes more naturally to the "
    u"human heart than its opposite."
)

# The analogy used to find similar words in the vector space: the `positive`
# words are passed on the positive side of the similarity query and the
# `negative` words on the negative side.
positive = [u'white', u'charlottesville']
negative = [u'minority']

In [None]:
# Rewrite the asserted text: every word is looked up in the vector space
# selected by `discourse`, shifted by the positive/negative analogy, and
# replaced by the similar words that share its part of speech (POS).
# Here `positive` and `negative` are lists of words.

model = gensim.models.Word2Vec.load(params[discourse][0])

# The `with` block closes the pickle file even if unpickling fails.
# NOTE(review): pickle.load can run arbitrary code while loading; only open
# POS dictionaries that come from a trusted source.
with open(params[discourse][1], 'rb') as pickleFile:
    posd = pickle.load(pickleFile)

nlp = spacy.load(params[discourse][2])
parsed = nlp(assertion)

# Build a list of 3-tuples for each word in the asserted text:
# (the word in the asserted text, its POS, its lemma)
words = [(w.text.lower(), w.tag_, w.lemma_.lower()) for w in parsed]

# POS tags for words that are never replaced (like determiners and
# punctuation), to maintain readability in the invented text.
excluded_pos = params[discourse][3]

new_words = []

for word in words:
    # The POS tag for a word in the asserted text: the part of the tag that
    # precedes the first '__' (the whole tag when there is none).
    psw = word[1].split('__')[0]

    #print(word[0], word[1], word[2]) # for debugging

    if psw in excluded_pos:
        # Excluded words always keep their original form, so there is no
        # point in querying the vector space for them.
        new_words.append(word[0])
        continue

    # The vector space words that are similar to this word of the asserted
    # text and have the same POS.
    hits = []

    try:
        # Look for similar words in the vector space based on the analogy.
        for item in model.wv.most_similar(positive=positive + [word[2]],
                                          negative=negative,
                                          topn=number_of_options):

            #print('\t', item) # for debugging

            # Does the vector-space word have a POS tag? A candidate with no
            # entry is skipped on its own, so it no longer discards the hits
            # already collected for this word.
            tags = posd.get(item[0])
            if not tags:
                continue

            psd = next(iter(tags)).split('__')[0]

            #print('\t\t', psd) # for debugging

            # Keep only words that are the same POS as the original word in
            # the asserted text.
            if psw == psd:
                hits.append(item[0])

    except Exception:
        # If something weird happens (for instance the word is not in the
        # vector space), just use the original word. Unlike a bare `except:`
        # this does not swallow KeyboardInterrupt or SystemExit.
        hits = []

        #print('EXCEPTION', word[0]) # for debugging

    if hits:
        # At least one vector space word has the same POS: display the
        # candidates in parentheses in the invented text.
        new_words.append('(' + '|'.join(hits) + ')')
    else:
        # Nothing matched, so use the original word.
        new_words.append(word[0])

response = ' '.join(new_words)

In [None]:
# Show the asserted text, a blank line, then the invented text.
# Written as single-argument print calls so the cell produces the same output
# whether print is a statement (Python 2) or a function (Python 3).
print(assertion + ' \n')
print(response)

In [None]:
# Embed the external page at this URL in the cell output; 2100 and 800 are
# passed as IFrame's width and height arguments.
IFrame('http://www.alamo.free.fr/pmwiki.php?n=Logiciels.Programmes', 2100, 800)