In [1]:
import numpy as np
import string
from functools import partial
from collections import defaultdict
import pandas as pd

In [2]:
def rec_dd(depth=0):
    if depth == 2:
        return 0
    
    return defaultdict(partial(rec_dd, depth + 1))

In [3]:
initial = {} # start of a phrase (first word)
first_order = rec_dd()
second_order = rec_dd()

In [4]:
def remove_punctuation(s):
    return s.translate(str.maketrans('','',string.punctuation))

In [5]:
def add2dict(d, k, v):  #dictionary, key, value = list of possible next words
    if k not in d:
        d[k] = []
    d[k].append(v)   # [cat, cat, dog, dog, dog, mouse, ...]

In [6]:
mc_quotes = pd.read_csv("mckenna_quotes.csv")
for row in mc_quotes["quotes"]:    # traverse poems, populate dictionary
    tokens = remove_punctuation(row.rstrip().lower()).split()    # turns every line into list of words

    T = len(tokens)
    for i in range(T):
        t = tokens[i]
        if i == 0:
            initial[t] = initial.get(t, 0.) + 1 # if t does not exist in dict, it gets created (assigned value 0, immediately added 1)
        
        else:
            t_1 = tokens[i-1]
            
            if i == T - 1:   # checking if end of sentence (starts counting at 0, thus T-1)
                if 'END' not in second_order[(t_1, t)]:
                    second_order[(t_1, t)]['END'] = 0 
                second_order[(t_1, t)]['END'] += 1    
                
            if i == 1:  # when given only the first word
                first_order[t_1][t] += 1
                
            else:
                t_2 = tokens[i-2]
                
                if t not in second_order[(t_1, t)]:
                    second_order[(t_2, t_1)][t] = 0
                second_order[(t_2, t_1)][t] += 1

In [7]:
second_order

defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 1),
            {('we',
              'call'): defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 2), {'reality': 1,
                          'imagination': 1,
                          'understanding': 1,
                          'three': 1,
                          'the': 1,
                          'civilization': 1,
                          'styles': 1,
                          'motifs': 1}),
             ('what',
              'we'): defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 2), {'call': 1,
                          'are': 2,
                          'have': 1,
                          'experience': 1,
                          'think': 1,
                          'say': 1,
                          'represent': 1,
                          'might': 1,
                          'need': 1,
                          'really': 1,
                          'seem': 1,
  

In [8]:
first_order

defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 1),
            {'what': defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 2),
                         {'we': 8,
                          'civilization': 1,
                          'is': 7,
                          'blinds': 2,
                          'they': 1,
                          'the': 4,
                          'was': 1,
                          'i': 3,
                          'happens': 1,
                          'im': 1,
                          'history': 1,
                          'good': 1,
                          'people': 1,
                          'you': 1,
                          'foods': 1,
                          'ive': 1}),
             'notice': defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 2),
                         {'that': 1}),
             'nature': defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 2),
          

In [9]:
first_order.values()

dict_values([defaultdict(..., {'we': 8, 'civilization': 1, 'is': 7, 'blinds': 2, 'they': 1, 'the': 4, 'was': 1, 'i': 3, 'happens': 1, 'im': 1, 'history': 1, 'good': 1, 'people': 1, 'you': 1, 'foods': 1, 'ive': 1}), defaultdict(..., {'that': 1}), defaultdict(..., {'loves': 2, 'is': 4, 'has': 1, 'and': 1}), defaultdict(..., {'are': 5, 'see': 1, 'have': 1, 'put': 1, 'arent': 1, 'know': 1, 'will': 1, 'may': 1, 'can': 1, 'could': 1}), defaultdict(..., {'are': 4, 'can': 1}), defaultdict(..., {'or': 1}), defaultdict(..., {'imagination': 3, 'most': 1, 'real': 5, 'cost': 1, 'drugs': 1, 'major': 1, 'world': 6, 'problem': 2, 'truth': 2, 'tension': 1, 'only': 4, 'main': 2, 'bigger': 1, 'artist’s': 1, 'overriding': 1, 'long': 1, 'surface': 1, 'psychedelic': 10, 'internet': 2, 'smart': 1, 'shaman': 5, 'history': 2, 'drug': 1, 'word': 1, 'mind': 3, 'apocalypse': 2, 'engineers': 1, 'culture': 1, 'future': 1, 'logos': 1, 'question': 1, 'reason': 2, 'propsychedelic': 1, 'leading': 1, 'way': 2, 'psychede

In [10]:
# normalize the distributions (turning counts into percentage of total)
initial_total = sum(initial.values())
for t, c in initial.items():
    initial[t] = c / initial_total # maximum likelihood estimate
    

for t, c in first_order.items():    # new in ver2
    first_order_subtotal = sum(c.values())
    for sub_t, sub_c, in c.items():
         first_order[t][sub_t] = sub_c / first_order_subtotal
    
for t, c in second_order.items():    # new in ver2
    second_order_subtotal = sum(c.values())
    for sub_t, sub_c, in c.items():
         second_order[t][sub_t] = sub_c / second_order_subtotal
            
            
    
#second_order_total = sum(second_order.values())  # new in ver2
#for k, ts in second_order.items():
#    second_order[k] = c / second_order_total    

In [11]:
# convert [cat, cat, cat, dog, dog, dot, mouse, ...] into {cat: 0.5, dog: 0.4, mouse: 0.1}

#def list2pdict(ts):
    #d = {}
    #n = len(ts)
    #for t in ts:
        #d[t] = d.get(t, 0.) + 1
    #for t, c in d.items():
        #d[t] = c / n
    #return d

In [12]:
#for t_1, ts in first_order.items():
    # replace list with dictionary of probabilities
    #first_order[t_1] = list2pdict(ts)   

In [13]:
#for k, ts in second_order.items():
    #second_order[k] = list2pdict(ts)

In [14]:
def sample_word(d): # sample a word given a dict of probabilities
    p0 = np.random.random()  # randomly chooses number (just once) between 0 and 1
    #print(f"p0 = {p0}")
    cumulative = 0
    for t, p in d.items():     # token and corresponding probability
        cumulative += p    # adds probability of each token
        #print(cumulative) 
        if cumulative > p0:    # originally if p0 < cumulative
            #print(t)
            return t
    assert(False) # should never get here

In [15]:
def generate():
    for i in range(4): # generate 4 lines at a time
        sentence = []
        
        # sample initital word
        w0 = sample_word(initial)
        sentence.append(w0)
        
        # sample second word
        w1 = sample_word(first_order[w0])
        sentence.append(w1)
        
        # second-order transitions until END
        while True:
            w2 = sample_word(second_order[(w0, w1)])
            if w2 == 'END':
                break   # goes to next line
            sentence.append(w2)
            w0 = w1
            w1 = w2
        print(' '.join(sentence))  

In [16]:
generate()

if in fact it seems to be lifted
everything will come true in cyberspace things are simpler than they are male ego dominated
escape into the visual and the heart of the fractal laws that govern the world is something new and intelligent things to each other the insides of our lives and our collective past
the central figure in the offing the world at an informational level there is not culturally given to you is your responsibility because the only choice is real and what the goddess really is a prophetic dimension a glimpse of the potential of a neurotic culture against the dehumanizing values that are normally hidden from view


In [17]:
first_order

defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 1),
            {'what': defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 2),
                         {'we': 0.22857142857142856,
                          'civilization': 0.02857142857142857,
                          'is': 0.2,
                          'blinds': 0.05714285714285714,
                          'they': 0.02857142857142857,
                          'the': 0.11428571428571428,
                          'was': 0.02857142857142857,
                          'i': 0.08571428571428572,
                          'happens': 0.02857142857142857,
                          'im': 0.02857142857142857,
                          'history': 0.02857142857142857,
                          'good': 0.02857142857142857,
                          'people': 0.02857142857142857,
                          'you': 0.02857142857142857,
                          'foods': 0.02857142857142857,
                     

In [18]:
second_order

defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 1),
            {('we',
              'call'): defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 2), {'reality': 0.125,
                          'imagination': 0.125,
                          'understanding': 0.125,
                          'three': 0.125,
                          'the': 0.125,
                          'civilization': 0.125,
                          'styles': 0.125,
                          'motifs': 0.125}),
             ('what',
              'we'): defaultdict(functools.partial(<function rec_dd at 0x7f3bf4392310>, 2), {'call': 0.0625,
                          'are': 0.125,
                          'have': 0.0625,
                          'experience': 0.0625,
                          'think': 0.0625,
                          'say': 0.0625,
                          'represent': 0.0625,
                          'might': 0.0625,
                          'need': 0.0625,
  