# NLP Poetry Generator #

## Download Data & Import Libraries ##

In [1]:
# Fetch the two poetry corpora. -nc (no-clobber) skips the download when the
# file is already present, so this cell is safe to re-run.
!wget -nc https://raw.githubusercontent.com/lazyprogrammer/machine_learning_examples/master/hmm_class/robert_frost.txt
!wget -nc https://raw.githubusercontent.com/lazyprogrammer/machine_learning_examples/master/hmm_class/edgar_allan_poe.txt

File 'robert_frost.txt' already there; not retrieving.

File 'edgar_allan_poe.txt' already there; not retrieving.



In [2]:
import numpy as np
import string

# Seed NumPy's global RNG: the generator below draws from np.random, so a
# fixed seed makes sampling repeatable when the notebook is run top to bottom.
np.random.seed(1234)

In [3]:
# Preview the first lines of the Frost corpus (the training text used below).
!head robert_frost.txt

Two roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth; 

Then took the other, as just as fair,
And having perhaps the better claim
Because it was grassy and wanted wear,
Though as for that the passing there


In [4]:
# Preview the first lines of the Poe corpus.
# NOTE(review): the cells shown in this notebook only train on robert_frost.txt.
!head edgar_allan_poe.txt

LO! Death hath rear'd himself a throne
In a strange city, all alone,
Far down within the dim west
Where the good, and the bad, and the worst, and the best,
Have gone to their eternal rest.
 
There shrines, and palaces, and towers
Are not like any thing of ours
Oh no! O no! ours never loom
To heaven with that ungodly gloom!


## Data Cleaning & Preparation ##

In [5]:
# Load the Frost corpus as a list of lowercase, punctuation-free lines.

# The translation table does not depend on the line, so build it once.
strip_punctuation = str.maketrans('', '', string.punctuation)

samples_list = []
# The context manager closes the file handle as soon as reading finishes
# instead of leaving it to the garbage collector.
with open("robert_frost.txt", "r") as corpus_file:
    for line in corpus_file:
        line = line.rstrip().lower().translate(strip_punctuation)
        # Skip blank lines (stanza breaks) and lines that contain no words
        # once punctuation is removed; an empty sample would otherwise be
        # counted as a line that starts with the end-of-line marker.
        if line.strip():
            samples_list.append(line)

In [6]:
# Spot-check the first five cleaned lines.
samples_list[:5]

['two roads diverged in a yellow wood',
 'and sorry i could not travel both',
 'and be one traveler long i stood',
 'and looked down one as far as i could',
 'to where it bent in the undergrowth']

## Train Markov Models on the Data ##

In [7]:
# Model tables. They are filled with raw counts first and normalized to
# probabilities afterwards:
#   initial      -- word -> weight of that word opening a line
#   first_order  -- opening word -> {second token -> weight}
#   second_order -- word -> {next word -> {following token -> weight}}
initial, first_order, second_order = {}, {}, {}

In [8]:
# Count how lines start and how tokens follow one another:
#   initial[w0]              -- number of lines that open with w0
#   first_order[w0][w1]      -- number of lines where w1 follows the opening word w0
#   second_order[a][b][c]    -- number of times c follows the consecutive pair (a, b)
#
# Reset the tables in place first so that re-running this cell neither
# double-counts nor mixes raw counts with already-normalized probabilities.
initial.clear()
first_order.clear()
second_order.clear()

for line in samples_list:
    # "END" marks the end of a line. The samples are lowercased when loaded,
    # so the uppercase marker cannot collide with a real word.
    tokens = line.split() + ["END"]

    # Initial state distribution: the token that opens the line.
    opening_word = tokens[0]
    initial[opening_word] = initial.get(opening_word, 0) + 1

    # First-order transitions: opening word -> second token only.
    if len(tokens) > 1:
        second_token = tokens[1]
        followers = first_order.setdefault(opening_word, {})
        followers[second_token] = followers.get(second_token, 0) + 1

    # Second-order transitions: every consecutive triple in the line.
    for prev_word_2, prev_word_1, word in zip(tokens, tokens[1:], tokens[2:]):
        followers = second_order.setdefault(prev_word_2, {}).setdefault(prev_word_1, {})
        followers[word] = followers.get(word, 0) + 1

In [9]:
# Convert raw counts to probabilities in place. Each innermost table is scaled
# by its own total, so every distribution -- line openers, second tokens given
# the opener, and next tokens given the previous pair -- sums to 1 (up to
# floating-point rounding).
count_tables = [initial]
count_tables.extend(first_order.values())
count_tables.extend(
    followers
    for by_prev_word in second_order.values()
    for followers in by_prev_word.values()
)

for table in count_tables:
    table_total = sum(table.values())
    for key in table:
        table[key] = table[key] / table_total

## Build Poetry Generator ##

In [10]:
class PoetryGenerator:
    """Sample lines of text from a second-order Markov model.

    Parameters
    ----------
    initial : dict
        Maps a word to the probability that it opens a line.
    first_order : dict
        Maps an opening word to a ``{second_token: probability}`` table.
    second_order : dict
        Nested ``{w1: {w2: {w3: probability}}}`` table giving the
        distribution of the token that follows the consecutive pair
        ``(w1, w2)``.

    The token ``"END"`` terminates a line and is never printed.
    """

    def __init__(self, initial, first_order, second_order):
        self.initial = initial
        self.first_order = first_order
        self.second_order = second_order

    def _find_next_word(self, dictionary):
        """Draw one key from ``dictionary`` in proportion to its probability.

        Walks the entries in insertion order, accumulating probability until
        the running total exceeds a uniform draw from NumPy's global RNG.

        Raises
        ------
        ValueError
            If ``dictionary`` is empty.
        """
        if not dictionary:
            raise ValueError("cannot sample from an empty distribution")

        sample = np.random.random()
        cumulative_prob = 0
        word = None
        for word, prob in dictionary.items():
            cumulative_prob += prob
            if cumulative_prob > sample:
                return word
        # Floating-point rounding can leave the total marginally below the
        # drawn sample; fall back to the last entry rather than failing.
        return word

    def generatePoem(self, num_lines):
        """Print ``num_lines`` sampled lines, one per row.

        The first word of each line is drawn from ``self.initial``, the
        second from ``self.first_order`` given the first, and every later
        token from ``self.second_order`` given the previous two, until
        ``"END"`` is drawn.
        """
        for _ in range(num_lines):
            new_line = []
            new_word = None
            while new_word != "END":
                # Use the instance's tables (not module-level globals) so the
                # generator honors the model it was constructed with.
                if not new_line:
                    distribution = self.initial
                elif len(new_line) == 1:
                    distribution = self.first_order[new_line[0]]
                else:
                    distribution = self.second_order[new_line[-2]][new_line[-1]]
                new_word = self._find_next_word(distribution)
                new_line.append(new_word)
            # Drop the trailing "END" marker before printing.
            print(" ".join(new_line[:-1]))

## Generate Poetry ##

In [11]:
# Build a generator from the three probability tables trained above.
pg = PoetryGenerator(initial, first_order, second_order)

In [12]:
# Print a four-line poem sampled from the model.
pg.generatePoem(4)

i went to bed alone and left me
might just as empty
but it isnt as if and thats not all the money goes so fast
you couldnt call it living for it aint
