https://www.python.org/downloads/release/python-364/

In [11]:
import phonetic
import corpus
import generate_sample
import verse
import re
import nltk
import importlib
import string
import seaborn
import numpy as np
from scipy import sparse
from matplotlib import pyplot as plt
%matplotlib inline

In [16]:
nltk.download('cmudict')
nltk.download('gutenberg')
dictionary = phonetic.PhoneticDictionary()
dictionary.import_file("data/cmudict/cmudict-0.7b")

[nltk_data] Downloading package cmudict to
[nltk_data]     C:\Users\Marissa\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\cmudict.zip.
[nltk_data] Downloading package gutenberg to
[nltk_data]     C:\Users\Marissa\AppData\Roaming\nltk_data...
[nltk_data]   Package gutenberg is already up-to-date!


In [4]:
importlib.reload(corpus)

def get_rhythm_string(template, dictionary):
    
    wordstrings = template.split()
    wordlengths = np.zeros(len(wordstrings),dtype=np.int)
    nonos = string.punctuation + '0123456789'
    meter = []

    for i in range(len(wordstrings)):
        wordstring = wordstrings[i].strip(nonos).lower()
        word = dictionary.lookup(wordstring)
        wordlengths[i] = len(word.rhythm)
        meter.extend(word.rhythm)
    
    return wordlengths, np.array(meter)

In [133]:
importlib.reload(corpus)
importlib.reload(phonetic)
importlib.reload(generate_sample)
importlib.reload(verse)

corp = corpus.WordCorpus(dictionary)
corp.initialize("I know a song that gets on everybody's nerves, everybody's \
nerves, everybody's nerves, oh, I know a song that gets on everybody's nerves\
and this is how it goes", is_filename=False)

template = verse.get_sample(1)
print("Template rhythm: ", template)

wordlengths, rhythm = get_rhythm_string(template, dictionary)
print("\nSample: ", generate_sample.generate_verse(corp, wordlengths, rhythm))

Input text: 28 words, 16 unique
Template rhythm:  How does a bastard, orphan, son of a whore and a scotsman, dropped 	in the middle of a forgotten spot in the caribbean in providence impoverished 	in squalor, grow up to be a hero and a scholar?
probabilities do not sum to 1-try anything

Sample:  is how it DAda DAda know a song that gets on DAda nerves oh i DAda a song daDAda gets on da daDADAda oh DAdada DADAda a DAda that gets on DA nerves DAda nerves oh DAda


In [79]:
importlib.reload(verse)
importlib.reload(generate_sample)

templatestring = verse.get_sample(4)
print("Template rhythm: ", templatestring)

template = verse.VerseTemplate(templatestring, dictionary, breakrules='phrase')
print("\tNumber of syllables: ", len(template.stresses))
print("\tRelevant chunk: ",template.stresses[:9])
print("\tBreakpoints: ", template.breakpoints)
word = dictionary.lookup("alexander")
print("Word rhythm: ", word.rhythm)

print("\n")
print(generate_sample.scansion_score(word, 9-word.length, 4, template, False))
print("\n")
print(generate_sample.scansion_score(word, 5, 12, template, True))

Template rhythm:  buddy you're a boy make a big noise, playing in the street, 	gonna be a big man some day, you got mud on your face, you big disgrace, 	kicking your can all over the place, singing, we will, we will, rock you, 	we will, we will, rock you
	Number of syllables:  55
	Relevant chunk:  [1, 0, 1, 0, 1, 1, 0, 1, 1]
	Breakpoints:  [8, 13, 21, 27, 31, 40, 42, 44, 46, 48, 50, 52]
Word rhythm:  [2 0 1 0]


[1, 0, 1, 1]
loc_inds =  [5 8]
[0 0]
[False  True  True False]
2
0.625
0.625


[1, 0, 1, 1]
loc_inds =  [5 8]
[0 0]
[False  True  True False]
2
0.625
0.625


In [117]:
scores = np.array([0,0.25,0.5,0.6125,0.6667,0.8125,0.25,0,0,1.0,0.9125,0])
q = 75
cutoff = np.nanpercentile(scores, q)
print("cutoff: ", cutoff)
print(scores[scores>=cutoff])
print(np.nonzero(scores >= cutoff)[0])

print(np.nanpercentile(np.array([0.25,0.75]),75))

cutoff:  0.7031499999999999
[0.8125 1.     0.9125]
[ 5  9 10]
0.625


In [152]:
corp = corpus.WordCorpus(dictionary)
emma = nltk.corpus.gutenberg.words('austen-emma.txt')
emma = " ".join(emma)
corp.initialize(emma, is_filename=False)

Input text: 190529 words, 7096 unique


In [121]:
template = verse.get_sample(1)
print("Template rhythm: ", template)

wordlengths, rhythm = get_rhythm_string(template, dictionary)
print("\nSample: ", generate_sample.generate_verse(corp, wordlengths, rhythm))

Template rhythm:  How does a bastard, orphan, son of a whore and a scotsman, dropped 	in the middle of a forgotten spot in the caribbean in providence impoverished 	in squalor, grow up to be a hero and a scholar?

Sample:  i have a very handsome i was a most she had rather i have i never to the idea jane jane the daDADAda and DAdada DADAda her very much i was i had longer i had only


In [146]:
importlib.reload(corpus)
importlib.reload(phonetic)
importlib.reload(generate_sample)
importlib.reload(verse)

for i in range(5):
    template = verse.VerseTemplate(verse.get_sample(i), dictionary, \
                                   breakrules='phrase')
    template.get_rhythm()


 I know a song that gets on everybody's nerves, everybody's 	nerves, everybody's nerves. Oh, I know a song that gets on everybody's nerves, 	and this is how it goes, oh oh oh
[1, 1, 0, 1, 1, 1, 1, 1, 0, 2, 0, 1, 1, 0, 2, 0, 1, 1, 0, 2, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 2, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
[1, 2, 3, 4, 5, 6, 7, 11, 12, 16, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44]

 I know a song that gets on everybody's nerves, everybody's 	nerves, everybody's nerves. Oh, I know a song that gets on everybody's nerves, 	and this is how it goes, oh oh oh
[1, 1, 0, 1, 1, 1, 1, 1, 0, 2, 0, 1, 1, 0, 2, 0, 1, 1, 0, 2, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 2, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
[12, 17, 22, 23, 35, 41]

 How does a bastard, orphan, son of a whore and a scotsman, dropped 	in the middle of a forgotten spot in the caribbean in providence impoverished 	in squalor, grow up to be a hero and a scholar?
[1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0

In [3]:
import numpy as np
myarr = np.zeros(10)
myarr[3:6] = 1.0
print(myarr)

[0. 0. 0. 1. 1. 1. 0. 0. 0. 0.]


In [9]:
import numpy as np

A = np.random.randint(10,size=(4,4))
print(A, '\n')
A[A<6] = 0
print(A)

print(np.nonzero(A[1,:])[0])

[[7 1 1 1]
 [4 0 4 7]
 [0 5 9 4]
 [1 9 7 2]] 

[[7 0 0 0]
 [0 0 0 7]
 [0 0 9 0]
 [0 9 7 0]]
[3]


In [2]:
mystr = "n- - - - boy, - - - noise"
print(mystr)

- - - - boy, - - - noise
