Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
summerstay authored May 15, 2020
1 parent 71d9024 commit 7cbcb10
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion for_making_data_files/make_rhyming_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,18 @@
word_tokens=tokenizer.encode(word,add_prefix_space = True)
syll_count = min(syllable_count_dictionary[word.upper()],9)
syllable_tokens[syll_count] = syllable_tokens[syll_count].union(word_tokens)

# Pass over the rhyme dictionary that encodes the *capitalized* spelling of
# each word (str.capitalize(): first letter upper-case, the rest lower-case)
# and merges the results into the existing rhyming_tokens / syllable_tokens
# tables.
for word in rhyme_dictionary:
    # Every word stored under the same rhyme part as `word`.
    rhyme_group = reverse_rhyme_dictionary[rhyme_dictionary[word]]
    for candidate in rhyme_group:
        candidate_ids = set(
            tokenizer.encode(candidate.capitalize(), add_prefix_space=True)
        )
        # Rebind to a new set instead of mutating in place, so any other
        # reference to the previous set object is left untouched.
        rhyming_tokens[word] = rhyming_tokens[word] | candidate_ids

    # File the capitalized word's own encoding under its syllable count.
    # Counts above 9 are clamped to 9; the count is looked up by the
    # upper-cased word.
    capitalized_ids = tokenizer.encode(word.capitalize(), add_prefix_space=True)
    bucket = min(syllable_count_dictionary[word.upper()], 9)
    syllable_tokens[bucket] = syllable_tokens[bucket].union(capitalized_ids)

# Persist each lookup table to its own pickle file.
#
# rhyming_tokens.p holds only the rhyming_tokens table. syllable_tokens is
# written solely to syllable_tokens.p: appending it to rhyming_tokens.p as a
# second pickled object would just duplicate the data, since one
# pickle.load() call returns only the first object in the stream.
with open("rhyming_tokens.p", "wb") as f:
    pickle.dump(rhyming_tokens, f)

with open("syllable_tokens.p", "wb") as f:
    pickle.dump(syllable_tokens, f)

0 comments on commit 7cbcb10

Please sign in to comment.