<a href="https://colab.research.google.com/github/junting-huang/data_storytelling/blob/main/case_1_pattern.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# case_1. pattern


## 1.1 installation

In [6]:
! pip install textblob



In [7]:
# Run TextBlob's corpus downloader. The log below shows what it fetches into
# nltk_data: brown, punkt, wordnet, averaged_perceptron_tagger, conll2000 and
# movie_reviews.
! python -m textblob.download_corpora

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package conll2000 to /root/nltk_data...
[nltk_data]   Unzipping corpora/conll2000.zip.
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
Finished.


## 1.2 basic usage

In [8]:
from textblob import TextBlob

In [26]:
# Wrap one long sample sentence in a TextBlob; the following cells inspect this
# `blob` (tags, words, noun phrases, and individual words).
blob = TextBlob("When I wrote the following pages, or rather the bulk of them, I lived alone, in the woods, a mile from any neighbor, in a house which I had built myself, on the shore of Walden Pond, in Concord, Massachusetts, and earned my living by the labor of my hands only. ")

In [27]:
# Show the (word, part-of-speech tag) pairs TextBlob assigns to the sentence
print(blob.tags)

[('When', 'WRB'), ('I', 'PRP'), ('wrote', 'VBD'), ('the', 'DT'), ('following', 'JJ'), ('pages', 'NNS'), ('or', 'CC'), ('rather', 'RB'), ('the', 'DT'), ('bulk', 'NN'), ('of', 'IN'), ('them', 'PRP'), ('I', 'PRP'), ('lived', 'VBD'), ('alone', 'RB'), ('in', 'IN'), ('the', 'DT'), ('woods', 'NNS'), ('a', 'DT'), ('mile', 'NN'), ('from', 'IN'), ('any', 'DT'), ('neighbor', 'NN'), ('in', 'IN'), ('a', 'DT'), ('house', 'NN'), ('which', 'WDT'), ('I', 'PRP'), ('had', 'VBD'), ('built', 'VBN'), ('myself', 'PRP'), ('on', 'IN'), ('the', 'DT'), ('shore', 'NN'), ('of', 'IN'), ('Walden', 'NNP'), ('Pond', 'NNP'), ('in', 'IN'), ('Concord', 'NNP'), ('Massachusetts', 'NNP'), ('and', 'CC'), ('earned', 'VBD'), ('my', 'PRP$'), ('living', 'NN'), ('by', 'IN'), ('the', 'DT'), ('labor', 'NN'), ('of', 'IN'), ('my', 'PRP$'), ('hands', 'NNS'), ('only', 'RB')]


In [30]:
# Show the sentence split into individual words (no punctuation tokens appear
# in the output below)
print(blob.words)

['When', 'I', 'wrote', 'the', 'following', 'pages', 'or', 'rather', 'the', 'bulk', 'of', 'them', 'I', 'lived', 'alone', 'in', 'the', 'woods', 'a', 'mile', 'from', 'any', 'neighbor', 'in', 'a', 'house', 'which', 'I', 'had', 'built', 'myself', 'on', 'the', 'shore', 'of', 'Walden', 'Pond', 'in', 'Concord', 'Massachusetts', 'and', 'earned', 'my', 'living', 'by', 'the', 'labor', 'of', 'my', 'hands', 'only']


In [28]:
# Show the noun phrases TextBlob extracts from the sentence
print(blob.noun_phrases)

['walden pond', 'concord', 'massachusetts']


In [29]:
# blob.words[5] is 'pages' (see the printed word list); singularize() turns it
# into its singular form
print(blob.words[5].singularize())

page


In [37]:
# blob.words[2] is 'wrote'; lemmatize('v') reduces it to 'write'.
# NOTE(review): the 'v' argument presumably selects verb lemmatization —
# confirm against the TextBlob documentation.
print(blob.words[2].lemmatize('v'))

write


In [38]:
# blob.words[4] is 'following'; stem() strips it down to the stem 'follow'
print(blob.words[4].stem())

follow


## 1.3 rule-based text generation

In [None]:
import random

In [None]:
# Word bank for the generator: each key names a part of speech and maps to the
# words that may fill a slot of that kind. The keys double as the placeholder
# names used inside `sentence_structures`.
word_dict = {
    'adjectives': ['bright', 'dark', 'colorful', 'dreary', 'vibrant', 'lifeless'],
    'nouns': ['sun', 'moon', 'forest', 'desert', 'sky', 'ocean'],
    'verbs': ['shines', 'sings', 'whispers', 'screams', 'glistens', 'cries'],
    'adverbs': ['loudly', 'softly', 'brightly', 'painfully', 'carefully', 'joyfully']
}

In [None]:
# Sentence templates the generator can choose from. An entry that is a key of
# `word_dict` (e.g. 'nouns') is a slot that get_sentence() fills with a random
# word of that kind; any other entry ('The', 'in the', 'under the') is kept as
# literal text.
sentence_structures = [
    ['The', 'adjectives', 'nouns', 'verbs', 'adverbs'],
    ['adjectives', 'nouns', 'verbs', 'adverbs', 'in the', 'adjectives', 'nouns'],
    ['The', 'nouns', 'verbs', 'adverbs', 'under the', 'adjectives', 'nouns']
]

In [None]:
# Picks one word at random from the list registered for a part of speech
def get_word(part_of_speech):
    """Return a randomly chosen entry of ``word_dict[part_of_speech]``.

    Args:
        part_of_speech: A key of ``word_dict`` (for example ``'nouns'``).

    Returns:
        One element of the list stored under that key.

    Raises:
        KeyError: If ``part_of_speech`` is not a key of ``word_dict``.
    """
    candidates = word_dict[part_of_speech]
    return random.choice(candidates)

In [None]:
# Builds one sentence by filling in a randomly selected sentence structure
def get_sentence():
    """Return a random sentence that ends with a full stop.

    A template is drawn from ``sentence_structures``. Each entry that is a key
    of ``word_dict`` is replaced by ``get_word(entry)``; every other entry is
    used verbatim. The pieces are joined with single spaces.

    Returns:
        The joined text passed through ``str.capitalize()`` (first character
        upper-cased, the rest lower-cased) followed by ``'.'``.
    """
    template = random.choice(sentence_structures)
    pieces = []
    for slot in template:
        if slot in word_dict:
            # Placeholder: swap in a random word of that part of speech.
            pieces.append(get_word(slot))
        else:
            # Literal text such as 'The' or 'in the'.
            pieces.append(slot)
    joined = ' '.join(pieces)
    return joined.capitalize() + '.'

In [None]:
# Generates a poem (4 lines unless told otherwise)
def generate_poem(num_lines=4):
    """Print a poem made of independently generated sentences.

    Args:
        num_lines: How many sentences to print, one per line. Defaults to 4,
            so calling ``generate_poem()`` still prints a 4-line poem.

    Returns:
        None. Each line is written with ``print``.

    Note:
        A later cell of this notebook (section 1.4) defines another function
        named ``generate_poem``; once that cell has run, the name refers to
        that later definition instead of this one.
    """
    for _ in range(num_lines):
        print(get_sentence())

In [None]:
# Print a poem with the rule-based generator; the output differs on every run
# because the words and sentence structures are chosen at random.
generate_poem()

## 1.4 regulated verse generation

In [4]:
from textblob import Word
import random
import requests

# Word pools for the verse generator: generate_line() builds each line as
# "The <adjective> <seed word> of <noun>", drawing the adjective and noun at
# random from the lists below; generate_poem() draws the seed word.
seed_words = ['love', 'moon', 'star', 'dream']
adjectives = ['bright', 'dark', 'sweet', 'silent']
nouns = ['night', 'sky', 'heart', 'light']

def generate_line(seed_word):
    """Build one verse line around ``seed_word``.

    Args:
        seed_word: The word placed in the middle of the line.

    Returns:
        A string of the form ``"The <adjective> <seed_word> of <noun>"`` where
        the adjective and the noun are drawn at random (adjective first) from
        the module-level ``adjectives`` and ``nouns`` lists.
    """
    # Tuple elements are evaluated left to right, so the adjective is drawn
    # before the noun.
    adjective, noun = random.choice(adjectives), random.choice(nouns)
    line = f"The {adjective} {seed_word} of {noun}"
    return line

def find_rhyme(word, pos, timeout=10):
    """Look up a rhyme for ``word`` through the Datamuse web API.

    Args:
        word: The word to find a rhyme for; sent as the ``rel_rhy`` query
            parameter.
        pos: Value sent as the ``tags`` query parameter.
            NOTE(review): the Datamuse documentation does not appear to list
            ``tags`` as a query parameter (part-of-speech data seems to be
            requested with ``md=p``), so this filter may have no effect —
            confirm against the API docs.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the notebook.

    Returns:
        The ``'word'`` field of the first result, or ``word`` unchanged when
        the request fails, the status code is not 200, the body is not valid
        JSON, or the result list is empty.
    """
    try:
        response = requests.get(
            "https://api.datamuse.com/words",
            # Let requests build the query string so that spaces and other
            # special characters in `word` are URL-encoded correctly.
            params={"rel_rhy": word, "tags": pos},
            timeout=timeout,
        )
        if response.status_code != 200:
            return word
        words = response.json()
    except (requests.RequestException, ValueError):
        # Network problem or malformed body: use the same fallback as the
        # "no rhyme found" case instead of aborting poem generation.
        return word
    if words:
        return words[0]['word']
    return word  # return the original word if no rhyme is found

def generate_poem(num_lines=4):
    """Generate a poem whose lines rhyme in consecutive pairs.

    Every line starts as ``generate_line(<random seed word>)``. For each
    odd-indexed line (the 2nd, 4th, ...), the final word is replaced by
    ``find_rhyme(<last word of the previous line>, 'nn')``.

    Args:
        num_lines: Number of lines to generate. Defaults to 4, so calling
            ``generate_poem()`` still yields a 4-line poem.

    Returns:
        The lines joined with newline characters (an empty string when
        ``num_lines`` is 0).

    Note:
        This definition reuses the name of the ``generate_poem`` from
        section 1.3 and replaces it once this cell has run. Unlike that
        function it returns the text instead of printing it.
    """
    poem = []
    for i in range(num_lines):
        seed_word = random.choice(seed_words)
        line = generate_line(seed_word)
        if i % 2 != 0:  # For every second line, find a rhyme for the last word of the previous line
            last_word = poem[i - 1].split()[-1]
            rhyme_word = find_rhyme(last_word, 'nn')  # Find a noun that rhymes
            line = line.rsplit(' ', 1)[0] + ' ' + rhyme_word  # Replace the last word with the rhyming word
        poem.append(line)
    return '\n'.join(poem)

# Generate a poem and print it. generate_poem() returns the text as a single
# newline-separated string, and building it calls find_rhyme(), which queries
# the Datamuse API over the network.
print(generate_poem())

'The dark love of night\nThe sweet dream of light\nThe dark dream of night\nThe sweet star of light'