In [48]:
import random


def clean_text(text):
  """
  Cleans a block of text by removing punctuation and converting to lowercase.

  Args:
      text: The text to be cleaned.

  Returns:
      The cleaned text.
  """
  # Remove punctuation
  text = ''.join([char for char in text if char.isalnum() or char.isspace()])
  # Convert to lowercase
  text = text.lower()
  return text


def generate(text_collection: str, start_words: list[str], chain_length: int, num_generated: int) -> str:
  """
  Generates a sentence that imitates the style of a text collection using Markov chains.

  Args:
      text_collection: A string containing the text to use for training (cleaned beforehand).
      start_words: A list of words to start the generated sentence (must be same length as chain_length).
      chain_length: The number of words to consider when predicting the next word.
      num_generated: The number of words to generate in the output sentence (including start_words).

  Returns:
      A string containing a generated sentence that imitates the style of the input text collection.
  """

  # Clean the text collection
  cleaned_text = clean_text(text_collection)

  # Validate input
  if not cleaned_text:
    raise ValueError("Cannot generate text from an empty collection")

  if len(start_words) != chain_length:
    raise ValueError("start_words list must be the same length as chain_length")

  if chain_length <= 0:
    raise ValueError("chain_length must be a positive integer")

  # Lowercase the text and split it into words
  text = cleaned_text.split()

  # Create a dictionary to store word transitions
  transitions = {}
  for i in range(len(text) - chain_length):
    current_words = tuple(text[i:i + chain_length])
    next_word = text[i + chain_length]
    if current_words not in transitions:
      transitions[current_words] = []
    transitions[current_words].append(next_word)

  # Generate the sentence
  generated_sentence = start_words
  for _ in range(num_generated):
    current_words = tuple(generated_sentence[-chain_length:])
    if current_words not in transitions:
      # If the current word sequence is not found, choose a random word from the entire text
      next_word = random.choice(text)
    else:
      # Randomly pick a word from the words that follow the current word sequence
      next_word = random.choice(transitions[current_words])
    generated_sentence.append(next_word)

  # Return the generated sentence as a string
  return " ".join(generated_sentence)


# Sample text collection (already cleaned)
text_collection = "the sun was shining brightly in the clear blue sky birds were singing in the trees and a gentle breeze was blowing it was a perfect day for a walk in the park i wandered along the winding paths enjoying the fresh air and the beauty of nature"

# Example usage
start_words = ["the", "rain", "was", "falling"]
chain_length = 4
num_generated = 7

generated_sentence = generate(text_collection, start_words, chain_length, num_generated)

print(generated_sentence)


the rain was falling brightly a gentle the day along sky


In [49]:
import random


def clean_text(text):
  """
  Cleans a block of text by removing punctuation and converting to lowercase.

  Args:
      text: The text to be cleaned.

  Returns:
      The cleaned text.
  """
  # Remove punctuation
  text = ''.join([char for char in text if char.isalnum() or char.isspace()])
  # Convert to lowercase
  text = text.lower()
  return text



In [28]:
def validate_input(text_collection, start_words, chain_length):
  """
  Validates the input parameters for the text generation process.

  Args:
      text_collection: The text to use for training (cleaned beforehand).
      start_words: A list of words to start the generated sentence.
      chain_length: The number of words to consider when predicting the next word.

  Raises:
      ValueError: If the text collection is empty, start_words length mismatches chain_length,
                  or chain_length is non-positive.
  """
  if not text_collection:
    raise ValueError("Cannot generate text from an empty collection")

  if len(start_words) != chain_length:
    raise ValueError("start_words list must be the same length as chain_length")

  if chain_length <= 0:
    raise ValueError("chain_length must be a positive integer")


In [61]:
text_collection = "the sun was shining brightly in the clear blue sky birds were singing in the trees and a gentle breeze was blowing it was a perfect day for a walk in the park i wandered along the winding paths enjoying the fresh air and the beauty of nature"

# Example usage
start_words = ["the", "quick", "brown"]

chain_length = 3
num_generated = 7  # Increase num_generated to trigger missing transitions

try:
  generated_sentence = generate_text(text_collection, start_words, chain_length, num_generated)
  # If generation completes without exception, consider it a pass
  print("Test Case 2 Passed!")
except ValueError:  # Exception might be raised if missing transitions occur
  print("Test Case 2: Expected behavior (handling missing transitions)")


Test Case 2 Passed!


In [63]:
# Same text collection as before
test_cases = [
    {"start_words": ["it", "was", "a"], "chain_length": 3, "num_generated": 4},
    {"start_words": ["the", "birds", "were"], "chain_length": 3, "num_generated": 5}
]

for test_case in test_cases:
    start_words = test_case["start_words"]
    chain_length = test_case["chain_length"]
    num_generated = test_case["num_generated"]

    generated_sentence = generate_text(text_collection, start_words, chain_length, num_generated)
    print(f"Generated sentence (start_words: {start_words}, chain_length: {chain_length}, num_generated: {num_generated}):")
    print(generated_sentence)
    print("---")

print("Test Case 3 Passed!")


Generated sentence (start_words: ['it', 'was', 'a', 'perfect', 'day', 'for', 'a'], chain_length: 3, num_generated: 4):
it was a perfect day for a
---
Generated sentence (start_words: ['the', 'birds', 'were', 'shining', 'the', 'was', 'was', 'a'], chain_length: 3, num_generated: 5):
the birds were shining the was was a
---
Test Case 3 Passed!


In [59]:
# Sample text collection (already cleaned)
text_collection = "the sun was shining brightly in the clear blue sky birds were singing in the trees and a gentle breeze was blowing it was a perfect day for a walk in the park i wandered along the winding paths enjoying the fresh air and the beauty of nature"

# Example usage
start_words = ["the", "quick", "brown"]
chain_length = 3
num_generated = 3

generated_sentence = generate_text(text_collection, start_words, chain_length, num_generated)
print(generated_sentence)

if generated_sentence.startswith(" ".join(start_words)) and len(generated_sentence.split()) == (num_generated + chain_length):
    print("Test Case 1 passed!")


the quick brown shining blue was
Test Case 1 passed!
