<a href="https://colab.research.google.com/github/suranakhushi/WE-Module3/blob/main/Markov_Chain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import random
from collections import defaultdict

def build_chain(text_corpus, chain_length):
    # Initialize the transition dictionary
    transition_dict = defaultdict(list)

    # Split the text corpus into words
    words = text_corpus.split()

    # Build the transition dictionary
    for i in range(len(words) - chain_length):
        key = tuple(words[i:i + chain_length])
        next_word = words[i + chain_length]
        transition_dict[key].append(next_word)

    return transition_dict

def generate_text_from_chain(transition_dict, start_words, chain_length, num_generated):
    # Initialize the generated sentence with the start words
    generated_sentence = list(start_words)

    # Generate the sentence
    for _ in range(num_generated):
        # Get the last chain_length words
        current_key = tuple(generated_sentence[-chain_length:])

        # Choose the next word from pre-computed possible next words
        next_word_options = transition_dict.get(current_key, ["."])  # "." used as fallback
        next_word = random.choice(next_word_options)

        # Append the next word to the generated sentence
        generated_sentence.append(next_word)

    # Join the generated words to form the sentence
    generated_sentence = ' '.join(generated_sentence)

    return generated_sentence

# Sample text cases and start words
text_cases = [
    "The quick brown fox jumps over the lazy dog.",
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
    "To be or not to be, that is the question.",
    "In the beginning God created the heavens and the earth.",
    "All work and no play makes Jack a dull boy.",
    "Roses are red, violets are blue, sugar is sweet, and so are you.",
    "It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness.",
    "I have a dream that one day this nation will rise up and live out the true meaning of its creed.",
    "Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal."
]

start_words_list = [
    ["The", "quick", "brown"],
    ["Lorem", "ipsum", "dolor"],
    ["To", "be", "or"],
    ["In", "the", "beginning"],
    ["All", "work", "and"],
    ["Roses", "are", "red"],
    ["It", "was", "the"],
    ["I", "have", "a"],
    ["Four", "score", "and"]
]

chain_length = 3
num_generated = 10

# Pre-process text corpora and build chains with pre-computed next words
chains = [build_chain(text_corpus, chain_length) for text_corpus in text_cases]

# Generate and print sentences for each text case
for i, (text_corpus, start_words) in enumerate(zip(text_cases, start_words_list)):
    print(f"Text Case {i+1}:")
    generated_sentence = generate_text_from_chain(chains[i], start_words, chain_length, num_generated)
    print(generated_sentence)
    print()





Text Case 1:
The quick brown fox jumps over the lazy dog. . . . .

Text Case 2:
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt

Text Case 3:
To be or not to be, that is the question. . . .

Text Case 4:
In the beginning God created the heavens and the earth. . . .

Text Case 5:
All work and no play makes Jack a dull boy. . . .

Text Case 6:
Roses are red . . . . . . . . . .

Text Case 7:
It was the best of times, it was the age of wisdom, it

Text Case 8:
I have a dream that one day this nation will rise up and

Text Case 9:
Four score and seven years ago our fathers brought forth on this continent,



In [7]:
# Empty Text Corpus
empty_text_corpus = ""
empty_start_words = ["Empty", "start", "words"]
empty_chain = build_chain(empty_text_corpus, chain_length)
empty_generated_sentence = generate_text_from_chain(empty_chain, empty_start_words, chain_length, num_generated)
print("Edge Case: Empty Text Corpus")
print(empty_generated_sentence)
print()

Edge Case: Empty Text Corpus
Empty start words . . . . . . . . . .



In [9]:

# Text Corpus with Very Large Size
very_large_text_corpus = "Lorem ipsum " * 100000  # Repeat "Lorem ipsum" 100000 times
very_large_start_words = ["Very", "large", "start"]
very_large_chain = build_chain(very_large_text_corpus, chain_length)
very_large_generated_sentence = generate_text_from_chain(very_large_chain, very_large_start_words, chain_length, num_generated)
print("Edge Case: Text Corpus with Very Large Size")
print(very_large_generated_sentence)


Edge Case: Text Corpus with Very Large Size
Very large start . . . . . . . . . .


In [10]:
# Text Corpus with Short Sentences
short_sentences_text_corpus = "Hello world. Goodbye world."
short_sentences_start_words = ["Short", "sentences", "start"]
short_sentences_chain = build_chain(short_sentences_text_corpus, chain_length)
short_sentences_generated_sentence = generate_text_from_chain(short_sentences_chain, short_sentences_start_words, chain_length, num_generated)
print("Edge Case: Text Corpus with Short Sentences")
print(short_sentences_generated_sentence)
print()

Edge Case: Text Corpus with Short Sentences
Short sentences start . . . . . . . . . .



In [11]:
# Single Word Text Corpus
single_word_text_corpus = "Hello"
single_word_start_words = ["Single", "word", "start"]
single_word_chain = build_chain(single_word_text_corpus, chain_length)
single_word_generated_sentence = generate_text_from_chain(single_word_chain, single_word_start_words, chain_length, num_generated)
print("Edge Case: Single Word Text Corpus")
print(single_word_generated_sentence)
print()

Edge Case: Single Word Text Corpus
Single word start . . . . . . . . . .

