In [1]:
import random
from collections import defaultdict

def read_file(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        return file.read()

def tokenize(text):
    return text.split()

def build_markov_chain(words, chain_length):
    chain = defaultdict(list)
    for i in range(len(words) - chain_length):
        key = tuple(words[i:i + chain_length])
        chain[key].append(words[i + chain_length])
    return chain

def generate_sentence(chain, start_words, chain_length):
    sentence = list(start_words)
    current_words = tuple(start_words)

    while len(sentence) < chain_length:
        next_word = random.choice(chain[current_words])
        sentence.append(next_word)
        current_words = tuple(sentence[-chain_length:])

    return ' '.join(sentence)

def generate(filename, start_words, chain_length, num_generated):
    text = read_file(filename)
    words = tokenize(text)
    chain = build_markov_chain(words, chain_length)

    generated_sentences = []
    for _ in range(num_generated):
        generated_sentence = generate_sentence(chain, start_words, chain_length)
        generated_sentences.append(generated_sentence)

    return ' '.join(generated_sentences)


In [8]:
import random
from collections import defaultdict

def tokenize(text):
    return text.split()

def build_markov_chain(words, chain_length):
    chain = defaultdict(list)
    for i in range(len(words) - chain_length):
        key = tuple(words[i:i + chain_length])
        chain[key].append(words[i + chain_length])
    return chain

def generate_sentence(chain, start_words, chain_length):
    sentence = list(start_words)
    current_words = tuple(start_words)

    while len(sentence) < chain_length:
        if current_words not in chain:
            break
        next_word = random.choice(chain[current_words])
        sentence.append(next_word)
        current_words = tuple(sentence[-chain_length:])

    return ' '.join(sentence)

def generate(sample_text, start_words, chain_length, num_generated):
    words = tokenize(sample_text)
    chain = build_markov_chain(words, chain_length)

    generated_sentences = []
    for _ in range(num_generated):
        generated_sentence = generate_sentence(chain, start_words, chain_length)
        generated_sentences.append(generated_sentence)

    return ' '.join(generated_sentences)

# Example usage:
sample_text = """
Markov chains, named after Andrey Markov, are mathematical systems that exhibit random transitions between states according to certain probabilistic rules. They are widely used to model a variety of phenomena in diverse fields such as physics, biology, economics, computer science, and more.

A Markov chain consists of a set of states and transition probabilities between these states. The key property of a Markov chain is the memorylessness property, which states that the probability of transitioning to a particular state depends only on the current state and not on the sequence of previous states. This makes Markov chains useful for modeling processes where future states depend only on the present state and are independent of the past.

In practice, Markov chains can be represented using transition matrices or transition diagrams. Transition matrices encode the probabilities of transitioning between states, while transition diagrams visually represent the states and transitions between them.

One common application of Markov chains is in natural language processing, where they are used for tasks such as text generation, part-of-speech tagging, and machine translation. In text generation, Markov chains can be trained on a corpus of text and used to generate new text that mimics the style and structure of the original corpus.

Overall, Markov chains are powerful tools for modeling and analyzing stochastic processes, and they have a wide range of applications across various fields.
"""
start_words = ['The', 'quick', 'brown']
chain_length = 5
num_generated = 3
generated_text = generate(sample_text, start_words, chain_length, num_generated)
print(generated_text)


The quick brown The quick brown The quick brown


In [10]:
import random
from collections import defaultdict

def tokenize(text):
    return text.split()

def build_markov_chain(words, chain_length):
    chain = defaultdict(list)
    for i in range(len(words) - chain_length):
        key = tuple(words[i:i + chain_length])
        chain[key].append(words[i + chain_length])
    return chain

def generate_sentence(chain, chain_length):
    sentence = []
    current_words = random.choice(list(chain.keys()))
    sentence.extend(current_words)

    while len(sentence) < chain_length:
        if current_words not in chain:
            break
        next_word = random.choice(chain[current_words])
        sentence.append(next_word)
        current_words = tuple(sentence[-chain_length:])

    return ' '.join(sentence)

def generate(sample_text, chain_length, num_generated):
    words = tokenize(sample_text)
    chain = build_markov_chain(words, chain_length)

    generated_sentences = []
    for _ in range(num_generated):
        generated_sentence = generate_sentence(chain, chain_length)
        generated_sentences.append(generated_sentence)

    return ' '.join(generated_sentences)

# Example usage:
sample_text = """
Markov chains, named after Andrey Markov, are mathematical systems that exhibit random transitions between states according to certain probabilistic rules. They are widely used to model a variety of phenomena in diverse fields such as physics, biology, economics, computer science, and more.

A Markov chain consists of a set of states and transition probabilities between these states. The key property of a Markov chain is the memorylessness property, which states that the probability of transitioning to a particular state depends only on the current state and not on the sequence of previous states. This makes Markov chains useful for modeling processes where future states depend only on the present state and are independent of the past.

In practice, Markov chains can be represented using transition matrices or transition diagrams. Transition matrices encode the probabilities of transitioning between states, while transition diagrams visually represent the states and transitions between them.

One common application of Markov chains is in natural language processing, where they are used for tasks such as text generation, part-of-speech tagging, and machine translation. In text generation, Markov chains can be trained on a corpus of text and used to generate new text that mimics the style and structure of the original corpus.

Overall, Markov chains are powerful tools for modeling and analyzing stochastic processes, and they have a wide range of applications across various fields.
"""
chain_length = 2
num_generated = 3
generated_text = generate(sample_text, chain_length, num_generated)
print(generated_text)


text that makes Markov between them.


In [14]:
import random
from collections import defaultdict

def sliding_pairs(text):
    words = text.split()
    pairs = []
    for i in range(len(words) - 1):
        pairs.append((words[i], words[i + 1]))
    return pairs

def build_markov_chain(pairs):
    chain = defaultdict(list)
    for current_pair, next_word in pairs:
        chain[current_pair].append(next_word)
    return chain

def generate_sentence(chain, num_pairs):
    sentence = []
    current_pair = random.choice(list(chain.keys()))

    for _ in range(num_pairs):
        sentence.extend(current_pair)
        if current_pair not in chain:
            break
        next_word = random.choice(chain[current_pair])
        current_pair = (current_pair[1], next_word)

    sentence.append(current_pair[1])  # Append the last word of the last pair
    return ''.join(sentence)


# Example usage:
sample_text = """
Markov chains, named after Andrey Markov, are mathematical systems that exhibit random transitions between states according to certain probabilistic rules. They are widely used to model a variety of phenomena in diverse fields such as physics, biology, economics, computer science, and more.

A Markov chain consists of a set of states and transition probabilities between these states. The key property of a Markov chain is the memorylessness property, which states that the probability of transitioning to a particular state depends only on the current state and not on the sequence of previous states. This makes Markov chains useful for modeling processes where future states depend only on the present state and are independent of the past.

In practice, Markov chains can be represented using transition matrices or transition diagrams. Transition matrices encode the probabilities of transitioning between states, while transition diagrams visually represent the states and transitions between them.

One common application of Markov chains is in natural language processing, where they are used for tasks such as text generation, part-of-speech tagging, and machine translation. In text generation, Markov chains can be trained on a corpus of text and used to generate new text that mimics the style and structure of the original corpus.

Overall, Markov chains are powerful tools for modeling and analyzing stochastic processes, and they have a wide range of applications across various fields.
"""

pairs = sliding_pairs(sample_text)
chain = build_markov_chain(pairs)

num_sentences = 3
num_pairs_per_sentence = 10

for _ in range(num_sentences):
    generated_sentence = generate_sentence(chain, num_pairs_per_sentence)
    print(generated_sentence)


makesaMarkovMarkov
exhibitxrandomrandom
transitionrdiagrams.diagrams.
