In [147]:
import argparse
import random
import sys
import urllib.request as req
import re

In [148]:
def fetch_url(url, fname):
    """Download the contents of ``url`` and write them to the file ``fname``.

    Args:
        url: Address to fetch; passed unchanged to ``urllib.request.urlopen``.
        fname: Path of the output file. It is opened in binary write mode,
            so an existing file is overwritten.
    """
    # Both context managers release their resource even if the read or the
    # write raises, so the HTTP response is no longer left open.
    with req.urlopen(url) as fin:
        data = fin.read()
    with open(fname, mode='wb') as fout:
        fout.write(data)

In [149]:
def from_file(fname, size=1, encoding='utf8'):
    """Read the text file ``fname`` and build a ``Markov`` model from it.

    Args:
        fname: Path of the text file to read.
        size: Forwarded to ``Markov`` as its ``size`` argument.
        encoding: Text encoding used to decode the file.

    Returns:
        The ``Markov`` instance built from the whole file content.
    """
    with open(fname, encoding=encoding) as source:
        text = source.read()
    # The file is already closed here; only the in-memory text is needed.
    return Markov(text, size=size)

In [150]:
def from_file_words(fname, size=1, encoding='utf8'):
    """Read the text file ``fname`` and build a ``MarkovWords`` model from it.

    Args:
        fname: Path of the text file to read.
        size: Forwarded to ``MarkovWords`` as its ``size`` argument.
        encoding: Text encoding used to decode the file.

    Returns:
        The ``MarkovWords`` instance built from the whole file content.
    """
    with open(fname, encoding=encoding) as source:
        text = source.read()
    # The file is already closed here; only the in-memory text is needed.
    return MarkovWords(text, size=size)

In [151]:
class Markov:
    """Character-level Markov predictor backed by several transition tables.

    ``tables[k]`` is the result of ``get_table(data, size=k + 1)``, i.e. the
    table whose keys are ``k + 1`` characters long.
    """

    def __init__(self, data, size=1):
        """Build one transition table for each key length from 1 to ``size``."""
        self.tables = [get_table(data, size=order) for order in range(1, size + 1)]

    def predict(self, txt):
        """Pick a random successor of ``txt``, weighted by how often it was seen.

        The table is selected by ``len(txt)``, so ``txt`` must not be longer
        than the ``size`` the model was built with.

        Raises:
            IndexError: no table exists for a key of this length.
            KeyError: ``txt`` has no recorded successor.
        """
        followers = self.tables[len(txt) - 1].get(txt, {})
        if not followers:
            raise KeyError('{} not found'.format(txt))
        # Repeat each successor once per observation so that a uniform
        # choice over the pool is a frequency-weighted choice.
        pool = [nxt for nxt, seen in followers.items() for _ in range(seen)]
        return random.choice(pool)

In [336]:
def get_table(txt, size=1):
    """
    Build a transition table for txt.

    Each key is a slice of ``size`` consecutive characters of ``txt``; its
    value is a dict mapping the character that came right after that slice
    to the number of times it did.

    >>> get_table('ab')
    {'a': {'b': 1}}
    """
    table = {}
    for start in range(len(txt)):
        stop = start + size
        try:
            follower = txt[stop]
        except IndexError:
            # Nothing follows this slice, so stop scanning.
            break
        counts = table.setdefault(txt[start:stop], {})
        counts[follower] = counts.get(follower, 0) + 1
    return table

In [361]:
class MarkovWords:
    """Word-level Markov predictor backed by several transition tables.

    ``tables[k]`` is the result of ``get_table_words(data, size=k + 1)``,
    i.e. the table whose keys are phrases of ``k + 1`` words.
    """

    def __init__(self, data, size=1):
        """Build one transition table for each phrase length from 1 to ``size``."""
        self.tables = [get_table_words(data, size=n) for n in range(1, size + 1)]

    def predict(self, txt):
        """Pick a random word that followed the phrase ``txt``.

        The phrase is split on any whitespace and re-joined with single
        spaces before the lookup, because table keys are words joined by
        single spaces. Extra, leading or trailing whitespace in ``txt``
        therefore no longer causes a spurious miss or selects the wrong
        table.

        Raises:
            KeyError: the phrase is empty or has no recorded successor.
            IndexError: the phrase has more words than the model's ``size``.
        """
        ws = txt.split()
        if not ws:
            raise KeyError('{} not found'.format(txt))
        # Select the table by the number of words in the phrase.
        table = self.tables[len(ws) - 1]
        options = table.get(' '.join(ws), {})
        if not options:
            raise KeyError('{} not found'.format(txt))
        # Repeat each successor once per observation so that a uniform
        # choice over the list is a frequency-weighted choice.
        possibles = [word for word, count in options.items() for _ in range(count)]
        return random.choice(possibles)

In [350]:
def get_table_words(txt, size=1):
    """
    Returns a word-level transition table for txt

    Each key is ``size`` consecutive words joined by single spaces; its
    value is a dict mapping the word that came right after that phrase to
    the number of times it did.

    >>> get_table_words('ab cd')
    {'ab': {'cd': 1}}
    """
    # str.split() with no argument splits on runs of whitespace and drops
    # leading/trailing whitespace, so a trailing newline does not add an
    # empty-string "word" to the table.
    words = txt.split()
    results = {}
    for idx in range(len(words)):
        chars = ' '.join(words[idx:idx + size])
        try:
            out = words[idx + size]
        except IndexError:
            # No word follows this phrase, so stop scanning.
            break
        char_dict = results.setdefault(chars, {})
        char_dict[out] = char_dict.get(out, 0) + 1
    return results

In [364]:
# Word-level model holding tables for one- and two-word phrases (size=2).
w = MarkovWords('the one quick brown fox jumps over the one lazy dog', size=2)

In [367]:
# 'the one' is followed by both 'quick' and 'lazy' in the sample sentence,
# so the result is random unless the `random` module is seeded first.
w.predict('the one')

'lazy'

In [155]:
# Raw string: '\s' in a normal string literal is an invalid escape sequence.
len(re.split(r'\s+', "ab cd"))

2

In [156]:
# Smallest useful input: two words give a single transition.
get_table_words("ab cd", size=1)

{'ab': {'cd': 1}}

In [317]:
# Table keyed by two-word phrases (size=2) for the sample sentence.
words = get_table_words("the one quick brown fox jumps over the one lazy dog", size=2)

In [318]:
# Display the two-word transition table.
words

{'the one': {'quick': 1, 'lazy': 1},
 'one quick': {'brown': 1},
 'quick brown': {'fox': 1},
 'brown fox': {'jumps': 1},
 'fox jumps': {'over': 1},
 'jumps over': {'the': 1},
 'over the': {'one': 1},
 'one lazy': {'dog': 1}}

In [314]:
# get_table_words returns a plain dict.
type(words)

dict

In [160]:
# NOTE(review): the recorded output does not match the size=2 `words` table
# displayed earlier, which has no 'the' key. This cell appears to have been
# run against an earlier binding of `words`; re-run it on a fresh kernel.
words['the']

{'quick': 1, 'lazy': 1}

In [161]:
def repl(m):
    """Run an interactive prompt that prints ``m.predict(...)`` for each line.

    Args:
        m: Any object with a ``predict(txt)`` method, e.g. ``Markov`` or
            ``MarkovWords``.

    The loop ends with "Goodbye" on Ctrl-C or on end of input. A ``KeyError``
    or ``IndexError`` from ``predict`` is reported and the loop continues.
    """
    print("Welcome to the Markov REPL. (Hit Ctl-C to exit)")
    while True:
        try:
            txt = input('>')
        except (KeyboardInterrupt, EOFError):
            # EOFError covers Ctrl-D or an exhausted stdin, which would
            # otherwise escape the loop as an unhandled traceback.
            print("Goodbye")
            break
        try:
            res = m.predict(txt)
        except KeyError:
            # The text has no recorded successor.
            print("Word not found")
        except IndexError:
            # The text is longer than the model has a table for.
            print('Try again')
        else:
            print(res)

In [170]:
# Character-level model with tables for keys of 1, 2 and 3 characters.
m = Markov('the quick brown fox jumps over the lazy dog', size=3)

In [173]:
# NOTE(review): depends on a local file 'pp.txt'. No visible cell creates it
# (fetch_url is defined but never called here), so a fresh run will fail
# unless the file already exists. Add or document the download step.
pp = from_file_words('pp.txt', size=4)

In [241]:
# The successor is drawn at random, so the recorded output is just one
# possible result unless the `random` module is seeded.
pp.predict('The')

'wisest'

In [184]:
# Character table with two-character keys for the sample sentence.
get_table("the quick brown fox jumps over the lazy dog", size=2)

{'th': {'e': 2},
 'he': {' ': 2},
 'e ': {'q': 1, 'l': 1},
 ' q': {'u': 1},
 'qu': {'i': 1},
 'ui': {'c': 1},
 'ic': {'k': 1},
 'ck': {' ': 1},
 'k ': {'b': 1},
 ' b': {'r': 1},
 'br': {'o': 1},
 'ro': {'w': 1},
 'ow': {'n': 1},
 'wn': {' ': 1},
 'n ': {'f': 1},
 ' f': {'o': 1},
 'fo': {'x': 1},
 'ox': {' ': 1},
 'x ': {'j': 1},
 ' j': {'u': 1},
 'ju': {'m': 1},
 'um': {'p': 1},
 'mp': {'s': 1},
 'ps': {' ': 1},
 's ': {'o': 1},
 ' o': {'v': 1},
 'ov': {'e': 1},
 've': {'r': 1},
 'er': {' ': 1},
 'r ': {'t': 1},
 ' t': {'h': 1},
 ' l': {'a': 1},
 'la': {'z': 1},
 'az': {'y': 1},
 'zy': {' ': 1},
 'y ': {'d': 1},
 ' d': {'o': 1},
 'do': {'g': 1}}

In [209]:
# Word table with single-word keys for the sample sentence.
get_table_words("the one quick brown fox jumps over the one lazy dog", size=1)

{'the': {'one': 2},
 'one': {'quick': 1, 'lazy': 1},
 'quick': {'brown': 1},
 'brown': {'fox': 1},
 'fox': {'jumps': 1},
 'jumps': {'over': 1},
 'over': {'the': 1},
 'lazy': {'dog': 1}}

In [277]:
# Sample sentence reused by the scratch cells that follow.
a = "the one quick brown fox jumps over the one lazy dog"

In [282]:
# Raw string: '\s' in a normal string literal is an invalid escape sequence.
# NOTE(review): this rebinds `words`, which an earlier cell bound to a
# transition-table dict, to a list of words.
words = re.split(r'\s+', a)
words

['the',
 'one',
 'quick',
 'brown',
 'fox',
 'jumps',
 'over',
 'the',
 'one',
 'lazy',
 'dog']

In [297]:
# Slice of three consecutive words starting at index 0; written as
# `0:0 + 3` to mirror the `idx:idx + size` slice in get_table_words.
chars = words[0:0 + 3]
chars

['the', 'one', 'quick']

In [298]:
# Join the words with single spaces, the same way table keys are built.
' '.join(chars)

'the one quick'

In [260]:
# maxsplit=2 gives at most three pieces: the first two words and the
# unsplit remainder of the sentence.
# NOTE(review): `l` is easy to misread as `1` and is not used in the cells
# shown here.
l = a.split(" ", 2)

In [252]:
# Keep only the first two words; the unsplit remainder is dropped.
a.split(" ", 2)[:2]

['the', 'one']

In [251]:
# Rebuild the leading two-word phrase as a single string.
" ".join(a.split(" ", 2)[:2])

'the one'

In [262]:
# Split on single spaces only; unlike the regex split used elsewhere,
# consecutive spaces would produce empty strings here.
ll = a.split(" ")

In [263]:
# Print each word on its own line.
for i in ll:
    print(i)

the
one
quick
brown
fox
jumps
over
the
one
lazy
dog


In [266]:
# Print every adjacent pair of words: each word with its successor.
# Stopping at len(ll) - 1 keeps ll[i+1] in range.
for i in range(len(ll)-1):
    print(ll[i], ll[i+1])

the one
one quick
quick brown
brown fox
fox jumps
jumps over
over the
the one
one lazy
lazy dog
