# Markov Chains
## Sean Wade

In [9]:
import numpy as np
import scipy as sp

In [30]:
def random_markov(n):
    """Create and return a transition matrix for a random
    Markov chain with 'n' states as an nxn NumPy array.
    """
    A = np.random.dirichlet(np.ones(n),size=n)
    return A.T

In [31]:
def two_state_forecast(num_days, transition_matrix = np.array([[0.7, 0.6], [0.3, 0.4]])):
    """Run a simulation for the weather over 'num_days' days, with
    "hot" as the starting state. Return a list containing the day-by-day
    results, not including the starting day.

    Example:
        >>> forecast(5)
        [1, 0, 0, 1, 0]
    """
    cur_day = 0
    forecast = []
    for x in xrange(0, num_days):
        r = np.random.random()
        if r < tran_matrix[1][cur_day]:
            forecast.append(1)
            cur_day = 1
        else:
            forecast.append(0)
            cur_day = 0
    return forecast

In [32]:
def four_state_forecast(days=1, tran_matrix = np.array([[.5, .3, .1, 0], [.3, .3, .2, .3], [.2, .3, .4, .5], [0, .1, .2, .2]])):
    """Same as two_state_forecast(), but using the four-state transition matrix."""
    cur_day = 0
    forecast = []
    for x in xrange(0, days):
        probabilities = tran_matrix[:][cur_day]
        pos = np.random.multinomial(1, probabilities).argmax()
        cur_day = pos
        forecast.append(pos)
    return forecast

four_state_forecast(10)


[0, 0, 3, 3, 2, 1, 1, 0, 1, 1]

In [33]:
def analyze_simulation():
    """Analyze the results of the previous two problems. What percentage
    of days are in each state? Print your results to the terminal.
    """
    forecast = four_state_forecast(100000)
    total_days = float(len(forecast))
    hot = forecast.count(0)
    mild = forecast.count(1)
    cold = forecast.count(2)
    freezing = forecast.count(3)
    print "The percent of weather"
    print "===================================="
    print "Hot: %s" % (hot/total_days * 100)
    print "Mild: %s" % (mild/total_days * 100)
    print "Cold: %s" % (cold/total_days * 100)
    print "Freezing: %s" % (freezing/total_days * 100)

analyze_simulation()

The percent of weather
Hot: 22.813
Mild: 23.346
Cold: 22.434
Freezing: 31.407


In [49]:
def convert(in_file):
    """A helper function that takes a path to an input .txt file
    The file is parse and a file named 'output.txt' is createted with each 
    word assosiated with its number.  It also returns a set of all the words
    in the order in which they occure.
    
    Example:
        >>> convert('input.txt')
        ['$start','This','is','a','test','$end']
    """
    lines_list = []
    word_dict = {}
    words = ['$start']
    val_int = 1
    with open(in_file, 'r') as my_file:
        for line in my_file:
            lines_list.append(line)
            for word in line.split():
                if word.strip() not in word_dict:
                    word_dict[word] = val_int
                    val_int += 1
                    words.append(word)
    
    # write dictionary numbers to the output.txt file
    with open('_temp.txt', 'w') as my_file:
        for line in lines_list:
            for word in line.split():
                my_file.write(str(word_dict[word]) + " ")
            my_file.write('\n')

    words.append('$end')
    return words

def markov_creator(in_file, num_of_words):
    """A helper function for sentences().  Takes an input .txt file with the words 
    tokenized as numbers (see the convert() function).  It also takes the number of words
    in the sample file"""
    
    M = np.zeros((num_of_words, num_of_words))
    start_num = 0
    end_num = num_of_words - 1
    
    with open(in_file, 'r') as my_file:
        for line in my_file:
            prev = start_num
            now = start_num
            for num in line.split():
                now = int(num.strip())
                M[now][prev] += 1
                prev = now
            M[end_num][now] += 1

    M = M[:,:-1] / np.sum(M[:,:-1], axis=0)
    
    return M

SyntaxError: invalid syntax (<ipython-input-49-5ba3d2bce766>, line 53)

In [50]:
def sentences(infile, outfile, write=True, num_sentences=1):
    """Generate random sentences using the word list generated in
    Problem 5 and the transition matrix generated in Problem 6.
    Write the results to the specified outfile.

    Parameters:
        infile (str): The path to a filen containing a training set.
        outfile (str): The file to write the random sentences to.
        num_sentences (int): The number of random sentences to write.
        write (bool): Whether or not to write to a file.

    Returns:
        None
    """
    if write == True:
        with open(outfile, 'w') as my_file:
            my_file.write("MY POEM\n====================================\n")
        word_list = convert(infile)
        Markov = markov_creator('_temp.txt', len(word_list))
        sentence = 1
        while sentence <= num_sentences:
            result = []
            cur = 0
            while cur != len(word_list) - 1:
                probabilities = Markov[:, cur]
                pos = np.random.multinomial(1, probabilities).argmax()
                cur = pos
                result.append(word_list[pos])
            with open(outfile, 'a') as my_file:
                my_file.write(' '.join(result[:-1]) + "\n")

            sentence += 1
    else:
        word_list = convert(infile)
        Markov = markov_creator('_temp.txt', len(word_list))
        sentence = 1
        while sentence <= num_sentences:
            result = []
            cur = 0
            while cur != len(word_list) - 1:
                probabilities = Markov[:, cur]
                pos = np.random.multinomial(1, probabilities).argmax()
                cur = pos
                result.append(word_list[pos])
            print ' '.join(result[:-1]) + '\n'

            sentence += 1

In [51]:
sentences('in.txt', 'out.txt', False, 10)

but rejoices with the truth

Love is not boast

It keeps no record of wrongs

Love is not delight in evil

always perseveres

It is kind

Love does not envy It always protects always perseveres

It is not boast

It is not boast

but rejoices with the truth



