In [1]:
import numpy as np
from scipy import linalg as la


class MarkovChain:
    """A Markov chain with finitely many states.

    Attributes:
        trans_matrix ((n,n) ndarray): the column-stochastic transition matrix
            of the chain; entry (i, j) is the probability of moving from
            state j to state i.
        labels (list): the n state labels, ordered by state index.
        states_dict (dict): maps each state label to its index (the row /
            column of trans_matrix that the state corresponds to).
    """
    # Problem 1
    def __init__(self, A, states=None):
        """Check that A is column stochastic and construct a dictionary
        mapping a state's label to its index (the row / column of A that the
        state corresponds to). Save the transition matrix, the list of state
        labels, and the label-to-index dictionary as attributes.

        Parameters:
        A ((n,n) ndarray): the column-stochastic transition matrix for a
            Markov chain with n states.
        states (list(str)): a list of n labels corresponding to the n states.
            If not provided, the labels are the indices 0, 1, ..., n-1.

        Raises:
            ValueError: if A is not square, is not column stochastic, or if
                the number of labels does not match the number of states.

        Example:
            >>> MarkovChain(np.array([[.5, .8], [.5, .2]]), states=["A", "B"])
        corresponds to the Markov Chain with transition matrix
                                   from A  from B
                            to A [   .5      .8   ]
                            to B [   .5      .2   ]
        and the label-to-index dictionary is {"A":0, "B":1}.
        """
        A = np.asarray(A)
        if A.ndim != 2 or A.shape[0] != A.shape[1]:
            raise ValueError("The transition matrix is not square")

        # Column stochastic means nonnegative entries AND unit column sums;
        # checking the sums alone would accept columns such as [1.5, -0.5].
        if np.any(A < 0) or not np.allclose(A.sum(axis=0), 1):
            raise ValueError("The adjacency matrix is not column-stochastic")

        n = A.shape[0]
        labels = list(range(n)) if states is None else list(states)
        if len(labels) != n:
            raise ValueError(
                f"Expected {n} state labels but received {len(labels)}"
            )

        self.trans_matrix = A
        self.labels = labels
        self.states_dict = {label: index for index, label in enumerate(labels)}

    # Problem 2
    def transition(self, state):
        """Transition to a new state by making a random draw from the outgoing
        probabilities of the state with the specified label.

        Parameters:
            state (str): the label for the current state.

        Returns:
            (str): the label of the state to transitioned to.
        """
        # The column of the current state holds its outgoing probabilities.
        outgoing = self.trans_matrix[:, self.states_dict[state]]

        # A single multinomial trial yields a one-hot vector; the position of
        # the 1 is the index of the state that was drawn.
        draw = np.random.multinomial(1, outgoing)
        return self.labels[np.argmax(draw)]

    # Problem 3
    def walk(self, start, N):
        """Starting at the specified state, use the transition() method to
        transition from state to state N-1 times, recording the state label at
        each step.

        Parameters:
            start (str): The starting state label.
            N (int): The number of state labels to record, counting start.

        Returns:
            (list(str)): A list of N state labels, including start.
        """
        visited = [start]
        for _ in range(N - 1):
            # Each step departs from the most recently visited state.
            visited.append(self.transition(visited[-1]))
        return visited

    # Problem 3
    def path(self, start, stop):
        """Beginning at the start state, transition from state to state until
        arriving at the stop state, recording the state label at each step.

        Parameters:
            start (str): The starting state label.
            stop (str): The stopping state label.

        Returns:
            (list(str)): A list of state labels from start to stop.
        """
        visited = [start]
        # No transition is made at all when start already equals stop.
        while visited[-1] != stop:
            visited.append(self.transition(visited[-1]))
        return visited

    # Problem 4
    def steady_state(self, tol=1e-12, maxiter=40):
        """Compute the steady state of the transition matrix A.

        Starting from a random distribution x_0, repeatedly form
        x_{k+1} = A x_k until consecutive iterates differ by less than tol in
        the 1-norm.

        Parameters:
            tol (float): The convergence tolerance.
            maxiter (int): The maximum number of iterations to compute.

        Returns:
            ((n,) ndarray): The steady state distribution vector of A.

        Raises:
            ValueError: if there is no convergence within maxiter iterations.
        """
        A = self.trans_matrix

        # Random starting vector, scaled so that its entries sum to 1.
        x = np.random.rand(A.shape[0])
        x = x / np.sum(x)

        # x_0, ..., x_maxiter are each tested against their successor; the
        # product A @ x is computed once and reused as the next iterate.
        for _ in range(maxiter + 1):
            x_next = A @ x
            if la.norm(x - x_next, ord=1) < tol:
                return x
            x = x_next

        raise ValueError("A^k does not converge")

In [3]:
class SentenceGenerator(MarkovChain):
    """A Markov-based simulator for natural language.

    The states are the distinct whitespace-separated words of a training
    file, plus the artificial states "$tart" (index 0) and "$top" (last
    index) that mark the beginning and end of a sentence. Both labels are
    reserved and are assumed not to occur as words in the file.

    Attributes:
        trans_matrix ((n,n) ndarray): the column-stochastic transition matrix
            for a Markov chain with n states corresponding to the words in
            the file.
        labels (list): list of state labels that are the words in the file.
        states_dict (dict): maps each word to its index in the labels list.
    """
    # Problem 5
    def __init__(self, filename):
        """Read the specified file and build a transition matrix from its
        contents. You may assume that the file has one complete sentence
        written on each line. Blank lines are ignored.

        Parameters:
            filename (str): path of the training text file.

        Raises:
            ValueError: if the file contains no words.
        """
        with open(filename, 'r') as infile:
            sentences = [line.split() for line in infile]

        # A blank line splits into an empty list, which has no first or last
        # word to link to $tart / $top, so such lines are dropped.
        sentences = [words for words in sentences if words]
        if not sentences:
            raise ValueError(f"{filename!r} contains no sentences")

        # Number the words in order of first appearance. Membership is tested
        # against the dictionary, so the lookup cost does not grow with the
        # size of the vocabulary.
        labels = ["$tart"]
        word_index = {"$tart": 0}
        for words in sentences:
            for word in words:
                if word not in word_index:
                    word_index[word] = len(labels)
                    labels.append(word)
        labels.append("$top")

        n = len(labels)
        stop = n - 1
        T = np.zeros((n, n))

        # T[i, j] counts how many times state i directly follows state j,
        # with every sentence framed by $tart and $top.
        for words in sentences:
            indices = [0] + [word_index[word] for word in words] + [stop]
            for previous, following in zip(indices, indices[1:]):
                T[following, previous] += 1

        T[stop, stop] = 1               # the stop state transitions to itself
        T = T / np.sum(T, axis=0)       # normalize each column of T

        # The parent initializer validates T and stores trans_matrix, labels
        # and the label-to-index dictionary.
        super().__init__(T, labels)

    # Problem 6
    def babble(self):
        """Create a random sentence using MarkovChain.path().

        Returns:
            (str): A sentence generated with the transition matrix, not
                including the labels for the $tart and $top states.

        Example:
            >>> yoda = SentenceGenerator("yoda.txt")
            >>> print(yoda.babble())
            The dark side of loss is a path as one with you.
        """
        # The path begins with $tart and ends with $top; drop both ends and
        # join the words in between with single spaces.
        words = self.path("$tart", "$top")
        return " ".join(words[1:-1])

In [50]:
# Train a sentence generator on the text in yoda.txt (read relative to the
# current working directory).
yoda = SentenceGenerator("yoda.txt")
# Generate one random sentence. The call is the last expression of the cell,
# so its return value is presumably what the notebook echoes below.
yoda.babble()

'In you who transform into the deepest commitment, the dark path, forever will try.'