In [1]:
import numpy as np

# Define the MDP class
class MDP:
    """Finite Markov decision process that can render itself as a TikZ diagram.

    The transition model is indexed as ``transition_probabilities[s, a, s_prime]``.
    """

    def __init__(self, num_states, num_actions, transition_probabilities=None, reward=None):
        """Store the MDP dimensions and its transition/reward tables.

        Args:
            num_states: Number of states.
            num_actions: Number of actions.
            transition_probabilities: Optional array-like of shape
                ``(num_states, num_actions, num_states)``. When omitted, a
                random table is drawn and normalised over the last axis.
            reward: Optional array-like of rewards. When omitted, a random
                ``(num_states, num_actions)`` table is drawn.

        Raises:
            ValueError: If ``transition_probabilities`` is given with a shape
                other than ``(num_states, num_actions, num_states)``.
        """
        self.num_states = num_states
        self.num_actions = num_actions

        if transition_probabilities is None:
            # Randomly generate transition probabilities if not provided and
            # normalise so each (state, action) row sums to 1.
            random_probs = np.random.rand(num_states, num_actions, num_states)
            random_probs /= random_probs.sum(axis=2, keepdims=True)
            self.transition_probabilities = random_probs
        else:
            # Accept nested lists as well as arrays; convert_to_tikz relies on
            # [s, a, s_prime] tuple indexing, which plain lists do not support.
            probs = np.asarray(transition_probabilities, dtype=float)
            expected_shape = (num_states, num_actions, num_states)
            if probs.shape != expected_shape:
                raise ValueError(
                    f"transition_probabilities must have shape {expected_shape}, "
                    f"got {probs.shape}"
                )
            self.transition_probabilities = probs

        if reward is None:
            self.rewards = np.random.rand(num_states, num_actions)
        else:
            # Rewards are only stored; no method of this class reads them, so
            # the shape is deliberately not enforced here.
            self.rewards = np.asarray(reward)

    def convert_to_tikz(self, min_probability=0.05):
        """Return a ``tikzpicture`` environment describing the transition graph.

        States are placed on a horizontal line, 3 units apart. One edge is
        drawn per ``(s, a, s_prime)`` triple whose probability exceeds
        ``min_probability``; the edge is labelled with the action only.

        Args:
            min_probability: Transitions with probability not strictly greater
                than this value are omitted. Defaults to 0.05.

        Returns:
            The TikZ source as a string, without a trailing newline.
        """
        lines = [
            "\\begin{tikzpicture}[->, >=stealth', auto, thick, node distance=2.8cm]",
            "\t\\tikzstyle{every state}=[fill=white,draw=black,thick,text=black,scale=1]",
        ]

        # Define nodes for each state. The subscript is wrapped in braces so
        # that multi-digit indices (s_{10}) are typeset as a single subscript.
        for s in range(self.num_states):
            lines.append(f"\t\\node[state]    (S{s}) at ({s*3},0) {{$s_{{{s}}}$}};")

        # Define edges for each transition
        for s in range(self.num_states):
            for a in range(self.num_actions):
                action_label = f"$a_{{{a}}}$"
                # Edges for different actions between the same pair of states
                # coincide, so spread their labels evenly along the edge.
                # The fraction is always strictly inside (0, 1) and equals 0.5
                # when there is a single action.
                label_pos = (a + 1) / (self.num_actions + 1)

                for s_prime in range(self.num_states):
                    prob = self.transition_probabilities[s, a, s_prime]
                    # Written as "not >" so NaN entries are skipped as well.
                    if not prob > min_probability:
                        continue

                    if s == s_prime:
                        # Self-loop when the state transitions to itself
                        lines.append(
                            f"\t\\path (S{s}) edge [loop above] node {{{action_label}}} (S{s});"
                        )
                    else:
                        lines.append(
                            f"\t\\path (S{s}) edge [bend left] "
                            f"node[pos={label_pos:.2f}, above] {{{action_label}}} (S{s_prime});"
                        )

        lines.append("\\end{tikzpicture}")
        return "\n".join(lines)

# Interactive driver: ask for the MDP dimensions and (optionally) its
# transition and reward tables, build the MDP, and write a standalone LaTeX
# document containing its TikZ diagram to "mdp_tikz.tex".

# Get user inputs
num_states = int(input("Enter the number of states: "))
num_actions = int(input("Enter the number of actions: "))

# Get user input for transition probabilities
user_input_transition = input("Do you want to provide a transition probability matrix? (yes/no): ").strip().lower()
transition_probabilities = None

if user_input_transition == "yes":
    transition_probabilities = np.zeros((num_states, num_actions, num_states))
    for s in range(num_states):
        for a in range(num_actions):
            print(f"Enter transition probabilities for state {s}, action {a}:")
            while True:
                row = np.zeros(num_states)
                for s_prime in range(num_states):
                    row[s_prime] = float(input(f"P(s'={s_prime} | s={s}, a={a}): "))
                row_total = row.sum()
                # A row that is all zero, negative, or non-finite cannot be
                # normalised into a distribution; dividing anyway would fill
                # the table with NaN, so ask for the row again instead.
                if np.all(row >= 0) and np.isfinite(row_total) and row_total > 0:
                    break
                print("Probabilities must be non-negative, finite and not all zero. Please re-enter this row.")
            # Normalize the probabilities
            transition_probabilities[s, a] = row / row_total
else:
    print("Random transition probabilities will be generated.")

# Optionally allow user to input rewards
user_input_reward = input("Do you want to provide a reward matrix? (yes/no): ").strip().lower()
reward = None
if user_input_reward == "yes":
    reward = np.zeros((num_states, num_actions))
    for s in range(num_states):
        for a in range(num_actions):
            reward[s, a] = float(input(f"Enter the reward for state {s}, action {a}: "))

# Create the MDP
mdp = MDP(num_states, num_actions, transition_probabilities, reward)

# Convert to TikZ code
tikz_code = mdp.convert_to_tikz()

# Save the TikZ code to a file with the specified document structure.
# The preamble loads the TikZ libraries the picture relies on ("automata" for
# the state style, "arrows" for the stealth' arrow tip).
latex_preamble = [
    "\\documentclass{report}",
    "\\author{Sreejeet Maity}",
    "\\usepackage{tikz}",
    "\\usetikzlibrary{automata,arrows,positioning,calc}",
    "\\usepackage{amsmath}",
    "\\usepackage{graphicx}",
    "\\usepackage{float}",
    "\\begin{document}",
]
# Explicit encoding so the output does not depend on the platform default.
with open("mdp_tikz.tex", "w", encoding="utf-8") as f:
    f.write("\n".join(latex_preamble) + "\n\n" + tikz_code + "\n\n" + "\\end{document}")

print("TikZ code has been saved to 'mdp_tikz.tex' with the specified document structure. You can now use this file in your LaTeX document.")

Enter the number of states:  3
Enter the number of actions:  2
Do you want to provide a transition probability matrix? (yes/no):  no


Random transition probabilities will be generated.


Do you want to provide a reward matrix? (yes/no):  no


TikZ code has been saved to 'mdp_tikz.tex' with the specified document structure. You can now use this file in your LaTeX document.
