### **Reinforcement Learning in Practice**
### **Programming Assignment 1**
##### Submitted by: Saurabh Kumar (SC22B146)
![part1](ques_part1.png)
![part2](ques_part2.png)


In [54]:
import numpy as np

def valueiteration_pi(S, A, P, R, gamma, epsilon, pi):
    """
    Iterative policy evaluation: state values of a fixed stochastic policy.

    Despite the name, no maximisation over actions is performed. Each sweep
    applies the Bellman expectation backup

        V(s) <- sum_a pi(s,a) * ( R(s,a) + gamma * sum_s' P(s'|s,a) * V(s') )

    to every state at once, starting from V = 0. Sweeps stop when the largest
    relative change |V_new(s) - V(s)| / |V(s)| over all states is at most
    `epsilon`. A state whose value is exactly zero and did not change counts
    as converged (0/0 is treated as 0); a state that moved away from exactly
    zero counts as not converged.

    Parameters:
        S: states (array); only its length is used
        A: actions (array); only its length is used
        P(s'|s,a): transition matrix of shape (S x A x S)
        R(s,a): reward matrix of shape (S x A)
        gamma: discount factor
        epsilon: convergence tolerance on the relative change per sweep
        pi(s,a): policy matrix of shape (S x A)
    Returns:
        V: value function (array of shape S), the most recent iterate
    Raises:
        ValueError: if P, R or pi do not have the shapes listed above
        FloatingPointError: if an iterate becomes NaN or infinite (the
            iteration is diverging or the inputs contain non-finite numbers)
    """
    n_states, n_actions = len(S), len(A)

    # Accept nested lists as well as arrays, and fail early on bad shapes.
    P = np.asarray(P, dtype=float)
    R = np.asarray(R, dtype=float)
    pi = np.asarray(pi, dtype=float)
    expected_shapes = (
        ("P", P, (n_states, n_actions, n_states)),
        ("R", R, (n_states, n_actions)),
        ("pi", pi, (n_states, n_actions)),
    )
    for label, array, shape in expected_shapes:
        if array.shape != shape:
            raise ValueError(
                f"{label} has shape {array.shape}, expected {shape}"
            )

    # Fold the policy into the model once, so a sweep is one mat-vec product:
    #   r_pi[s]     = sum_a pi[s,a] * R[s,a]
    #   P_pi[s, s'] = sum_a pi[s,a] * P[s,a,s']
    r_pi = np.sum(pi * R, axis=1)
    P_pi = np.sum(pi[:, :, None] * P, axis=1)

    V = np.zeros(n_states)
    while True:
        V_new = r_pi + gamma * (P_pi @ V)

        # Without this guard a NaN/inf iterate would never satisfy the
        # stopping test and the loop would spin forever.
        if not np.all(np.isfinite(V_new)):
            raise FloatingPointError(
                "policy evaluation produced non-finite values"
            )

        change = np.abs(V_new - V)
        scale = np.abs(V)
        # Relative change per state, with the zero-denominator cases made
        # explicit: 0/0 -> 0 (converged), x/0 -> inf (not converged).
        rel_change = np.where(change == 0.0, 0.0, np.inf)
        np.divide(change, scale, out=rel_change, where=scale > 0.0)

        if rel_change.size == 0 or rel_change.max() <= epsilon:
            return V_new

        V = V_new

In [57]:
def read_input_file(filename):
    """
    Load an MDP and a policy from a comma-separated text file.

    The file must contain seven non-blank lines, in this order:
        1. state names
        2. action names
        3. gamma (single float)
        4. epsilon (single float)
        5. policy pi(s,a): len(S) * len(A) floats, reshaped to (S x A)
        6. rewards R(s,a): len(S) * len(A) floats, reshaped to (S x A)
        7. transitions P(s'|s,a): len(S) * len(A) * len(S) floats,
           reshaped to (S x A x S)
    Blank lines are skipped and whitespace around entries is ignored. Any
    non-blank lines after the seventh are ignored.

    Parameters:
        filename: path of the file to read
    Returns:
        (S, A, P, R, gamma, epsilon, pi) where S and A are lists of names,
        gamma and epsilon are floats, and P, R, pi are numpy arrays
    Raises:
        ValueError: if fewer than seven non-blank lines are present, a number
            cannot be parsed, or a table has the wrong number of entries
    """
    with open(filename, "r") as f:
        # Drop blank lines so stray empty lines do not shift the field positions.
        lines = [stripped for stripped in (raw.strip() for raw in f) if stripped]

    if len(lines) < 7:
        raise ValueError(
            f"{filename}: expected 7 non-blank lines, found {len(lines)}"
        )

    def _names(line):
        # Split a comma-separated line into whitespace-trimmed names.
        return [token.strip() for token in line.split(",")]

    def _table(line, shape, label):
        # Parse a flat comma-separated list of floats and reshape it to `shape`.
        values = np.array([float(token) for token in line.split(",")])
        expected = 1
        for dim in shape:
            expected *= dim
        if values.size != expected:
            raise ValueError(
                f"{filename}: {label} has {values.size} values, expected {expected}"
            )
        return values.reshape(shape)

    S = _names(lines[0])
    A = _names(lines[1])
    gamma = float(lines[2])
    epsilon = float(lines[3])

    n_states, n_actions = len(S), len(A)
    pi = _table(lines[4], (n_states, n_actions), "policy")
    R = _table(lines[5], (n_states, n_actions), "rewards")
    P = _table(lines[6], (n_states, n_actions, n_states), "transition probabilities")

    return S, A, P, R, gamma, epsilon, pi

In [62]:
# load the MDP description and the policy from the first input file
filename = "input.txt"
S, A, P, R, gamma, epsilon, pi = read_input_file(filename)

# evaluate the policy on that MDP
V = valueiteration_pi(S=S, A=A, P=P, R=R, gamma=gamma, epsilon=epsilon, pi=pi)

# emit the state values on one line, separated by commas
print(*V, sep=",")

31.897971659007922,31.897971659007922


In [63]:
# load the MDP description and the policy from the second input file
filename = "input2.txt"
S, A, P, R, gamma, epsilon, pi = read_input_file(filename)

# evaluate the policy on that MDP
V = valueiteration_pi(S=S, A=A, P=P, R=R, gamma=gamma, epsilon=epsilon, pi=pi)

# emit the state values on one line, separated by commas
print(*V, sep=",")

31.805740830268704,30.71595984707944,33.75461923086265
