In [None]:
import numpy as np

class QLearning:
    """Tabular Q-learning agent trained on a small, hard-coded environment.

    The environment is built into the class: episodes start in state 0 and
    end as soon as state 5 is entered.  Entering state 5 yields a reward of
    100; every other step yields -1.  Successor states are taken from
    ``TRANSITIONS`` and depend only on the current state, so the action that
    was selected does NOT influence where the agent goes next; it only
    selects which Q-table entry receives the update.

    Because the transition table refers to states 0..5, ``n_states`` must be
    at least 6 for ``learn`` to index the Q-table safely.

    Attributes:
        n_states: Number of rows in the Q-table.
        n_actions: Number of columns in the Q-table.
        alpha: Step size applied to each temporal-difference update.
        gamma: Discount applied to the best Q-value of the next state.
        epsilon: Probability of picking a uniformly random action.
        Q: ``(n_states, n_actions)`` array of action values, initially zero.
        episode_rewards: Summed reward of every episode run by ``learn``,
            in order, accumulated across calls.
    """

    START_STATE = 0    # every episode begins here
    GOAL_STATE = 5     # entering this state ends the episode
    GOAL_REWARD = 100  # reward for a step that lands on GOAL_STATE
    STEP_REWARD = -1   # reward for any other step

    # Candidate successors per state.  A single candidate is deterministic;
    # several candidates are sampled uniformly.  The entry for state 5 is kept
    # for completeness, although `learn` stops before ever leaving state 5.
    TRANSITIONS = {
        0: (4,),
        1: (3, 5),
        2: (3,),
        3: (1, 4),
        4: (0, 3),
        5: (1, 4, 5),
    }

    def __init__(self, n_states, n_actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        """Store the hyper-parameters and create an all-zero Q-table."""
        self.n_states = n_states
        self.n_actions = n_actions
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.Q = np.zeros((n_states, n_actions))
        self.episode_rewards = []

    def epsilon_greedy_policy(self, state):
        """Return an action index for ``state`` using epsilon-greedy selection.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the largest Q-value for ``state`` is
        returned (``np.argmax`` resolves ties in favour of the lowest index).
        """
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.n_actions)
        return np.argmax(self.Q[state])

    def _next_state(self, state):
        """Sample the successor of ``state`` from ``TRANSITIONS``."""
        candidates = self.TRANSITIONS.get(state)
        if candidates is None:
            # States outside the table loop back onto themselves.
            return state
        if len(candidates) == 1:
            # Deterministic move: no random number is drawn.
            return candidates[0]
        return np.random.choice(candidates)

    def learn(self, num_episodes, display_interval=50):
        """Run ``num_episodes`` episodes of Q-learning, updating ``Q`` in place.

        Args:
            num_episodes: Number of episodes to run.
            display_interval: Print the episode number and the Q-table every
                this many episodes.  ``0``, ``None`` or a negative value
                disables printing.

        The summed reward of each episode is appended to
        ``self.episode_rewards``.
        """
        # A falsy or negative interval means "never print"; this also avoids a
        # modulo-by-zero when display_interval == 0.
        printing_enabled = bool(display_interval) and display_interval > 0

        for episode in range(num_episodes):
            show = printing_enabled and episode % display_interval == 0
            if show:
                print(f"Episode {episode}:")

            state = self.START_STATE
            total_reward = 0
            done = False

            while not done:
                action = self.epsilon_greedy_policy(state)
                next_state = self._next_state(state)

                done = next_state == self.GOAL_STATE
                reward = self.GOAL_REWARD if done else self.STEP_REWARD

                # Move Q(state, action) towards the one-step target
                # reward + gamma * max_a Q(next_state, a).
                target = reward + self.gamma * np.max(self.Q[next_state])
                self.Q[state, action] += self.alpha * (target - self.Q[state, action])

                total_reward += reward
                state = next_state

            self.episode_rewards.append(total_reward)

            if show:
                print("Q-table:")
                print(self.Q)
                print("---------------------------------")

if __name__ == "__main__":
    # The agent draws from NumPy's global RNG (np.random.rand / np.random.choice),
    # so seeding it here makes the printed Q-tables reproducible across runs.
    np.random.seed(42)

    n_states = 6  # Number of states (0 to 5)
    n_actions = 6  # Number of actions (0 to 5)
    agent = QLearning(n_states, n_actions, alpha=0.1, gamma=0.9, epsilon=0.1)
    # Train for 100 episodes, printing the Q-table at episodes 0 and 50.
    agent.learn(100, display_interval=50)



Episode 0:
Q-table:
[[-0.199 -0.1   -0.1   -0.1   -0.109 -0.109]
 [10.     0.     0.     0.     0.     0.   ]
 [ 0.     0.     0.     0.     0.     0.   ]
 [-0.1   -0.1   -0.109 -0.1    0.     0.   ]
 [-0.19  -0.19  -0.199 -0.19  -0.1   -0.1  ]
 [ 0.     0.     0.     0.     0.     0.   ]]
---------------------------------
Episode 50:
Q-table:
[[ 1.46788766  2.57237992 30.39771627  7.04467261  5.30626851 10.79372111]
 [71.63795237 12.98566176  0.          4.35876439 19.         10.        ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 5.94824958 14.50208866 -0.24267315  4.70358206 50.64801111 16.42302652]
 [ 7.06258309  6.32361544  2.67490075  7.41230204 34.11541343  4.95111996]
 [ 0.          0.          0.          0.          0.          0.        ]]
---------------------------------


In [None]:
import random

def monte_carlo_pi(num_samples):
    """Estimate π by sampling random points in the unit square.

    Points ``(x, y)`` are drawn uniformly from ``[0, 1] x [0, 1]``.  The
    fraction that lands inside the quarter of the unit circle
    (``x**2 + y**2 <= 1``) approximates π/4, so the fraction is multiplied
    by 4.

    Args:
        num_samples: Number of random points to draw; must be a positive
            integer.

    Returns:
        The estimate of π as a float.

    Raises:
        ValueError: If ``num_samples`` is not positive.  Without this check
            zero samples fail with a division by zero and a negative count
            silently returns ``-0.0``.
    """
    if num_samples <= 0:
        raise ValueError(f"num_samples must be positive, got {num_samples}")

    # x is drawn before y for every sample (left operand evaluates first).
    inside_circle = sum(
        1
        for _ in range(num_samples)
        if random.uniform(0, 1) ** 2 + random.uniform(0, 1) ** 2 <= 1
    )

    # Calculate the estimated value of π
    return (inside_circle / num_samples) * 4

if __name__ == "__main__":
    # Seed the stdlib RNG so the printed estimate is reproducible across runs.
    random.seed(42)

    num_samples = 1_000_000  # Number of random samples
    estimated_pi = monte_carlo_pi(num_samples)
    print(f"Estimated π using Monte Carlo: {estimated_pi}")


Estimated π using Monte Carlo: 3.143596
