# Dynamic Programming Algorithm for Optimal Stopping

# In [None]:
import numpy as np

def optimal_stopping(values):
    """
    Solve a finite-horizon optimal stopping problem by backward induction.

    Walking from the last period to the first, the value of a period is the
    larger of "stop now and collect ``values[i]``" and "continue", where
    continuing past the final period is worth 0.

    Args:
    - values (list): Reward available in each period, in chronological order.

    Returns:
    - optimal_value (float): Value of the problem at period 0 (0.0 for an
      empty input).
    - stopping_rule (list): One flag per period; 1 if stopping there is at
      least as good as continuing, otherwise 0.
    """
    horizon = len(values)
    # One extra slot past the end holds the terminal continuation value of 0.
    value_to_go = np.zeros(horizon + 1)
    stopping_rule = [0] * horizon

    for period in reversed(range(horizon)):
        reward = values[period]
        continuation = value_to_go[period + 1]
        value_to_go[period] = max(reward, continuation)
        if reward >= continuation:
            # Ties are resolved in favour of stopping.
            stopping_rule[period] = 1

    return value_to_go[0], stopping_rule

# Example usage: run the backward pass on a fixed seven-period reward sequence.
values = [3, 2, 5, 1, 7, 4, 8]  # Reward available in each period, in order
# optimal_value is the value at period 0; stopping_rule holds one 0/1 flag per period.
optimal_value, stopping_rule = optimal_stopping(values)
print("Optimal Value:", optimal_value)
print("Stopping Rule:", stopping_rule)


# Monte Carlo Simulation for Estimating Stopping Rules

# In [None]:
import numpy as np

def secretary_problem_simulation(n, trials):
    """
    Monte Carlo estimate of the success rate of the 1/e secretary rule.

    The stopping point is fixed, not searched for: the first ``int(n / e)``
    candidates are observed without being hired, and afterwards the first
    candidate who beats everyone seen during that observation phase is hired.
    A trial succeeds when the hired candidate is the overall best one; a
    trial in which nobody is hired counts as a failure.

    Args:
    - n (int): Number of candidates.
    - trials (int): Number of simulation trials; must be positive.

    Returns:
    - optimal_stop_point (int): Length of the observation phase, ``int(n / e)``.
    - success_probability (float): Fraction of trials in which the best
      candidate was hired.

    Raises:
    - ValueError: If ``trials`` is not positive.
    """
    if trials <= 0:
        raise ValueError("trials must be a positive integer")

    # The cutoff depends only on n, so compute it once instead of per trial.
    stop_point = int(n / np.e)
    best_rank = n - 1
    wins = 0

    for _ in range(trials):
        # Candidates are the ranks 0..n-1 in random arrival order; n-1 is best.
        candidates = np.random.permutation(n)

        # Observation phase: remember the best rank seen, hire nobody.
        observed = candidates[:stop_point]
        benchmark = observed.max() if observed.size else -1

        # Selection phase: hire the first candidate who beats the benchmark.
        remaining = candidates[stop_point:]
        better = np.flatnonzero(remaining > benchmark)

        # "Nobody hired" is tracked by the empty index array rather than a -1
        # sentinel, which would compare equal to best_rank when n == 0.
        if better.size and remaining[better[0]] == best_rank:
            wins += 1

    return stop_point, wins / trials

# Example usage: simulate the secretary problem with 100 candidates.
# No random seed is set in this snippet, so the estimate varies between runs.
n = 100  # Number of candidates
trials = 10000  # Number of simulation trials

# The reported stopping point is int(n / e), i.e. 36 for n = 100.
optimal_stop_point, success_probability = secretary_problem_simulation(n, trials)
print("Optimal Stopping Point:", optimal_stop_point)
print("Estimated Success Probability:", success_probability)

# Threshold-Based Stopping Criterion

# In [None]:
import numpy as np

def threshold_stopping_simulation(threshold, n, trials):
    """
    Monte Carlo simulation of a threshold-based stopping rule.

    In each trial, ``n`` observations are drawn uniformly from [0, 1) and the
    process stops at the first observation that is at least ``threshold``.
    If no observation qualifies, the stopping time defaults to ``n``.

    Args:
    - threshold (float): The threshold value for stopping.
    - n (int): Number of observations per trial.
    - trials (int): Number of simulation trials.

    Returns:
    - stopping_times (list): 1-based stopping time of each trial.
    - success_probability (float): Fraction of trials in which some
      observation reached the threshold.

    Raises:
    - ZeroDivisionError: If ``trials`` is 0.
    """
    stopping_times = []
    successes = 0

    for _ in range(trials):
        observations = np.random.rand(n)
        hits = np.flatnonzero(observations >= threshold)

        if hits.size:
            # Count the hit explicitly: a stop on the very last observation
            # also has stopping time n, so the time alone cannot tell a real
            # hit apart from the "threshold never met" default.
            stopping_times.append(int(hits[0]) + 1)
            successes += 1
        else:
            stopping_times.append(n)

    success_probability = successes / trials
    return stopping_times, success_probability

# Example usage: stop at the first uniform draw that reaches 0.8.
# No random seed is set in this snippet, so the printed numbers vary between runs.
threshold = 0.8  # Threshold value for stopping
n = 100  # Number of observations
trials = 10000  # Number of simulation trials

# stopping_times has one entry per trial; its mean is reported below.
stopping_times, success_probability = threshold_stopping_simulation(threshold, n, trials)
print("Average Stopping Time:", np.mean(stopping_times))
print("Success Probability:", success_probability)

# Upper Confidence Bound (UCB) Algorithm

# In [None]:
import numpy as np

class UCB:
    """
    Upper Confidence Bound policy for a multi-armed bandit.

    Each arm is scored by its running average reward plus an exploration
    bonus ``sqrt(2 * ln(total pulls) / pulls of that arm)``; the arm with the
    highest score is selected. Arms that have never been pulled are always
    tried first.
    """

    def __init__(self, n_arms):
        """
        Create a policy with no observations.

        Args:
        - n_arms (int): Number of arms.
        """
        self.n_arms = n_arms
        self.counts = np.zeros(n_arms)  # Count of pulls for each arm
        self.values = np.zeros(n_arms)  # Average reward for each arm

    def select_arm(self):
        """
        Choose the next arm to pull.

        Returns:
        - arm (int): Index of the lowest-numbered unpulled arm if there is
          one, otherwise the arm with the largest upper confidence bound.
        """
        # Look at the per-arm counts rather than the total: the total only
        # identifies the next unpulled arm if arms were updated in index
        # order, and a zero count would otherwise end up as a divisor below.
        untried = np.flatnonzero(self.counts == 0)
        if untried.size:
            return int(untried[0])

        total_counts = np.sum(self.counts)
        ucb_values = self.values + np.sqrt((2 * np.log(total_counts)) / self.counts)
        # Convert so both branches return a plain Python int.
        return int(np.argmax(ucb_values))

    def update(self, chosen_arm, reward):
        """
        Record one observed reward for an arm.

        Args:
        - chosen_arm (int): Index of the arm that was pulled.
        - reward (float): Reward observed for that pull.
        """
        self.counts[chosen_arm] += 1
        n = self.counts[chosen_arm]
        value = self.values[chosen_arm]
        # Running mean: weight the old average by (n - 1) / n and the new
        # reward by 1 / n, so no reward history needs to be stored.
        new_value = ((n - 1) / n) * value + (1 / n) * reward
        self.values[chosen_arm] = new_value

def ucb_simulation(n_arms, n_rounds, true_means):
    """
    Run a UCB policy against a simulated bandit with Gaussian rewards.

    In every round the policy picks an arm, receives that arm's true mean
    plus a standard normal noise draw, and is updated with the result.

    Args:
    - n_arms (int): Number of arms.
    - n_rounds (int): Number of rounds to play.
    - true_means (list): True mean reward of each arm, indexed by arm.

    Returns:
    - rewards (numpy.ndarray): Reward received in each round.
    - ucb (UCB): The policy object after all rounds.
    """
    policy = UCB(n_arms)
    rewards = np.zeros(n_rounds)

    for step in range(n_rounds):
        arm = policy.select_arm()
        # Noisy payoff: one standard normal draw shifted by the arm's mean.
        payoff = np.random.randn() + true_means[arm]
        policy.update(arm, payoff)
        rewards[step] = payoff

    return rewards, policy

# Example usage: five arms whose true means increase from 0.1 to 0.5.
# No random seed is set in this snippet, so the printed numbers vary between runs.
n_arms = 5  # Number of arms
n_rounds = 1000  # Number of rounds
true_means = [0.1, 0.2, 0.3, 0.4, 0.5]  # True means of the arms

# rewards holds one reward per round; ucb is the policy after the final round.
rewards, ucb = ucb_simulation(n_arms, n_rounds, true_means)
print("Average Reward:", np.mean(rewards))
print("Counts of each arm:", ucb.counts)
print("Estimated values of each arm:", ucb.values)
