In [9]:
import numpy as np

In [10]:
class BaseEpsilonGreedy:
    def __init__(self, num_arms, epsilon):
        self.num_arms = num_arms
        self.epsilon = epsilon
        self.arm_counts = np.zeros(num_arms)

    def select_arm(self):
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.num_arms)
        else:
            return np.argmax(self._get_values())

    def decay_epsilon(self, decay_rate):
        self.epsilon *= decay_rate

    def _get_values(self):
        raise NotImplementedError

    def update(self, chosen_arm, reward):
        self.arm_counts[chosen_arm] += 1
        count = self.arm_counts[chosen_arm]
        value = self._get_arm_value(chosen_arm)
        self._update_arm_value(chosen_arm, ((count - 1) / count) * value + (1 / count) * reward)

    def _get_arm_value(self, chosen_arm):
        raise NotImplementedError

    def _update_arm_value(self, chosen_arm, value):
        raise NotImplementedError

In [11]:
class EpsilonGreedy(BaseEpsilonGreedy):
    def __init__(self, num_arms, epsilon):
        super().__init__(num_arms, epsilon)
        self.arm_values = np.zeros(num_arms)

    def _get_values(self):
        return self.arm_values

    def _get_arm_value(self, chosen_arm):
        return self.arm_values[chosen_arm]

    def _update_arm_value(self, chosen_arm, value):
        self.arm_values[chosen_arm] = value

In [12]:
class ContextualEpsilonGreedy(BaseEpsilonGreedy):
    def __init__(self, num_arms, num_features, epsilon):
        super().__init__(num_arms, epsilon)
        self.num_features = num_features
        self.arm_values = np.zeros((num_arms, num_features))

    def _get_values(self, state):
        return np.dot(self.arm_values, state)

    def select_arm(self, state):
        self.state = state  # store the current state
        return super().select_arm()

    def _get_arm_value(self, chosen_arm):
        return self.arm_values[chosen_arm]

    def _update_arm_value(self, chosen_arm, value):
        self.arm_values[chosen_arm] = value * self.state

    def update(self, chosen_arm, reward):
        super().update(chosen_arm, reward)
        self.state = None  # clear the stored state

if we have previously engaged with this customer before -> contextual. else -> context-free

In [13]:
def calculate_reward(result):
  

SyntaxError: EOL while scanning string literal (1721270393.py, line 1)

In [None]:
def previously_engaged(state):
  

In [None]:
# Initialization
num_features = len(state)  # assuming state is a numpy array or list
contextual_bandit = ContextualEpsilonGreedy(num_arms=10, num_features=num_features, epsilon=1.0)
noncontextual_bandit = EpsilonGreedy(num_arms=10, epsilon=1.0)

# Initialize dictionaries to store rewards, clicks and spend for each bandit
data = {"contextual": {"rewards": [], "clicks": [], "spend": []},
        "noncontextual": {"rewards": [], "clicks": [], "spend": []}}

# Loop over ad opportunities
for opportunity in ad_opportunities:
    # Get the current state (user, ad, market information, etc.)
    state = opportunity.state

    # Determine if we have previously engaged with this customer
    if previously_engaged(state):
        bandit = contextual_bandit
        bandit_type = "contextual"
    else:
        bandit = noncontextual_bandit
        bandit_type = "noncontextual"

    # Let the bandit choose an arm (bid price range)
    arm = bandit.select_arm() if bandit_type == "noncontextual" else bandit.select_arm(state)

    # Submit the bid and get the result
    result = submit_bid(state, arm)

    # Calculate the reward
    reward = calculate_reward(result)

    # Update the bandit
    if bandit_type == "contextual":
        bandit.update(arm, reward, state)
    else:
        bandit.update(arm, reward)

    # Store the results for analysis
    data[bandit_type]["rewards"].append(reward)
    data[bandit_type]["clicks"].append(result.click)
    data[bandit_type]["spend"].append(result.cost)

# Analyze the results
for bandit_type, results in data.items():
    print(f"Results for {bandit_type} bandit:")
    analyze_results(results["rewards"], results["clicks"], results["spend"])
