<a href="https://colab.research.google.com/github/sjpritchard2001/test/blob/main/Pricing_Via_Reinforcement.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# **Scenario:**
* The business sells a product online and wants to use reinforcement learning to determine the best price point for the product each day.

* The agent can choose a price from a fixed set of possible prices (e.g., \$10, \$15, \$20, \$25).

* The environment responds with a sales volume that depends on the price, and the agent receives a reward equal to price * quantity sold (i.e., revenue, which stands in for profit here because costs are not modeled).

In [1]:
import random

class PricingAgent:
    """Epsilon-greedy agent that keeps one value estimate per price point.

    There is no state and no discounting: each price has a single running
    estimate of the reward it yields, which is moved toward every newly
    observed reward.

    Args:
        prices: Iterable of candidate price points. Defaults to
            ``[10, 15, 20, 25]`` when omitted.
        epsilon: Probability of picking a random price (exploration) instead
            of the currently best-valued one. Defaults to 0.1.
        learning_rate: Step size used when moving an estimate toward an
            observed reward. Defaults to 0.1.

    Raises:
        ValueError: If ``prices`` is empty.
    """

    def __init__(self, prices=None, epsilon: float = 0.1, learning_rate: float = 0.1):
        if prices is None:
            prices = [10, 15, 20, 25]
        # Copy so that later changes to the caller's sequence do not affect the agent.
        self.prices = list(prices)
        if not self.prices:
            raise ValueError("prices must contain at least one price point")
        self.epsilon = epsilon
        self.learning_rate = learning_rate
        # Every estimate starts at 0, so the first greedy choice is the first price listed.
        self.q_values = {price: 0 for price in self.prices}

    def choose_action(self):
        """Return the price to use next.

        With probability ``epsilon`` a price is drawn uniformly at random;
        otherwise the price with the highest current estimate is returned
        (ties go to the price that appears first in ``prices``).
        """
        if random.random() < self.epsilon:
            return random.choice(self.prices)
        return max(self.q_values, key=self.q_values.get)

    def update_q_value(self, price, reward) -> None:
        """Move the estimate for ``price`` toward ``reward``.

        Applies ``Q <- Q + learning_rate * (reward - Q)``, i.e. an
        exponentially weighted running average of the observed rewards.

        Raises:
            KeyError: If ``price`` is not one of the agent's price points.
        """
        current = self.q_values[price]
        self.q_values[price] = current + self.learning_rate * (reward - current)

In [2]:
class BusinessEnvironment:
    """Toy market simulator that maps a price to units sold and to a reward.

    Args:
        demand_factor: Units sold when the price is in the 15-20 band.
            Defaults to 100.
    """

    def __init__(self, demand_factor=100):
        self.demand_factor = demand_factor  # Base demand

    def get_sales(self, price):
        """Return the simulated number of units sold at ``price``.

        Simplified piecewise-linear model:
          * price below 15: base demand minus 10 units per unit of price under 15
          * price above 20: base demand minus 5 units per unit of price over 20
          * otherwise: base demand

        The result is clamped at zero, because a negative quantity sold is
        meaningless and would otherwise produce a negative reward for
        extreme prices.
        """
        if price < 15:
            sales = self.demand_factor - (15 - price) * 10
        elif price > 20:
            sales = self.demand_factor - (price - 20) * 5
        else:
            sales = self.demand_factor
        return max(0, sales)

    def get_reward(self, price):
        """Return the reward for charging ``price``: price times units sold.

        No cost is modelled, so this figure is revenue; the rest of the
        notebook reports it as profit.
        """
        return self.get_sales(price) * price

In [3]:
# Create the simulated market and the pricing agent that will learn from it
environment, agent = BusinessEnvironment(), PricingAgent()

In [4]:
# Run a 100-day simulation. Each day the agent picks a price, the environment
# returns the reward for that price, and the agent updates its estimate for it.
for day in range(100):
    price = agent.choose_action()  # Usually the best-valued price so far, occasionally a random one
    reward = environment.get_reward(price)  # Simulated units sold at this price, times the price
    agent.update_q_value(price, reward)  # Move the chosen price's estimate toward the observed reward

    # `day` is zero-based, so add 1 to report days as 1..100
    print(f"Day {day + 1}: Price set to {price}, Profit: {reward}")

Day 1: Price set to 10, Profit: 500
Day 2: Price set to 15, Profit: 1500
Day 3: Price set to 15, Profit: 1500
Day 4: Price set to 15, Profit: 1500
Day 5: Price set to 15, Profit: 1500
Day 6: Price set to 15, Profit: 1500
Day 7: Price set to 15, Profit: 1500
Day 8: Price set to 10, Profit: 500
Day 9: Price set to 15, Profit: 1500
Day 10: Price set to 15, Profit: 1500
Day 11: Price set to 15, Profit: 1500
Day 12: Price set to 15, Profit: 1500
Day 13: Price set to 15, Profit: 1500
Day 14: Price set to 15, Profit: 1500
Day 15: Price set to 15, Profit: 1500
Day 16: Price set to 15, Profit: 1500
Day 17: Price set to 15, Profit: 1500
Day 18: Price set to 10, Profit: 500
Day 19: Price set to 15, Profit: 1500
Day 20: Price set to 15, Profit: 1500
Day 21: Price set to 15, Profit: 1500
Day 22: Price set to 15, Profit: 1500
Day 23: Price set to 15, Profit: 1500
Day 24: Price set to 15, Profit: 1500
Day 25: Price set to 15, Profit: 1500
Day 26: Price set to 15, Profit: 1500
Day 27: Price set to 15,