In [34]:
import random
import math
import matplotlib.pyplot as plt

In [35]:
def argmaxall(gen):
    """Collect every element whose value is maximal.

    gen yields (element, value) pairs, where value is a real number.
    The pairs are consumed in a single pass, so gen may be a one-shot
    generator.  Returns the list of elements that share the largest
    value, in the order they were produced (empty if gen is empty).
    """
    best = -math.inf   # largest value seen so far
    winners = []       # elements whose value equals best
    for elem, val in gen:
        if val == best:
            # ties with the current maximum are accumulated
            winners.append(elem)
        elif val > best:
            # a strictly better value discards all earlier winners
            best = val
            winners = [elem]
    return winners

In [36]:
def argmaxe(gen):
    """Pick one element with maximal value.

    gen yields (element, value) pairs, where value is a real number.
    When several elements tie for the maximum, one of them is chosen
    uniformly at random.
    """
    candidates = argmaxall(gen)
    return random.choice(candidates)


In [37]:
def argmax(lst):
    """Return an index of a maximal value in lst (ties broken at random)."""
    indexed = enumerate(lst)   # (index, value) pairs
    return argmaxe(indexed)

In [38]:
def flip(prob):
    """Return True with probability prob, otherwise False."""
    draw = random.random()   # uniform sample from [0, 1)
    return draw < prob

In [39]:
def select_from_dist(item_prob_dist):
    """Sample an item from a discrete probability distribution.

    item_prob_dist is a dict mapping each item to its probability.
    Returns one of the keys, chosen with the given probability.

    Raises RuntimeError if the sample falls beyond the total mass and
    the probabilities do not sum to (approximately) 1.
    """
    ranreal = random.random()
    total = 0.0            # accumulated probability mass
    last_item = None       # last item seen with non-zero probability
    have_fallback = False
    for (it, prob) in item_prob_dist.items():
        if ranreal < prob:
            return it
        ranreal -= prob
        total += prob
        if prob > 0:
            last_item, have_fallback = it, True
    # Repeated float subtraction can leave a tiny residual that walks off
    # the end of a perfectly valid distribution; in that case the sample
    # belongs to the last item that carries any probability.
    if have_fallback and math.isclose(total, 1.0, rel_tol=1e-9):
        return last_item
    raise RuntimeError(f"{item_prob_dist} is not a probability distribution")

In [40]:
class Displayable:
    """Mixin that gives a class level-controlled tracing output.

    A call to display() prints only when its level does not exceed the
    object's (or class's) maximum display level, so raising that
    maximum shows more detail.
    """
    # NOTE: the attribute name is misspelled ("dispaly"); it is kept
    # unchanged because code elsewhere may already set it by this name.
    max_dispaly_level = 1

    def display(self, level=0, *args, **nargs):
        """Print args (as print() would) if level is small enough.

        level  -- detail level of this message
        args, nargs -- positional and keyword arguments passed to print()
        """
        if level <= self.max_dispaly_level:
            # The original called self.__display, which name-mangles to
            # _Displayable__display and does not exist (AttributeError).
            print(*args, **nargs)

In [41]:
class Plot_history(object):
    """Plot the recorded histories of an agent and its environment.

    ag  -- agent; its purchase history is read from ag.buy_history or,
           failing that, from ag.ordering_controller.buy_history
    env -- environment exposing price_history and stock_history lists
    """
    def __init__(self, ag, env):
        self.ag = ag
        self.env = env
        plt.ion()
        plt.xlabel('Time')
        plt.ylabel('Value')

    def plot_env_hist(self):
        """plot history of price and number in stock"""
        # Use the environment given to the constructor rather than
        # whatever module-level `env` happens to exist.
        num = len(self.env.stock_history)
        plt.plot(range(num), self.env.price_history, label="Price")
        plt.plot(range(num), self.env.stock_history, label="In stock")
        plt.legend()

    def _buy_history(self):
        """Return the agent's list of purchase quantities."""
        history = getattr(self.ag, 'buy_history', None)
        if history is None:
            # agents built from controllers keep the history on the
            # ordering controller instead of on the agent itself
            history = self.ag.ordering_controller.buy_history
        return history

    def plot_agent_hist(self):
        """plot history of buying"""
        bought = self._buy_history()
        num = len(bought)
        plt.bar(range(1, num+1), bought, label="Bought")
        plt.legend()




In [42]:
class Agent(Displayable):
    """Abstract agent: maps percepts to actions.

    Subclasses must override select_action.
    """

    def initial_action(self, percept):
        """Return the first action; by default the same as select_action."""
        return self.select_action(percept)

    def select_action(self, percept):
        """Return the next action given the latest percept.

        Raises NotImplementedError unless overridden by a subclass.
        """
        # message now names this method (it previously said "go")
        raise NotImplementedError("Agent.select_action")

In [None]:

class Environment(Displayable):
    """Abstract environment: produces percepts in response to actions.

    Subclasses must override initial_percept and do.
    """
    def initial_percept(self):
        """returns the initial percept for the agent"""
        raise NotImplementedError("initial_percept")

    def do(self, action, time_unit=None):
        """does the action in the environment
        returns the next percept

        time_unit is optional: the simulation loop in this file passes
        its step index as a second argument, so the abstract signature
        accepts it to stay consistent with that call.
        """
        raise NotImplementedError("Environment.do")

In [None]:
class Simulate(Displayable):
    """Run an agent against an environment, step by step.

    Attributes:
      percept          -- the most recent percept from the environment
      percept_history  -- every percept received, starting with the initial one
      action_history   -- every action the agent has selected
    """

    def __init__(self, agent, environment):
        self.agent = agent
        self.env = environment
        self.percept = self.env.initial_percept()
        self.percept_history = [self.percept]
        self.action_history = []

    def go(self, n):
        """Run n agent/environment steps, printing each action and percept.

        The step index i restarts at 0 on every call; it is passed to
        the environment's do() as its second argument.
        """
        for i in range(n):
            action = self.agent.select_action(self.percept)
            print(f"i={i} action={action}")
            # the histories were initialised but never filled in before
            self.action_history.append(action)

            self.percept = self.env.do(action, i)
            print(f"      percept={self.percept}")
            self.percept_history.append(self.percept)


In [45]:
class TP_env(Environment):
    """Paper-buying environment tracking a price and a stock level.

    Each step a random amount of stock is used, the agent's purchase is
    added, and the price moves by a scheduled delta plus Gaussian noise.
    """
    # scheduled price change per step; indexed by time modulo its length
    price_delta = [0, 0, 0, 21, 0, 20, 0, -64, 0, 0, 23, 0, 0, 0, -35,
        0, 76, 0, -41, 0, 0, 0, 21, 0, 5, 0, 5, 0, 0, 0, 5, 0, -15, 0, 5,
       0, 5, 0, -115, 0, 115, 0, 5, 0, -15, 0, 5, 0, 5, 0, 0, 0, 5, 0,
       -59, 0, 44, 0, 5, 0, 5, 0, 0, 0, 5, 0, -65, 50, 0, 5, 0, 5, 0, 0,
       0, 5, 0]
    sd = 5   # scale applied to the unit Gaussian noise on the price

    def __init__(self):
        """paper buying environment: time 0, 20 in stock, empty histories"""
        self.time=0
        self.stock=20
        self.stock_history = []
        self.price_history = []

    def initial_percept(self):
        """return initial percept

        Draws the starting price (234 plus noise) and records the
        current stock and that price in the histories.
        """
        self.stock_history.append(self.stock)
        self.price = round(234+self.sd*random.gauss(0,1))
        self.price_history.append(self.price)
        return {'price': self.price,
                'instock': self.stock}

    def do(self, action, time_unit=None):
        """does action (buy) and returns percept consisting of price and instock

        action    -- dict with key 'buy' giving the quantity purchased
        time_unit -- step label used only in the printed trace; when
                     omitted, the environment's own step count is used
        initial_percept() must have been called first so that a current
        price exists.
        """
        if time_unit is None:
            time_unit = self.time
        used = select_from_dist({6:0.1, 5:0.1, 4:0.1, 3:0.3, 2:0.2, 1:0.2})
        print(f"i={time_unit} used={used}")
        bought = action['buy']
        self.stock = self.stock+bought-used
        self.stock_history.append(self.stock)
        self.time += 1
        self.price =  round(self.price
                        + self.price_delta[self.time%len(self.price_delta)]
                        + self.sd*random.gauss(0,1))
        self.price_history.append(self.price)
        return {'price': self.price,
                'instock': self.stock}

In [46]:
class PriceMonitoringController:
    """Watches the price and reports when it drops below a discount cutoff."""

    def __init__(self, threshold_discount=0.2):
        # fraction below the average price that counts as "below threshold"
        self.threshold_discount = threshold_discount

    def is_price_below_threshold(self, current_price, average_price):
        """Return True when current_price is under the discounted average."""
        cutoff = (1 - self.threshold_discount) * average_price
        return current_price < cutoff

In [47]:
class InventoryMonitoringController:
    """Watches the stock level and reports when it is critically low."""

    def __init__(self, critical_threshold=10):
        # stock strictly below this level is considered critical
        self.critical_threshold = critical_threshold

    def is_inventory_critical(self, current_stock):
        """Return True when current_stock is strictly under the threshold."""
        limit = self.critical_threshold
        return current_stock < limit

In [48]:
class OrderingController:
    """Chooses how much to order, consulting the other controllers.

    Attributes:
      buy_history -- quantity ordered at each decision, in order
      spent       -- running total of quantity * price over all orders
    """

    def __init__(self):
        self.buy_history = []
        self.spent = 0

    def _choose_quantity(self, current_price, average_price, current_stock,
                         price_controller, inventory_controller):
        """Return the order size for the current price and stock."""
        price_is_low = price_controller.is_price_below_threshold(
            current_price, average_price)
        if price_is_low and current_stock >= 10:
            # price is attractive and stock is at least 10
            return 15
        if inventory_controller.is_inventory_critical(current_stock):
            # stock is critically low: restock
            return 10
        return 0

    def decide_order(self, current_price, average_price, current_stock, price_controller, inventory_controller):
        """Pick the order quantity, record it, and return it."""
        quantity = self._choose_quantity(
            current_price, average_price, current_stock,
            price_controller, inventory_controller)
        self.buy_history.append(quantity)
        self.spent += quantity * current_price
        return quantity

In [49]:
class TP_agent(Agent):
    """Agent integrates all controllers and selects the action.

    Attributes:
      ave        -- running average of observed prices
      last_price -- price from the most recent percept
      instock    -- stock level from the most recent percept
    """
    def __init__(self, env=None):
        """Create the agent and its controllers.

        env -- optional environment; if given, its initial_percept() is
               used to seed the running average and stock level.  If
               omitted, they are seeded from the first percept passed
               to select_action, so the agent no longer depends on a
               module-level variable named `env`.
        """
        self.ave = self.last_price = self.instock = None
        if env is not None:
            percept = env.initial_percept()
            self.ave = self.last_price = percept['price']
            self.instock = percept['instock']

        # Initialize controllers
        self.price_controller = PriceMonitoringController(threshold_discount=0.2)
        self.inventory_controller = InventoryMonitoringController(critical_threshold=10)
        self.ordering_controller = OrderingController()

    def select_action(self, percept):
        """Return the next action to carry out.

        percept -- dict with keys 'price' and 'instock'
        Returns a dict {'buy': quantity}.
        """
        self.last_price = percept['price']
        if self.ave is None:
            # first percept seen: start the running average at this price
            self.ave = self.last_price
        # exponential moving average with weight 0.05 on the new price
        self.ave = self.ave + (self.last_price - self.ave) * 0.05
        self.instock = percept['instock']

        tobuy = self.ordering_controller.decide_order(
            self.last_price,
            self.ave,
            self.instock,
            self.price_controller,
            self.inventory_controller
        )
        return {'buy': tobuy}


In [51]:

# Build the environment and the agent, then wire them into a simulation.
env = TP_env()
ag = TP_agent()
sim = Simulate(ag,env)
# Run 10 simulation steps (each step prints the action, usage and percept).
sim.go(10)
# Total money spent so far divided by the number of environment steps taken.
ag.ordering_controller.spent/env.time

i=0 action={'buy': 0}
i=0 used=5
      percept={'price': 227, 'instock': 15}
i=1 action={'buy': 0}
i=1 used=1
      percept={'price': 217, 'instock': 14}
i=2 action={'buy': 0}
i=2 used=3
      percept={'price': 228, 'instock': 11}
i=3 action={'buy': 0}
i=3 used=1
      percept={'price': 227, 'instock': 10}
i=4 action={'buy': 0}
i=4 used=1
      percept={'price': 256, 'instock': 9}
i=5 action={'buy': 10}
i=5 used=2
      percept={'price': 249, 'instock': 17}
i=6 action={'buy': 0}
i=6 used=5
      percept={'price': 186, 'instock': 12}
i=7 action={'buy': 0}
i=7 used=1
      percept={'price': 194, 'instock': 11}
i=8 action={'buy': 0}
i=8 used=1
      percept={'price': 196, 'instock': 10}
i=9 action={'buy': 0}
i=9 used=3
      percept={'price': 228, 'instock': 7}


256.0

In [52]:
# Continue the same simulation for another 100 steps (the printed step index i restarts at 0).
sim.go(100);



i=0 action={'buy': 10}
i=0 used=6
      percept={'price': 227, 'instock': 11}
i=1 action={'buy': 0}
i=1 used=3
      percept={'price': 230, 'instock': 8}
i=2 action={'buy': 10}
i=2 used=4
      percept={'price': 226, 'instock': 14}
i=3 action={'buy': 0}
i=3 used=6
      percept={'price': 191, 'instock': 8}
i=4 action={'buy': 10}
i=4 used=3
      percept={'price': 186, 'instock': 15}
i=5 action={'buy': 0}
i=5 used=2
      percept={'price': 263, 'instock': 13}
i=6 action={'buy': 0}
i=6 used=3
      percept={'price': 258, 'instock': 10}
i=7 action={'buy': 0}
i=7 used=1
      percept={'price': 215, 'instock': 9}
i=8 action={'buy': 10}
i=8 used=1
      percept={'price': 219, 'instock': 18}
i=9 action={'buy': 0}
i=9 used=4
      percept={'price': 230, 'instock': 14}
i=10 action={'buy': 0}
i=10 used=3
      percept={'price': 238, 'instock': 11}
i=11 action={'buy': 0}
i=11 used=1
      percept={'price': 268, 'instock': 10}
i=12 action={'buy': 0}
i=12 used=2
      percept={'price': 267, 'instoc