In [47]:
import numpy as np
import scipy.stats as sts
import pandas as pd
from functools import lru_cache

# --- Model parameters ---------------------------------------------------
# Every offer is 20 + X, where X ~ Binomial(binomial_param_n, binomial_param_p);
# value() enumerates X over range(binomial_param_n + 1).
binomial_param_n, binomial_param_p = 20, 0.5

# Index of the last stage: value() returns 0 for any stage beyond this one.
number_of_potential_buyers = 20

# Decision table filled in by value():
#   key   -> (items left, stage, current offer)
#   value -> 1 when taking the offer is strictly better, otherwise 0
action_set_dict = dict()

print("Action sets:")

@lru_cache(maxsize=None)
def _offer_distribution(n: int, p: float):
    """Return ``((offer, probability), ...)`` for every possible next offer.

    An offer is ``20 + x`` with ``x`` ranging over ``0..n`` and probability
    ``Binomial(n, p).pmf(x)``.  The table is cached per ``(n, p)`` so the
    pmf is evaluated once instead of on every visit to every state.
    """
    return tuple((x + 20, sts.binom.pmf(x, n, p)) for x in range(n + 1))


# Cache to make program much faster
@lru_cache(maxsize=None)
def value(current_items: int, current_offer: int = 0, current_stage: int = 0, do_not_print=True, history = None):
    """Return the best expected revenue obtainable from the given state.

    A state is (items left, offer currently on the table, stage).  The
    function compares rejecting the offer (same inventory at the next stage)
    with accepting it (``current_offer`` now, one item fewer at the next
    stage) and returns the larger of the two expected values.

    Args:
        current_items: Items still available; zero or fewer is terminal.
        current_offer: Offer on the table at this stage.
        current_stage: Stage index; anything greater than
            ``number_of_potential_buyers`` is terminal.
        do_not_print: Accepted for backward compatibility; the body does not
            read it and the decision is printed regardless.
        history: Accepted for backward compatibility; not read.  If passed it
            must be hashable because it becomes part of the cache key.

    Returns:
        ``0`` for a terminal state, otherwise ``max(take, do not take)``.

    Side effects:
        The first time a non-terminal state is evaluated, the decision is
        printed and stored in ``action_set_dict`` under
        ``(current_items, current_stage, current_offer)`` as 1 (take) or
        0 (do not take).  Cached calls skip both.  Because results are
        memoised, call ``value.cache_clear()`` after changing the
        module-level parameters.
    """
    if current_stage > number_of_potential_buyers:
        return 0

    # Nothing left to sell; a negative inventory is treated the same way so
    # an invalid state can never report revenue.
    if current_items <= 0:
        return 0

    next_offers = _offer_distribution(binomial_param_n, binomial_param_p)
    next_stage = current_stage + 1

    # CASE do not take: inventory unchanged, wait for the next offer.
    value_do_not_take = 0
    for offer, probability in next_offers:
        value_do_not_take += probability * value(current_items, offer, next_stage)

    # CASE do take: bank the current offer and continue with one item fewer.
    value_do_take = current_offer
    for offer, probability in next_offers:
        value_do_take += probability * value(current_items - 1, offer, next_stage)

    print(f"Items left: {current_items}, stage: {current_stage}, current offer: {current_offer}")

    # A tie is resolved as "do not take" (strict comparison).
    if value_do_take > value_do_not_take:
        print(" TAKE offer")
        action_set_dict[(current_items, current_stage, current_offer)] = 1

    else:
        print(" DO NOT TAKE offer")
        action_set_dict[(current_items, current_stage, current_offer)] = 0

    print("")

    return max(value_do_not_take, value_do_take)

# Evaluate the starting state: 10 items, with the default offer (0) and the
# default stage (0).  The displayed number is the larger of the "take" and
# "do not take" values computed for that state.
value(10)

Action sets:
Items left: 10, stage: 20, current offer: 20
 TAKE offer

Items left: 10, stage: 20, current offer: 21
 TAKE offer

Items left: 10, stage: 20, current offer: 22
 TAKE offer

Items left: 10, stage: 20, current offer: 23
 TAKE offer

Items left: 10, stage: 20, current offer: 24
 TAKE offer

Items left: 10, stage: 20, current offer: 25
 TAKE offer

Items left: 10, stage: 20, current offer: 26
 TAKE offer

Items left: 10, stage: 20, current offer: 27
 TAKE offer

Items left: 10, stage: 20, current offer: 28
 TAKE offer

Items left: 10, stage: 20, current offer: 29
 TAKE offer

Items left: 10, stage: 20, current offer: 30
 TAKE offer

Items left: 10, stage: 20, current offer: 31
 TAKE offer

Items left: 10, stage: 20, current offer: 32
 TAKE offer

Items left: 10, stage: 20, current offer: 33
 TAKE offer

Items left: 10, stage: 20, current offer: 34
 TAKE offer

Items left: 10, stage: 20, current offer: 35
 TAKE offer

Items left: 10, stage: 20, current offer: 36
 TAKE offer

I

315.5626377620955

In [49]:
# This Python dictionary stores whether an action should be taken or not.
# Keys are (items left, stage, current offer) tuples; the stored value is
# 1 for "take the offer" and 0 for "do not take the offer", as written by value().
action_set_dict

{(10, 20, 20): 1,
 (10, 20, 21): 1,
 (10, 20, 22): 1,
 (10, 20, 23): 1,
 (10, 20, 24): 1,
 (10, 20, 25): 1,
 (10, 20, 26): 1,
 (10, 20, 27): 1,
 (10, 20, 28): 1,
 (10, 20, 29): 1,
 (10, 20, 30): 1,
 (10, 20, 31): 1,
 (10, 20, 32): 1,
 (10, 20, 33): 1,
 (10, 20, 34): 1,
 (10, 20, 35): 1,
 (10, 20, 36): 1,
 (10, 20, 37): 1,
 (10, 20, 38): 1,
 (10, 20, 39): 1,
 (10, 20, 40): 1,
 (9, 20, 20): 1,
 (9, 20, 21): 1,
 (9, 20, 22): 1,
 (9, 20, 23): 1,
 (9, 20, 24): 1,
 (9, 20, 25): 1,
 (9, 20, 26): 1,
 (9, 20, 27): 1,
 (9, 20, 28): 1,
 (9, 20, 29): 1,
 (9, 20, 30): 1,
 (9, 20, 31): 1,
 (9, 20, 32): 1,
 (9, 20, 33): 1,
 (9, 20, 34): 1,
 (9, 20, 35): 1,
 (9, 20, 36): 1,
 (9, 20, 37): 1,
 (9, 20, 38): 1,
 (9, 20, 39): 1,
 (9, 20, 40): 1,
 (10, 19, 20): 1,
 (10, 19, 21): 1,
 (10, 19, 22): 1,
 (10, 19, 23): 1,
 (10, 19, 24): 1,
 (10, 19, 25): 1,
 (10, 19, 26): 1,
 (10, 19, 27): 1,
 (10, 19, 28): 1,
 (10, 19, 29): 1,
 (10, 19, 30): 1,
 (10, 19, 31): 1,
 (10, 19, 32): 1,
 (10, 19, 33): 1,
 (10, 19, 34)

## D

In [69]:
def simulate_one(initial_items: int = 10, random_state=None):
    """Simulate one run of offers under the policy stored in ``action_set_dict``.

    One offer per stage is drawn as ``20 + Binomial(binomial_param_n,
    binomial_param_p)`` for stages ``1..number_of_potential_buyers``, i.e.
    from the same module-level parameters that ``value()`` reads.  At each
    stage the policy is looked up under ``(items left, stage, offer)``; a
    missing entry counts as "do not take".

    Args:
        initial_items: Number of items available at the start of the run.
        random_state: Optional seed or generator forwarded to
            ``sts.binom.rvs`` so a run can be reproduced.

    Returns:
        ``(profit, actions)`` where ``profit`` is the sum of accepted offers
        and ``actions`` is a list of ``(items left, stage, offer, decision)``
        tuples.  ``items left`` is the inventory at the moment of the
        decision (the same state that was looked up), and ``decision`` is
        1 for an accepted offer, 0 otherwise.
    """
    draws = sts.binom.rvs(
        binomial_param_n,
        binomial_param_p,
        size=number_of_potential_buyers,
        random_state=random_state,
    )
    items_available = initial_items
    profit = 0
    actions = []

    for stage, draw in enumerate(draws, start=1):
        # Sold out: later offers cannot be served, so the run ends here.
        if items_available <= 0:
            break

        # Plain int keeps the profit and the logged tuples free of numpy scalars.
        offer = int(draw) + 20
        accept = 1 if action_set_dict.get((items_available, stage, offer), 0) == 1 else 0

        # Log the state the decision was made in, before any inventory change.
        actions.append((items_available, stage, offer, accept))

        if accept:
            profit += offer
            items_available -= 1

    return profit, actions

def simulate_100(n_simulations: int = 100):
    """Return the mean profit over repeated calls to ``simulate_one()``.

    Args:
        n_simulations: Number of independent runs to average; defaults to
            100, matching the function name.

    Returns:
        The arithmetic mean of the profit component (index 0) of each
        ``simulate_one()`` result.

    Raises:
        ValueError: If ``n_simulations`` is smaller than 1, since the mean of
            zero runs is undefined.
    """
    if n_simulations < 1:
        raise ValueError("n_simulations must be at least 1")

    profit = np.zeros(n_simulations)
    for i in range(n_simulations):
        # Only the profit is needed; the per-stage action log is discarded.
        profit[i] = simulate_one()[0]

    return profit.mean()

# Average profit returned by simulate_one() over 100 simulated runs.
simulate_100()

315.12