In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import time

In [2]:
# 17 states : 
    # state 0 : offer accepted 
    # state 1: offer rejected
    # state 2-16 : listing price raised in 1% increments till 15%

# 27 actions:
    # action 0: keep the same bid
    # actions 1-25: raise the bid by 1%-25% (in 1% increments)
    # action 26: placeholder action recorded on the terminal (accepted/rejected) row

In [3]:
def generate(list_price):
    """Draw the four competing bids for a listing.

    The bids are sampled from a normal distribution whose mean is
    ``list_price`` and whose standard deviation is 5% of ``list_price``.

    Returns a NumPy array holding the 4 sampled bids.
    """
    spread = 0.05*list_price
    n_competitors = 4
    return np.random.normal(loc=list_price, scale=spread, size=n_competitors)

def raise_bid(list_price, agent_bids):
    """Tell whether the seller asks for another round of offers.

    Returns 1 when the median of ``agent_bids`` is strictly greater than
    ``list_price`` and 0 otherwise.
    """
    return 1 if np.median(agent_bids) > list_price else 0
    
def choose_action():
    """Pick one of the 26 bidding actions (indices 0-25) uniformly at random."""
    n_actions = 26
    return np.random.choice(n_actions)

def choose_state():
    """Pick a continuation state uniformly at random from 2 through 16."""
    continuation_states = np.arange(2, 17)
    return np.random.choice(continuation_states)

def new_list_price(s, list_price):
    """Return the listing price after the raise encoded by state ``s``.

    The price is multiplied by ``1 + (s - 1) * 0.01``, so ``s = 2`` is a 1%
    raise and ``s = 16`` is a 15% raise.
    """
    raise_factor = 1 + (s - 1)*0.01
    return raise_factor*list_price

def simulate_bids(list_price, budget):
    """Simulate one bidding episode in which our bidder acts at random.

    Our opening bid and the competing bids are drawn around ``list_price``.
    While the median competing bid is above the current list price (and fewer
    than five rounds have been played), a random continuation state raises the
    list price, our bid is scaled by a randomly chosen action, and the
    competitors re-bid around the new price.

    Parameters
    ----------
    list_price : float
        Initial listing price.
    budget : float
        Our budget; rewards are expressed as fractions of it.

    Returns
    -------
    pandas.DataFrame
        One row per transition with columns ``s`` (state), ``a`` (action),
        ``sp`` (next state) and ``r`` (reward), followed by a terminal row
        whose state is 0 (bid accepted) or 1 (bid rejected) and whose action
        is the placeholder 26. The terminal row restarts the index at 0.
    """
    max_rounds = 5
    terminal_action = 26
    # Bid multipliers 1.00, 1.01, ..., 1.25 indexed by action; loop-invariant.
    actions = np.arange(1, 1.26, 0.01)

    count = 1
    # Keep our bid as a plain float: calling float() on a 1-element array is
    # deprecated in recent NumPy releases.
    main_bid = float(np.random.normal(list_price, 0.05*list_price, 1)[0])
    # Competing bids first, our bid last.
    agent_bids = np.append(generate(list_price), main_bid)

    s = []
    a = []
    r = []

    # `and` (not bitwise `&`) is the logical operator intended here.
    while raise_bid(list_price, agent_bids[:-1]) and count < max_rounds:
        # Randomly choose the continuation state and raise the list price accordingly.
        state = choose_state()
        s.append(state)
        list_price = new_list_price(state, list_price)

        # Randomly choose how much to raise our bid.
        action = choose_action()
        a.append(action)
        main_bid = float(actions[action]*main_bid)

        if action > 0:
            # Raised bid: reward is the fraction of the budget left over.
            r.append((budget - main_bid)/budget)
        else:
            # Bid maintained.
            r.append(0)

        agent_bids = np.append(generate(list_price), main_bid)
        count += 1

    if main_bid == np.max(agent_bids):
        # Our bid is the highest one: accepted.
        s.append(0)
        # NOTE(review): this reward grows with the bid and exceeds 1 once
        # main_bid > budget -- confirm that this is intended.
        r.append(1 - 4*(budget - main_bid)/budget)
    else:
        # Our bid is rejected.
        s.append(1)
        r.append(0)

    terminal = pd.DataFrame({'s': s[-1], 'a': [terminal_action], 'sp': s[-1], 'r': r[-1]})
    if count > 1:
        transitions = pd.DataFrame({'s': s[:-1], 'a': a, 'sp': s[1:], 'r': r[:-1]})
        # DataFrame.append was removed in pandas 2.0; concat keeps the same
        # row order and index labels.
        return pd.concat([transitions, terminal])
    return terminal

In [4]:
def optimal_policy_dictionary():
    """Build the state -> best-action lookup for the 17 states (0-16).

    The actions are hard-coded; position ``i`` of the sequence is the action
    for state ``i``.
    """
    best_actions = (0, 0, 14, 13, 14, 16, 15, 16, 19, 24, 17, 22, 20, 25, 24, 23, 25)
    return {state: action for state, action in enumerate(best_actions)}

def choose_best_action(state):
    """Look up the action for ``state`` in the module-level ``policy_dict``."""
    best_action = policy_dict[state]
    return best_action


def simulate_best_bids(list_price, budget):
    """Simulate one bidding episode in which our bidder follows the fixed policy.

    Our opening bid and the competing bids are drawn around ``list_price``.
    While the median competing bid is above the current list price (and fewer
    than five rounds have been played), a random continuation state raises the
    list price, our bid is scaled by the action ``choose_best_action`` returns
    for that state, and the competitors re-bid around the new price.

    Parameters
    ----------
    list_price : float
        Initial listing price.
    budget : float
        Our budget; rewards are expressed as fractions of it.

    Returns
    -------
    pandas.DataFrame
        One row per transition with columns ``s`` (state), ``a`` (action),
        ``sp`` (next state) and ``r`` (reward), followed by a terminal row
        whose state is 0 (bid accepted) or 1 (bid rejected) and whose action
        is the placeholder 26. The terminal row restarts the index at 0.
    """
    max_rounds = 5
    terminal_action = 26
    # Bid multipliers 1.00, 1.01, ..., 1.25 indexed by action; loop-invariant.
    actions = np.arange(1, 1.26, 0.01)

    count = 1
    # Keep our bid as a plain float: calling float() on a 1-element array is
    # deprecated in recent NumPy releases.
    main_bid = float(np.random.normal(list_price, 0.05*list_price, 1)[0])
    # Competing bids first, our bid last.
    agent_bids = np.append(generate(list_price), main_bid)

    s = []
    a = []
    r = []

    # `and` (not bitwise `&`) is the logical operator intended here.
    while raise_bid(list_price, agent_bids[:-1]) and count < max_rounds:
        # Randomly choose the continuation state and raise the list price accordingly.
        state = choose_state()
        s.append(state)
        list_price = new_list_price(state, list_price)

        # Follow the policy: take the action mapped to the current state.
        action = choose_best_action(state)
        a.append(action)
        main_bid = float(actions[action]*main_bid)

        if action > 0:
            # Raised bid: reward is the fraction of the budget left over.
            r.append((budget - main_bid)/budget)
        else:
            # Bid maintained.
            r.append(0)

        agent_bids = np.append(generate(list_price), main_bid)
        count += 1

    if main_bid == np.max(agent_bids):
        # Our bid is the highest one: accepted.
        s.append(0)
        # NOTE(review): this reward grows with the bid and exceeds 1 once
        # main_bid > budget -- confirm that this is intended.
        r.append(1 - 4*(budget - main_bid)/budget)
    else:
        # Our bid is rejected.
        s.append(1)
        r.append(0)

    terminal = pd.DataFrame({'s': s[-1], 'a': [terminal_action], 'sp': s[-1], 'r': r[-1]})
    if count > 1:
        transitions = pd.DataFrame({'s': s[:-1], 'a': a, 'sp': s[1:], 'r': r[:-1]})
        # DataFrame.append was removed in pandas 2.0; concat keeps the same
        # row order and index labels.
        return pd.concat([transitions, terminal])
    return terminal

In [5]:
# Build the state -> action lookup once; choose_best_action reads this module-level name.
policy_dict = optimal_policy_dictionary()

In [6]:
# Buyer budgets per city (presumably USD -- confirm).
budget_austin = 600_000.0
budget_san_jose = 1_300_000.0

# Average San Jose home price, from the Zillow website (accessed 03.17.2023):
# https://www.zillow.com/home-values/33839/san-jose-ca/
average_san_jose_home_price = 1_273_535

In [7]:
# Example episode using the fixed best-action policy; the returned DataFrame is the cell output.
simulate_best_bids(average_san_jose_home_price, budget_san_jose)

Unnamed: 0,s,a,sp,r
0,1,26,1,0


In [8]:
# Example episode using randomly chosen actions; the returned DataFrame is the cell output.
simulate_bids(average_san_jose_home_price, budget_san_jose)


Unnamed: 0,s,a,sp,r
0,4,0,14,0.0
1,14,17,11,-0.158324
2,11,21,0,-0.401571
0,0,26,0,2.606286


In [9]:
# Monte Carlo comparison of the random policy against the fixed best-action policy.
# Seed the global NumPy RNG so that re-running this cell reproduces the same numbers.
np.random.seed(0)
n_trials = 10000

count_random_accept = 0
random_rewards = []
count_best_accept = 0
best_rewards = []
for i in range(n_trials):
    # Random policy: the last row of an episode is its terminal row (state 0 = accepted).
    df1 = simulate_bids(average_san_jose_home_price, budget_san_jose)
    last_row1 = df1.iloc[-1]
    if last_row1['s'] == 0:
        count_random_accept += 1
        random_rewards.append(last_row1['r'])
    # Best-action policy, scored the same way.
    df = simulate_best_bids(average_san_jose_home_price, budget_san_jose)
    last_row = df.iloc[-1]
    if last_row['s'] == 0:
        count_best_accept += 1
        best_rewards.append(last_row['r'])

In [10]:
# Summarise how the best-action policy compares with the random one:
# difference in accepted bids and in mean reward over accepted episodes.
extra_accepts = count_best_accept - count_random_accept
reward_gain = np.mean(best_rewards) - np.mean(random_rewards)
print('optimal policy leads to an accepted bid ' + str(extra_accepts) + ' more times.')
print('the mean reward is ' + str(reward_gain) + ' higher')

optimal policy leads to an accepted bid 1461 more times.
the mean reward is 0.39293156360317205 higher


In [13]:
# Raw acceptance counts, one per line: best-action policy first, then random policy.
print(count_best_accept, count_random_accept, sep='\n')

5897
4436
