In [205]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [245]:
def generate(list_price, n_bids=4, spread=0.05):
    """Sample competing bids around the listing price.

    Each bid is drawn from a Gaussian whose mean is ``list_price`` and whose
    standard deviation is ``spread * list_price``.

    Parameters
    ----------
    list_price : float
        Current listing price; used as the mean of the distribution.
    n_bids : int, optional
        Number of bids to draw. Defaults to 4.
    spread : float, optional
        Standard deviation expressed as a fraction of ``list_price``.
        Defaults to 0.05 (5%).

    Returns
    -------
    numpy.ndarray
        Array of ``n_bids`` sampled bids.
    """
    return np.random.normal(list_price, spread*list_price, n_bids)

def raise_bid(list_price, agent_bids):
    """Tell whether the seller asks for another round of offers.

    The seller asks for more offers only when the mean of ``agent_bids`` is
    strictly greater than ``list_price``.

    Returns
    -------
    int
        1 if another round is requested, 0 otherwise.
    """
    average_bid = agent_bids.mean()
    return 1 if average_bid > list_price else 0
    
def choose_action():
    """Pick one of the two action codes, 0 or 1, uniformly at random.

    The meaning of each code is decided by the caller.
    """
    candidate_actions = [0, 1]
    return np.random.choice(candidate_actions)

def simulate_bids(list_price, budget, max_rounds=5):
    """Simulate one bidding episode and return its transitions as a DataFrame.

    Our bid and four competing bids are drawn around ``list_price``. While the
    seller asks for more offers (see ``raise_bid``) and fewer than
    ``max_rounds`` rounds have been played, we randomly either raise our bid
    by 5% or keep it, the listing price grows by 5%, and the competitors
    re-bid around the new price.

    State codes : 2 = bidding continues, 0 = our bid is the highest when
                  bidding stops, 1 = it is not.
    Action codes: 0 = raise our bid by 5%, 1 = keep our bid, 2 = marker used
                  on the final (terminal) row.

    Parameters
    ----------
    list_price : float
        Initial listing price.
    budget : float
        Our budget; used only to scale the rewards.
    max_rounds : int, optional
        Maximum number of bidding rounds. Defaults to 5.

    Returns
    -------
    pandas.DataFrame
        Columns ``s``, ``a``, ``sp``, ``r``: one row per continuation round
        followed by one terminal row whose ``s`` and ``sp`` are both the
        final state.
    """
    # Draw our own bid before the competitors' bids so the order of RNG calls
    # is stable. Keep it as a plain scalar: calling float() on a 1-element
    # array is deprecated in recent NumPy releases.
    main_bid = float(np.random.normal(list_price, 0.05*list_price, 1)[0])
    agent_bids = np.append(generate(list_price), main_bid)  # ours is last
    count = 1

    actions = []
    step_rewards = []

    while raise_bid(list_price, agent_bids) and count < max_rounds:
        action = choose_action()  # random policy

        if action == 0:
            # Raise the bid by 5%; reward is the remaining budget fraction.
            main_bid = 1.05*main_bid
            step_rewards.append((budget - main_bid)/budget)  # r(2,0)
        else:
            # Bid unchanged.
            step_rewards.append(0.0)  # r(2,1)
        actions.append(action)

        # The seller's expectation rises by 5% each extra round.
        list_price += 0.05*list_price
        agent_bids = np.append(generate(list_price), main_bid)
        count += 1

    final_state = 0 if main_bid == np.max(agent_bids) else 1

    # NOTE(review): the same formula is applied whether the auction was won
    # or lost, so a lost auction still receives a (usually non-zero) reward.
    # Kept as-is to preserve existing results -- confirm this is intended.
    terminal_reward = 1 - 4*(budget - main_bid)/budget
    terminal = pd.DataFrame({'s': [final_state], 'a': [2],
                             'sp': [final_state], 'r': [terminal_reward]})

    if not actions:
        # Bidding stopped after the first round: only the terminal row.
        return terminal

    n_steps = len(actions)
    steps = pd.DataFrame({'s': [2]*n_steps,
                          'a': actions,
                          'sp': [2]*(n_steps - 1) + [final_state],
                          'r': step_rewards})
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # replacement (the per-frame row index is preserved, as before).
    return pd.concat([steps, terminal])
    
    

In [247]:
states = []
actions = []

# Simulate 10000 bidding scenarios (100 listing prices x 100 budgets). Both
# are drawn with replacement from 100 evenly spaced values in $1mn-$2mn.
# NOTE(review): no random seed is set in the visible cells, so every run
# produces different episodes.
price_grid = np.linspace(1e6, 2e6, 100)
episodes = []
for list_price in np.random.choice(price_grid, 100):
    for budget in np.random.choice(price_grid, 100):
        episodes.append(simulate_bids(list_price, budget))

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0,
# and growing a frame inside the loop re-copies all accumulated rows on
# every iteration.
df = pd.concat(episodes)


#df = df[df.a !=2]    

In [259]:
# Estimate the MDP model from the simulated transitions in `df`:
#   transition[s, a, s'] -> empirical probability of reaching s' from (s, a)
#   rewards[s, a]        -> mean observed reward
sa = df.groupby(['s', 'a']).size().reset_index(name='sa_counts')
sas = df.groupby(['s', 'a', 'sp']).size().reset_index(name='sas_counts')
sas = sas.merge(sa, on=['s', 'a'])  # attach the (s, a) totals to each (s, a, s') row
sas['prob'] = sas['sas_counts'] / sas['sa_counts']  # transition probabilities

transition = np.zeros((3, 3, 3))  # indexed [state, action, next state]
rewards = np.zeros((3, 3))  # indexed [state, action]

# Each (s, a, s') triple occurs once in `sas`, so one fancy-index assignment
# fills every observed entry.
transition[
    sas['s'].to_numpy().astype(int),
    sas['a'].to_numpy().astype(int),
    sas['sp'].to_numpy().astype(int),
] = sas['prob'].to_numpy()

# Drop action 2 (the terminal marker); only actions 0 and 1 remain.
# NOTE(review): after this slice the rows for states 0 and 1 are all zero
# (they do not sum to 1) -- confirm downstream code accepts that.
transition = transition[:, :-1, :]

# States 0 and 1: the same mean reward is used for every action.
for state in (0, 1):
    rewards[state, :] = df[df['s'] == state]['r'].mean()

# State 2: mean reward per action actually taken.
for action in range(3):
    in_play = (df['s'] == 2) & (df['a'] == action)
    rewards[2, action] = df[in_play]['r'].mean()

rewards = rewards[:, :-1]  # drop the action-2 column here as well

In [262]:
transition

array([[[0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ]],

       [[0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        ]],

       [[0.14011474, 0.34355693, 0.51632833],
        [0.05392599, 0.59025271, 0.3558213 ]]])

In [264]:
rewards

array([[ 1.46341208,  1.46341208],
       [ 1.16512905,  1.16512905],
       [-0.13743629,  0.        ]])