In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the guest table, discard its "guest" label column and replace every
# remaining value v by 1 - v, so a discount d becomes the multiplier (1 - d).
guests = 1 - pd.read_excel("data/guests.xlsx", index_col=0).drop(columns="guest")
# 1-D array with one (1 - discount) factor per row of the guest table.
vec_discount = guests["discount"].to_numpy()

In [3]:
# Hotel table; its "price" column is also kept as a plain 1-D array.
hotels = pd.read_excel("data/hotels.xlsx", index_col=0)
vec_prices = hotels.loc[:, "price"].to_numpy()

In [4]:
# Preference list with (at least) the columns "guest", "hotel" and "priority".
preferences = pd.read_excel("data/preferences.xlsx", index_col=0)
# Strip the 'guest_' / 'hotel_' prefixes and shift the remaining numbers down by one.
preferences = preferences.assign(
    guest=pd.to_numeric(preferences["guest"].str.replace('guest_', '')) - 1,
    hotel=pd.to_numeric(preferences["hotel"].str.replace('hotel_', '')) - 1,
)
# Matrix-like layout: one row per guest, one column per hotel; the smallest
# priority is kept when a (guest, hotel) pair occurs more than once.
pref_pivoted = preferences.pivot_table(index='guest', columns='hotel', values="priority", aggfunc="min")
# Row-wise dense ranks (1 = smallest priority); hotels a guest did not list stay NaN.
rank_matrix = pref_pivoted.rank(axis=1, method="dense").to_numpy()

In [5]:
# Converts the ranked preferences matrix into a utility matrix
def to_utility_matrix(rank_matrix):
  """Map each row of dense ranks to utilities in [0.1, 1].

  For every row, the distinct non-NaN ranks are spread evenly from 1 (rank 1)
  down to 0.1 (last rank); a row with a single distinct rank gets utility 1.
  NaN entries receive utility 0.

  Parameters
  ----------
  rank_matrix : 2-D float ndarray
      Ranks per row, expected to be dense (1, 2, ..., k) with NaN for
      missing entries.

  Returns
  -------
  ndarray
      Array of utilities with one row per input row.
  """
  utility_rows = []
  for ranks in rank_matrix:
    ranked = ranks[~np.isnan(ranks)]
    # one utility level per distinct rank in this row
    levels = np.linspace(start=1, stop=0.1, num=np.unique(ranked).size)
    # rank != rank is only true for NaN, which maps straight to zero utility
    utility_rows.append([0.0 if rank != rank else levels[int(rank) - 1] for rank in ranks])
  return np.array(utility_rows)

In [6]:
# Utilities derived from rank_matrix; results() captures this array as the
# default value of its utility_matrix parameter when it is defined below.
utility_matrix = to_utility_matrix(rank_matrix)

In [7]:
# def print_output(res, guest_count, room_count, hotel_count, t):
#   res_avg = np.round_(np.array(res).mean(axis=0), 3)
#   print(f"With {t} iterations, on average we have: \n{res_avg[0]}/{guest_count} guests placed \n{res_avg[1]}/{room_count} rooms occupied \n{res_avg[2]}/{hotel_count} hotels occupied \n{res_avg[3]} total revenue \n{res_avg[4]} total satisfaction")

In [8]:
def results(choice_matrix, vec_prices=vec_prices, vec_discount=vec_discount, utility_matrix=utility_matrix):
  """Summarise an assignment matrix into five headline metrics.

  choice_matrix holds a 1 where a row (guest) was given a room in a column
  (hotel) and 0 elsewhere. vec_prices is broadcast across the columns and
  vec_discount across the rows. The three defaults are the notebook-level
  arrays as they existed when this function was defined.

  Returns the list
  [guests_placed, rooms_occupied, hotels_occupied, revenue, utility].
  """
  occupancy_per_hotel = choice_matrix.sum(axis=0)

  guests_placed = choice_matrix.sum(axis=1).sum()
  rooms_occupied = occupancy_per_hotel.sum()
  # a hotel counts as occupied as soon as it has at least one assignment
  hotels_occupied = (occupancy_per_hotel > 0).astype(int).sum()

  # every 1 becomes that hotel's price, then is scaled by the row's factor
  paid = (vec_prices * choice_matrix) * vec_discount[:, np.newaxis]
  revenue = paid.sum()

  utility = (choice_matrix * utility_matrix).sum()
  return [guests_placed, rooms_occupied, hotels_occupied, revenue, utility]

In [9]:
def random_model(hotels, vec_prices, vec_discount, rank_matrix, t=1000, seed=None):
  """Baseline: hand out rooms at random and average the metrics over t runs.

  In every run, min(number of guests, total rooms) distinct rooms are drawn
  without replacement and given to the first guests in row order (rows
  0 .. least-1); preferences are ignored. rank_matrix is used only for its
  shape.

  Parameters
  ----------
  hotels : DataFrame with a "rooms" column. Its index labels are used as
      column positions of the assignment matrix (assumes labels 0..n-1 that
      line up with rank_matrix columns -- TODO confirm).
  vec_prices, vec_discount : forwarded to results().
  rank_matrix : 2-D array, guests x hotels.
  t : number of random runs to average (must be >= 1).
  seed : optional int. None (default) draws from NumPy's global legacy
      generator exactly as before, so np.random.seed() still applies; an int
      makes the call reproducible on its own.

  Returns
  -------
  list
      The five metrics of results(), each averaged over the t runs.

  Raises
  ------
  ValueError
      If t is smaller than 1.
  """
  if t < 1:
    raise ValueError("t must be at least 1")

  guest_count = rank_matrix.shape[0]
  room_count = hotels["rooms"].sum()
  least = int(min(guest_count, room_count))

  rng = np.random if seed is None else np.random.RandomState(seed)

  # One entry per physical room: each hotel label repeated once per room.
  vec_rooms = np.repeat(hotels.index.to_numpy(), hotels["rooms"].to_numpy())
  guest_rows = np.arange(least)

  res = []
  for _ in range(t):
    choice_matrix = np.zeros(rank_matrix.shape)
    random_rooms = rng.choice(vec_rooms, least, replace=False)
    choice_matrix[guest_rows, random_rooms] = 1  # guest k gets the k-th drawn room
    res.append(results(choice_matrix, vec_prices, vec_discount))
  return list(np.array(res).mean(axis=0))

In [10]:
def preference_model(hotels, vec_prices, rank_matrix):
  """Serve guests in row order, giving each their best-ranked hotel with a free room.

  Each guest receives at most one room: the hotel with the lowest rank among
  those they ranked that still have capacity. If several such hotels share
  the lowest rank, the one with the smallest column index is taken. Guests
  with no ranked hotel left are not placed.

  Parameters
  ----------
  hotels : DataFrame with a "rooms" column, read positionally (row j is
      assumed to describe column j of rank_matrix -- TODO confirm).
  vec_prices : hotel prices, forwarded to results().
  rank_matrix : 2-D array (guests x hotels) of ranks, NaN where unranked.

  Returns
  -------
  list
      [guests_placed, rooms_occupied, hotels_occupied, revenue, utility]
      as computed by results().
  """
  ranks = np.nan_to_num(rank_matrix)  # NaN -> 0, i.e. "not ranked by this guest"
  choice_matrix = np.zeros(rank_matrix.shape)
  rooms = hotels["rooms"].to_numpy().copy()  # working copy; the hotels frame is left untouched

  for guest, row in enumerate(ranks):
    open_choices = np.flatnonzero((row != 0) & (rooms > 0))
    if open_choices.size == 0:
      continue  # nothing this guest ranked has a free room
    # argmin returns the first minimum, so a tie never books more than one room
    best = open_choices[np.argmin(row[open_choices])]
    choice_matrix[guest, best] = 1
    rooms[best] -= 1
  return results(choice_matrix, vec_prices)

In [11]:
def room_model(hotels,rank_matrix):
    """Fill hotels from largest to smallest with the guests who rank them best.

    Hotels are visited by number of rooms (descending), cheaper first among
    equally sized hotels. Each hotel takes, among the guests not yet placed
    who ranked it, those with the lowest rank until its rooms run out; equal
    ranks are resolved by guest row order. A placed guest is never considered
    again, so every guest gets at most one room.

    Parameters
    ----------
    hotels : DataFrame with "rooms" and "price" columns, read positionally
        (row j is assumed to describe column j of rank_matrix -- TODO confirm).
    rank_matrix : 2-D array (guests x hotels) of ranks, NaN where unranked.

    Returns
    -------
    list
        [guests_placed, rooms_occupied, hotels_occupied, revenue, utility]
        as computed by results() with its default price/discount/utility arrays.
    """
    ranks = np.asarray(rank_matrix, dtype=float)
    rooms = hotels["rooms"].to_numpy()
    prices = hotels["price"].to_numpy()

    # np.lexsort treats the LAST key as primary: rooms descending, then price ascending.
    hotel_order = np.lexsort((prices, -rooms))

    choice_matrix = np.zeros(ranks.shape)
    unplaced = np.ones(ranks.shape[0], dtype=bool)

    for hotel in hotel_order:
        candidates = np.flatnonzero(unplaced & ~np.isnan(ranks[:, hotel]))
        # stable sort keeps guest order among equal ranks -> deterministic result
        by_rank = candidates[np.argsort(ranks[candidates, hotel], kind="stable")]
        winners = by_rank[:rooms[hotel]]
        choice_matrix[winners, hotel] = 1
        unplaced[winners] = False

    return results(choice_matrix)

In [68]:
def price_model(hotels,rank_matrix):
    """Fill hotels from cheapest to most expensive with the guests who rank them best.

    Hotels are visited by price (ascending), larger first among equally
    priced hotels. Each hotel takes, among the guests not yet placed who
    ranked it, those with the lowest rank until its rooms run out; equal
    ranks are resolved by guest row order. A placed guest is never considered
    again, so every guest gets at most one room.

    Parameters
    ----------
    hotels : DataFrame with "rooms" and "price" columns, read positionally
        (row j is assumed to describe column j of rank_matrix -- TODO confirm).
    rank_matrix : 2-D array (guests x hotels) of ranks, NaN where unranked.

    Returns
    -------
    list
        [guests_placed, rooms_occupied, hotels_occupied, revenue, utility]
        as computed by results() with its default price/discount/utility arrays.
    """
    ranks = np.asarray(rank_matrix, dtype=float)
    rooms = hotels["rooms"].to_numpy()
    prices = hotels["price"].to_numpy()

    # np.lexsort treats the LAST key as primary: price ascending, then rooms descending.
    hotel_order = np.lexsort((-rooms, prices))

    choice_matrix = np.zeros(ranks.shape)
    unplaced = np.ones(ranks.shape[0], dtype=bool)

    for hotel in hotel_order:
        candidates = np.flatnonzero(unplaced & ~np.isnan(ranks[:, hotel]))
        # stable sort keeps guest order among equal ranks
        by_rank = candidates[np.argsort(ranks[candidates, hotel], kind="stable")]
        winners = by_rank[:rooms[hotel]]
        choice_matrix[winners, hotel] = 1
        unplaced[winners] = False

    return results(choice_matrix)

In [13]:
# Random baseline averaged over 1000 runs. Output order:
# [guests_placed, rooms_occupied, hotels_occupied, revenue, utility]
random_model(hotels, vec_prices, vec_discount, rank_matrix, t=1000)

[4000.0, 4000.0, 399.997, 640429.1715700013, 132.16244036769965]

In [14]:
# Guests served in row order, each taking their best-ranked available hotel. Output order:
# [guests_placed, rooms_occupied, hotels_occupied, revenue, utility]
preference_model(hotels, vec_prices, rank_matrix)

[3975.0, 3975.0, 400, 643597.5300000005, 3895.433198505531]

In [15]:
# Hotels ordered by number of rooms (descending), then price. Output order:
# [guests_placed, rooms_occupied, hotels_occupied, revenue, utility]
room_model(hotels,rank_matrix)

[4000.0, 4000.0, 343, 633445.5199999999, 3680.0579936329723]

In [69]:
# Hotels ordered by price (ascending), then number of rooms. Output order:
# [guests_placed, rooms_occupied, hotels_occupied, revenue, utility]
price_model(hotels,rank_matrix)

[3990.0, 3990.0, 383, 578798.2299999997, 3685.490018624015]