In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the guest sheet (first spreadsheet column becomes the index) and discard the "guest" label column.
guests = pd.read_excel("/content/drive/MyDrive/guests.xlsx", index_col=0).drop(columns="guest")
# Replace every remaining value by its complement (1 - value).
guests = 1 - guests
# Complemented "discount" column as a plain vector; it is used later as a per-guest price multiplier.
vec_discount = guests["discount"].to_numpy()

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/guests.xlsx'

In [3]:
# Read the hotel sheet; the first spreadsheet column becomes the index.
hotels = pd.read_excel("/content/drive/MyDrive/hotels.xlsx", index_col=0)
# Room prices as a plain vector, in the same row order as the hotel table.
vec_prices = hotels.loc[:, "price"].to_numpy()

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/hotels.xlsx'

In [9]:
# Read the preference list and strip the "guest_" / "hotel_" prefixes so both ids become numeric.
preferences = pd.read_excel("/content/drive/MyDrive/preferences.xlsx", index_col=0).assign(
  guest=lambda frame: pd.to_numeric(frame["guest"].str.replace('guest_', '')),
  hotel=lambda frame: pd.to_numeric(frame["hotel"].str.replace('hotel_', '')),
)
# Matrix-like layout: one row per guest, one column per hotel, cell = priority.
# pivot_table aggregates repeated (guest, hotel) pairs with its default (mean), which can yield decimals.
pref_pivoted = preferences.pivot_table(index='guest', columns='hotel', values="priority")
# Rewrite each guest's priorities as dense ranks (equal priorities share a rank, no gaps).
pref_pivoted_ranked = pref_pivoted.rank(axis=1, method="dense")
# Plain matrix of ranks; hotels a guest did not list stay NaN.
rank_matrix = pref_pivoted_ranked.to_numpy()

In [10]:
def to_utility_matrix(rank_matrix):
  """Create a utility matrix from a ranked preferences matrix.

  For every row, the distinct non-NaN rank values are ordered from lowest to
  highest and mapped onto evenly spaced utilities running from 1 (lowest rank
  value) down to 0.1 (highest rank value). A row with a single distinct rank
  gets utility 1 for it. NaN entries get utility 0.

  The mapping uses the ordinal position of each rank among the row's distinct
  ranks, so ranks do not have to be dense integers starting at 1: gapped
  (1, 3, 7) or fractional (1.5) ranks are handled as well. For dense ranks the
  result is the same as indexing the utilities by ``rank - 1``.

  Args:
    rank_matrix: 2-D array-like of ranks, NaN where there is no preference.

  Returns:
    Float ndarray with the same shape as ``rank_matrix``.
  """
  rank_matrix = np.asarray(rank_matrix, dtype=float)
  utility = np.zeros(rank_matrix.shape, dtype=float)
  for row_idx, row in enumerate(rank_matrix):
    ranked = ~np.isnan(row)
    # levels: sorted distinct ranks; level_idx: position of each ranked entry within levels
    levels, level_idx = np.unique(row[ranked], return_inverse=True)
    if levels.size == 0:
      continue  # nothing ranked in this row: every utility stays 0
    utility_values = np.linspace(start=1, stop=0.1, num=levels.size)
    utility[row_idx, ranked] = utility_values[level_idx]
  return utility

In [11]:
# Turn the ranked preferences into utilities; hotels a guest did not rank (NaN) get utility 0.
utility_matrix = to_utility_matrix(rank_matrix)

In [12]:
def print_output(res, guest_count, room_count, hotel_count, t, model_name="random"):
  """Print the per-metric averages of a batch of simulation runs.

  Args:
    res: sequence of per-run metric lists; positions 0-4 are reported as
      guests placed, rooms occupied, hotels occupied, total revenue and
      total satisfaction.
    guest_count: total number of guests, shown as the denominator for guests placed.
    room_count: total number of rooms, shown as the denominator for rooms occupied.
    hotel_count: total number of hotels, shown as the denominator for hotels occupied.
    t: number of iterations reported in the header line.
    model_name: label inserted in the header line. Defaults to "random", which
      reproduces the previously hard-coded text for existing callers.

  Returns:
    None. The summary is written to stdout.
  """
  # np.round_ was removed in NumPy 2.0; np.round is the supported equivalent.
  res_avg = np.round(np.array(res).mean(axis=0), 3)
  print(f"With {t} iterations of the {model_name} model, on average we have: \n{res_avg[0]}/{guest_count} guests placed \n{res_avg[1]}/{room_count} rooms occupied \n{res_avg[2]}/{hotel_count} hotels occupied \n{res_avg[3]} total revenue \n{res_avg[4]} total satisfaction")

In [13]:
def results(choice_matrix, vec_prices, vec_discount, utility_matrix):
  """Summarise one allocation as a list of five metrics.

  Args:
    choice_matrix: guests x hotels array, 1 where a guest is assigned to a hotel, 0 elsewhere.
    vec_prices: per-hotel price vector, broadcast across the columns of choice_matrix.
    vec_discount: per-guest price multiplier, applied across the rows of choice_matrix.
    utility_matrix: array of the same shape as choice_matrix holding each guest's utility per hotel.

  Returns:
    [guests_placed, rooms_occupied, hotels_occupied, revenue, utility]
  """
  per_guest = choice_matrix.sum(axis=1)  # assignments counted along each row
  per_hotel = choice_matrix.sum(axis=0)  # assignments counted along each column
  # Every 1 becomes that hotel's price, then each row is scaled by its guest's multiplier.
  paid = np.multiply(vec_prices * choice_matrix, vec_discount[:, np.newaxis])
  return [
    per_guest.sum(),
    per_hotel.sum(),
    (per_hotel > 0).sum(),  # hotels with at least one assignment
    paid.sum(),
    (choice_matrix * utility_matrix).sum(),
  ]

In [24]:
def random_model(hotels, vec_prices, vec_discount, rank_matrix, utility_matrix, t=1000, seed=None):
  """Assign guests to randomly drawn rooms t times and print the averaged metrics.

  In every iteration min(room_count, guest_count) rooms are drawn without
  replacement from the pool of all rooms; the k-th drawn room goes to the
  guest in row k. If there are fewer rooms than guests, the remaining rows
  stay unassigned.

  Args:
    hotels: DataFrame with a "rooms" column (number of rooms per hotel); row
      order must match the column order of rank_matrix / utility_matrix.
    vec_prices: per-hotel price vector, passed through to results().
    vec_discount: per-guest price multiplier; its length is the guest count.
    rank_matrix: guests x hotels array; only its shape is used here.
    utility_matrix: guests x hotels utilities, passed through to results().
    t: number of random allocations to average over.
    seed: optional seed for reproducible draws. When None the global
      np.random state is used, exactly as before.

  Returns:
    Whatever print_output returns (it prints the summary to stdout).
  """
  rng = np.random if seed is None else np.random.RandomState(seed)
  guest_count, room_count, hotel_count = len(vec_discount), hotels["rooms"].sum(), len(hotels)
  # One entry per room, holding the column POSITION of its hotel. Positions (not index labels)
  # are used because results() also lines prices up with the matrix columns by position.
  vec_rooms = np.repeat(np.arange(hotel_count), hotels["rooms"].values)
  n_placed = min(room_count, guest_count)
  placed_guests = np.arange(n_placed)  # loop-invariant row indices of the guests that get a room
  res = []
  for _ in range(t):
    choice_matrix = np.zeros(rank_matrix.shape)  # guests x hotels, all zeros
    random_rooms = rng.choice(vec_rooms, n_placed, replace=False)  # each room is drawn at most once
    choice_matrix[placed_guests, random_rooms] = 1  # a single 1 per placed guest
    res.append(results(choice_matrix, vec_prices, vec_discount, utility_matrix))
  return print_output(res, guest_count, room_count, hotel_count, t)

In [31]:
def preference_model(hotels, vec_prices, vec_discount, rank_matrix, utility_matrix, t=1000, seed=None):
  """Assign guests, in row order, to their best-ranked available hotel; repeat t times.

  Each guest takes the hotel with the lowest positive rank among the hotels
  that still have a free room. When several available hotels share that
  lowest rank, one of them is picked uniformly at random. A guest with no
  ranked hotel left available stays unplaced. Ties are the only source of
  randomness, so iterations differ only through tie-breaking.

  Args:
    hotels: DataFrame with a "rooms" column (number of rooms per hotel); row
      order must match the column order of rank_matrix / utility_matrix.
    vec_prices: per-hotel price vector, passed through to results().
    vec_discount: per-guest price multiplier; its length is the guest count.
    rank_matrix: guests x hotels array of ranks, NaN where a guest did not
      rank a hotel. It is NOT modified.
    utility_matrix: guests x hotels utilities, passed through to results().
    t: number of allocations to average over.
    seed: optional seed for reproducible tie-breaking. When None the global
      np.random state is used, exactly as before.

  Returns:
    Whatever print_output returns (it prints the summary to stdout).
  """
  rng = np.random if seed is None else np.random.RandomState(seed)
  # The second positional argument of nan_to_num is `copy`, not the fill value: passing 0 there
  # overwrote the caller's rank_matrix in place. Use keywords and work on a copy.
  rank_matrix_filled = np.nan_to_num(rank_matrix, copy=True, nan=0)
  guest_count, room_count, hotel_count = len(vec_discount), hotels["rooms"].sum(), len(hotels)
  res = []
  for _ in range(t):
    rooms = hotels["rooms"].values.copy()  # fresh availability counter for every iteration
    choice_matrix = np.zeros(rank_matrix_filled.shape)
    for guest_idx, row in enumerate(rank_matrix_filled):
      available_ranks = np.where(rooms > 0, row, 0)  # rank is zeroed where the hotel is full
      candidates = np.flatnonzero(available_ranks)  # hotels that are both ranked and available
      if candidates.size == 0:
        continue  # nothing to offer this guest: the row stays all zeros
      candidate_ranks = available_ranks[candidates]
      best = candidates[candidate_ranks == candidate_ranks.min()]
      # Draw a random number only when there is a real tie, so the random stream matches the old code.
      chosen = best[0] if best.size == 1 else best[rng.randint(0, best.size)]
      choice_matrix[guest_idx, chosen] = 1
      rooms[chosen] -= 1  # one room fewer in the chosen hotel
    res.append(results(choice_matrix, vec_prices, vec_discount, utility_matrix))
  return print_output(res, guest_count, room_count, hotel_count, t)

In [28]:
# Baseline: average the result metrics over 100 random room assignments.
random_model(hotels, vec_prices, vec_discount, rank_matrix, utility_matrix, t=100)

With 100 iterations of the random model, on average we have: 
4000.0/4000 guests placed 
4000.0/4617 rooms occupied 
400.0/400 hotels occupied 
640422.159 total revenue 
131.154 total satisfaction


In [33]:
# Preference-driven allocation averaged over 10 runs; runs differ only in how tied top choices are broken.
# NOTE: the printed header still reads "random model" because print_output is called without a model label.
preference_model(hotels, vec_prices, vec_discount, rank_matrix, utility_matrix, t=10)

With 10 iterations of the random model, on average we have: 
3974.5/4000 guests placed 
3974.5/4617 rooms occupied 
400.0/400 hotels occupied 
643079.906 total revenue 
3892.052 total satisfaction
