In [1]:
import numpy as np

def seat_time_list(tot_seat,tot_day):
    '''
    Build the index lists for the seat axis and the day axis.

    Args:
    tot_seat (int) -> largest seat count to include
    tot_day (int) -> largest day count to include

    Return:
    (list_seat, list_day) -> a tuple of two lists of consecutive integers,
    [0, 1, ..., tot_seat] and [0, 1, ..., tot_day]; both upper bounds are
    inclusive, and a negative bound yields an empty list.
    '''

    # range() excludes its stop value, hence the +1 to keep the bounds inclusive.
    seat_indices = [*range(tot_seat + 1)]
    day_indices = [*range(tot_day + 1)]

    return seat_indices, day_indices

In [2]:
def exp_reward_matrix(remain_seat, remain_day):
    '''
    Function to calculate the expected reward V(remain_seat, remain_day)
    by filling a dynamic-programming table bottom-up.

    Each cell V(seat, day) is the best of three candidate policies. Every
    policy is a weighted sum over three outcomes with weights 0.4, 0.3 and
    0.3 and rewards 3000, 2000 and 1000:
      policy 1 -> only the 3000 outcome collects its reward and uses a seat
      policy 2 -> the 3000 and 2000 outcomes collect and use a seat
      policy 3 -> all three outcomes collect and use a seat
    An outcome that collects moves on to V(seat-1, day-1); one that does
    not moves on to V(seat, day-1). Row 0 and column 0 stay at 0.
    NOTE(review): the weights presumably model customer willingness-to-pay
    probabilities and the rewards ticket prices -- confirm with the author.

    Args:
    remain_seat (int) -> total remaining available seats (>= 0)
    remain_day (int) -> total remaining days until the flight (>= 0)

    Return:
    total_reward (float) -> total expected reward V(remain_seat, remain_day).
    The value is also printed, as before.

    Raises:
    ValueError -> if either argument is negative
    '''

    # Negative values would silently wrap around through numpy's negative
    # indexing and report an unrelated cell, so reject them up front.
    if remain_seat < 0 or remain_day < 0:
        raise ValueError("remain_seat and remain_day must be non-negative")

    # Size the table from the request instead of a fixed 11 x 26 grid.
    # V(seat, day) only depends on cells with smaller indices, so values
    # inside the old 10-seat / 25-day range are unchanged.
    reward_matrix = np.zeros((remain_seat + 1, remain_day + 1))

    for seat in range(1, remain_seat + 1):
        for day in range(1, remain_day + 1):
            after_sale = reward_matrix[seat-1, day-1]   # one seat fewer, one day fewer
            no_sale = reward_matrix[seat, day-1]        # same seats, one day fewer

            # The arithmetic order matches the original expressions so the
            # floating-point results are bit-for-bit the same.
            policy = (
                # policy 1
                0.4 * (3000 + after_sale) + 0.3 * (no_sale) + 0.3 * (no_sale),
                # policy 2
                0.4 * (3000 + after_sale) + 0.3 * (2000 + after_sale) + 0.3 * (no_sale),
                # policy 3
                0.4 * (3000 + after_sale) + 0.3 * (2000 + after_sale) + 0.3 * (1000 + after_sale),
            )

            # keep the policy that maximizes the expected reward
            reward_matrix[seat, day] = max(policy)

    total_reward = reward_matrix[remain_seat, remain_day]
    print("Expected reward value V(%s,%s) is %s" % (remain_seat, remain_day, total_reward))

    # Return the value (the original returned print()'s None) so callers can
    # use the result, as the docstring has always promised.
    return float(total_reward)

In [3]:
# Report V(10, 25): the expected reward with 10 seats and 25 days remaining.
exp_reward_matrix(remain_seat=10, remain_day=25)

Expected reward value V(10,25) is 28769.208120707823
