In [2]:
import numpy as np
def seat_time_list(tot_seat,tot_day):
    '''
    Enumerate every seat count and every day index as two inclusive lists

    Args:
    tot_seat (int) -> largest seat count to include
    tot_day (int) -> largest day index to include

    Return:
    (seats, days) -> tuple of two lists, [0, 1, ..., tot_seat] and [0, 1, ..., tot_day];
                     a negative bound produces an empty list
    '''

    # range() excludes its stop value, so go one past each bound to keep it in the list
    seats = [*range(tot_seat + 1)]
    days = [*range(tot_day + 1)]

    return seats, days

In [3]:
def set_policy(avail_seat, time_of_day):
    '''
    Pick the selling policy proposed by the company's manager

    Decision rules, checked in this order:
    1. more than 5 seats available                -> policy 3
    2. 1 to 5 seats available and day 9 or before -> policy 2
    3. 1 to 5 seats available and day 10 or after -> policy 1
    4. anything else (e.g. no seats available)    -> policy 0

    Meaning of each policy:
    Policy 3: sell to Type 1, Type 2, and Type 3 customers
    Policy 2: sell to Type 1 and Type 2 customers
    Policy 1: sell to Type 1 customers
    Policy 0: cannot sell to anybody

    Args:
    avail_seat (int) -> total remaining available seats
    time_of_day (int) -> days since the tickets are available

    Return:
    policy (int) -> chosen policy (0, 1, 2 or 3)
    '''

    # Plenty of seats left: sell to every customer type regardless of the day.
    if avail_seat > 5:
        return 3

    seats_are_scarce = 1 <= avail_seat <= 5

    if seats_are_scarce and time_of_day <= 9:
        return 2

    # Checked explicitly (not as a plain "else") so a day value that is neither
    # <= 9 nor >= 10 still falls through to policy 0.
    if seats_are_scarce and time_of_day >= 10:
        return 1

    return 0

In [7]:
def exp_reward_matrix(remain_seat, remain_day, policy_fn=None):
    '''
    Function to calculate the expected reward of following a selling policy

    A table V of shape (remain_seat + 1, remain_day + 1) is filled in, with
    V[0, day] = 0 and V[seat, 0] = 0. For seat >= 1 and day >= 1, each customer
    type k (1, 2, 3) with weight p_k (0.4, 0.3, 0.3) and reward r_k
    (3000, 2000, 1000) contributes

        p_k * (r_k + V[seat-1, day-1])   if the policy sells to type k
        p_k * V[seat, day-1]             otherwise

    where policy n sells to customer types 1..n (so policy 0 sells to nobody and
    policy 3 sells to everybody).

    The expected reward is printed and also returned.

    Args:
    remain_seat (int) -> total remaining available seats (>= 0)
    remain_day (int) -> total remaining days until the flight (>= 0)
    policy_fn (callable, optional) -> function (seat, day) -> policy number;
                                      defaults to set_policy

    Return:
    total_reward (float) -> total expected reward V[remain_seat, remain_day]

    Raises:
    ValueError -> if remain_seat or remain_day is negative
    '''

    # Negative values would otherwise be accepted by NumPy as "count from the end"
    # indices and silently read the wrong cell.
    if remain_seat < 0 or remain_day < 0:
        raise ValueError(
            "remain_seat and remain_day must be non-negative, got %s and %s"
            % (remain_seat, remain_day)
        )

    if policy_fn is None:
        # NOTE(review): the policy is queried with the remaining-days index, while
        # set_policy documents its second argument as days since tickets went on
        # sale -- confirm which convention is intended.
        policy_fn = set_policy

    # (customer type, weight, reward) -- kept in this order so the terms are added
    # exactly as Type 1 + Type 2 + Type 3.
    customer_types = ((1, 0.4, 3000), (2, 0.3, 2000), (3, 0.3, 1000))

    # Sized from the arguments so any non-negative state can be evaluated.
    reward_matrix = np.zeros((remain_seat + 1, remain_day + 1))

    for seat in range(1, remain_seat + 1):
        for day in range(1, remain_day + 1):
            policy = policy_fn(seat, day)
            after_sale = reward_matrix[seat - 1, day - 1]   # one seat fewer, one day fewer
            after_no_sale = reward_matrix[seat, day - 1]    # same seats, one day fewer

            expected = 0.0
            for cust_type, weight, reward in customer_types:
                if cust_type <= policy:
                    expected += weight * (reward + after_sale)
                else:
                    expected += weight * after_no_sale
            reward_matrix[seat, day] = expected

    total_reward = float(reward_matrix[remain_seat, remain_day])
    print("Expected reward value V(%s,%s) is %s" % (remain_seat, remain_day, total_reward))

    return total_reward

In [8]:
import numpy as np

# Expected reward with 10 seats remaining and 25 days remaining until the flight.
exp_reward_matrix(10, 25)

Expected reward value V(10,25) is 25074.65575951957
