In [1]:
import numpy as np
import random
import variables_CAI as var
import functions_CAI as func
import matplotlib.pyplot as plt
from tqdm import tqdm

Variable Initialization

In [5]:
T_Y = var.T_Y  #timing for yellow phase
T_G = var.T_G #timing for green phase
T_R = var.T_R #timing for red phase
total_T = T_Y + T_G + T_R

d_lower = var.d_lower #lower bound for the initial distribution of distance
d_upper = var.d_upper #upper bound for the initial distribution of distance
v_lower = var.v_lower #lower bound for the initial distribution of velocity
v_upper = var.v_upper #upper bound for the initial distribution of velocity

delta_t = var.delta_t

phi_to_T_dict = {'Y': T_Y, 'G': T_G, 'R': T_R} #dictionary to map each phase to its timing
phi_to_next_phi_dict = {'G':'Y', 'Y':'R', 'R':'G'} #dictionary to map each phase to its next phase
phi_prev_phi_dict = var.phi_prev_phi_dict #dictionary to map each phase to its previous phase

std_d= 0.5#var.std_d #standard deviation of distance noise in state process
std_v= 0.25#var.std_v #standard deviation of velocity noise in state process
std_t_phi= 0#var.std_t_phi #standard deviation of timing noise in state process

trials = var.trials #number of runs for Q-learning training phase
episodes = var.episodes #maximum number for the length of the trajectory

learning_rate = 0.001 #learning_rate for the training phase of Q-learning
discount_factor = var.discount_factor #discount factor for the training phase of Q-learning
epsilon = var.epsilon

actions = var.actions #action space
discrete_d = var.discrete_d #list of discrete distances, used for discretization of the distance in state space
v_max= var.v_max #maximum allowed velocity
discrete_v = var.discrete_v #list of discrete velocities, used for discretization of the velocity in state space
discrete_phi_t = var.discrete_phi_t #list of discrete pairs of (phi, t_phi), used for discretization of the (phi, t_phi) in state space


Q-Learning

In [None]:
Q_table = np.zeros((len(discrete_d)*len(discrete_v)*len(discrete_phi_t), len(actions))) #initialize the Q-table with zero values
np.random.seed()
accum_rewards = []
for trial in tqdm(range(trials)):

    idx_i = np.random.randint(0, Q_table.shape[0], 1) #randomly select a row in Q-table which indicates a discretized state
    state = func.map_idx_to_state(idx_i, discrete_d, discrete_v, discrete_phi_t) #find the state corresponding to the idx_i
    distance = state[0]
    velocity = state[1]
    accum_r = 0
    for episode in range(episodes):
        
        if distance<0: #if the vehicle passed the intersection
            break
            
        action, action_idx = func.action_selection(Q_table[idx_i], velocity, actions, epsilon, v_max,1) #select the action based on Q-table with epsilon-greedy approach
        q = Q_table[idx_i, action_idx] #Q-value of the state and the selected action
        
        state_new = func.update_state(state, action, delta_t, phi_to_T_dict, phi_to_next_phi_dict, std_d, std_v, std_t_phi,1)#update the state based on the selected action
        idx_i_new = func.map_state_to_idx(state_new, discrete_d, discrete_v, discrete_phi_t, T_Y, T_R, phi_prev_phi_dict)#index of the updated state in Q-table
        distance = state_new[0]
        velocity = state_new[1]
        phi = state_new[2]
        t_phi = state_new[3]
        
        reward = func.reward_function(distance, velocity, phi, t_phi, v_max, T_Y) #reward of moving to the new state

        td = func.TD_function(reward, discount_factor, Q_table[idx_i_new], q) #td term
        
        q = q + learning_rate*td #update q-value
        Q_table[idx_i,action_idx] = q #update q-value in Q-table
        
        idx_i = idx_i_new
        state = state_new

with open('Q_table_noise_0.5_0.25_0.npy','wb') as f:
    np.save(f, Q_table)

 27%|██▋       | 5457996/20000000 [2:16:08<5:48:55, 694.60it/s]

Q-Table Loading

In [5]:
with open('Q_table_noise_0.5_0.25_0.npy','rb') as f:
    Q_table = np.load(f)
print(f'Percentage of non-zero elements in Qtable:{len(Q_table[Q_table!=0])/(Q_table.shape[0]*Q_table.shape[1])}')

Percentage of non-zero elements in Qtable:0.8906512605042017


Test Scenario

In [6]:

state = (90.55001067682015, 12.032548969195476, 'Y', 1)
print('Initial State')
print(state)
print()
idx_i = func.map_state_to_idx(state, discrete_d, discrete_v, discrete_phi_t, T_Y, T_R, phi_prev_phi_dict)
mapped_state = func.map_idx_to_state(idx_i, discrete_d, discrete_v, discrete_phi_t)
distance = state[0]

while (distance>0):
    print(f'state: {state}')
    
    action, action_idx = func.action_selection(Q_table[idx_i], actions, 0)
    
    print(f'Q_table for this state:{Q_table[idx_i]}')
    print(f'action: {action}\n')

    state_new = func.update_state(state, action, delta_t, phi_to_T_dict, phi_to_next_phi_dict, std_d, std_v, std_t_phi)
    idx_i_new = func.map_state_to_idx(state_new, discrete_d, discrete_v, discrete_phi_t, T_Y, T_R, phi_prev_phi_dict)
    distance = state_new[0]

    state=state_new
    idx_i=idx_i_new

print('Final State')
if state[2]=='R':
    print(f"\x1b[31m{state}\x1b[0m")

elif state[2]=='G':
    print(f"\x1b[32m{state}\x1b[0m")
else:
    print(f"\x1b[33m{state}\x1b[0m")
#     print(Q_table[idx_i]) 
print('******************************************************')


Initial State
(90.55001067682015, 12.032548969195476, 'Y', 1)

state: (90.55001067682015, 12.032548969195476, 'Y', 1)
Q_table for this state:[-0.5223302  -0.48154721 -0.06841143 -0.49871669 -0.48365505 -0.48111337
 -0.48036667]
action: -1

state: (81.31305954792794, 11.558205959770374, 'Y', 2)
Q_table for this state:[ 3.52633125 -0.52395344 -0.56158144 -0.53785171 -0.67747086 -0.57537215
 -0.5268102 ]
action: -3

state: (71.37711719093248, 10.960503660204338, 'Y', 3)
Q_table for this state:[-0.42950685 -0.43544475 -0.45464775  8.70276134 -0.51050754 -0.46171831
  0.53702861]
action: 0

state: (60.759659790174744, 11.074092041163508, 'Y', 4)
Q_table for this state:[24.60873898 -0.42617818 -0.52069574  0.75222118  1.55062264 -0.44776428
 -0.64472918]
action: -3

state: (51.830212684261596, 6.749197457648314, 'R', 1)
Q_table for this state:[ 2.68762477 56.61098997  4.7861571  -0.61331505 -0.64639659 14.18942982
 -0.60704195]
action: -2

state: (47.0739409605469, 3.3272784590704276, 'R', 2