In [24]:
import numpy as np
import math
import random
from scipy.stats import norm
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [25]:
class KinematicModel:
    """Stochastic single-step model of a vehicle on a road.

    Attributes (all set verbatim from the constructor arguments):
        L:      vehicle length; divides the speed in the heading update.
        y:      lateral offset from the road centre.
        theta:  heading relative to the road; it is added to the steering
                angle before ``math.radians`` is applied, i.e. treated as degrees.
        Lmax:   lateral limit of the road (the road spans [-Lmax, Lmax]).
        l:      reference speed of the road (speed limit).
        d:      distance to the vehicle in front.
        p1:     probability that a curve occurs during a step.
        sigmaC: ``scale`` of the zero-mean normal curve angle.
        p2:     probability that the vehicle in front is still there after a step.
    """

    def __init__(self, L, y, theta, Lmax, l, d, p1, sigmaC, p2):
        # Vehicle geometry and pose.
        self.L, self.y, self.theta = L, y, theta
        # Road description.
        self.Lmax, self.l = Lmax, l
        # Traffic / curve parameters.
        self.d = d
        self.p1, self.sigmaC, self.p2 = p1, sigmaC, p2

    def input(self, action):
        """Advance the model by one step under ``action = (speed, steering angle)``.

        Random draws happen in this order: steering noise (scipy ``norm``),
        vehicle-in-front event (``random``), curve event (``random``) and,
        only if a curve occurs, the curve angle (scipy ``norm``).
        """
        speed, steering = action[0], action[1]

        # Steering noise grows with speed (standard deviation = speed / 4).
        noise = norm.rvs(loc=0, scale=speed / 4)
        # Effective direction of travel; theta is still the pre-step heading here.
        travel_angle = math.radians(steering + noise + self.theta)

        self.y = self.y + speed * math.sin(travel_angle)

        if random.uniform(0, 1) < self.p2:
            # A vehicle is still in front: the gap changes by its assumed speed
            # (self.l) minus our own longitudinal displacement.
            self.d = self.d + (self.l - speed * math.cos(travel_angle))
        else:
            # No vehicle in front: the gap is reset to a fixed 40.
            self.d = 40

        # NOTE(review): this increment is not converted to degrees although theta
        # is passed through math.radians above — confirm the intended units.
        self.theta = self.theta + (speed / self.L) * math.tan(math.radians(steering + noise))

        if random.uniform(0, 1) < self.p1:
            # A curve occurs: perturb the heading by a zero-mean normal angle.
            self.theta = self.theta + norm.rvs(loc=0, scale=self.sigmaC)

    def __str__(self):
        return f"y = {self.y} / theta = {self.theta} / d = {self.d}"

In [26]:
# Probability of going out of a straight road given an action 
def P1(action, environment, sigma, show = True):
    """Probability of leaving a straight road after applying ``action``.

    The next lateral offset is modelled as ``y + v * sin(heading)`` where the
    heading (in degrees) is normal with mean ``action[1] + environment.theta``
    and standard deviation ``sigma``.

    Parameters:
    - action: (speed, steering angle in degrees); the speed must be non-zero
    - environment: object exposing ``Lmax``, ``y``, ``theta`` and ``p1``
    - sigma: standard deviation of the heading, in degrees
    - show: when True, print the weighted value

    Returns ``(value, term1, term2)`` where ``term1`` is P(y > Lmax), ``term2``
    is P(y < -Lmax) and ``value = (1 - p1) * (term1 + term2)``.
    """
    speed = action[0]
    # sin(heading) thresholds at which the vehicle reaches each road edge.
    upper_ratio = (environment.Lmax - environment.y) / speed
    lower_ratio = (- environment.Lmax - environment.y) / speed

    # Term 1 is the probability that y > Lmax
    if upper_ratio > 1:
        # The edge is farther than one step can reach: impossible.
        term1 = 0
    elif upper_ratio < -1:
        # Already so far beyond the edge that no heading brings the car back: certain.
        term1 = 1
    else:
        limit_deg = math.degrees(math.asin(upper_ratio))
        term1 = 1 - norm.cdf((1 / sigma) * (limit_deg - action[1] - environment.theta))

    # Term 2 is the probability that y < -Lmax
    if lower_ratio > 1:
        # Already beyond the negative edge by more than one step: certain.
        term2 = 1
    elif lower_ratio < -1:
        # The negative edge cannot be reached in one step: impossible.
        term2 = 0
    else:
        limit_deg = math.degrees(math.asin(lower_ratio))
        term2 = norm.cdf((1 / sigma) * (limit_deg - action[1] - environment.theta))

    # Weight by the probability that the road is straight (no curve event).
    value = (1 - environment.p1) * (term1 + term2)
    if show:
        print(f"Value: {value}")
    return value, term1, term2
    
# Probability of going out of a curved road given an action
def P2(action, environment, sigma, show = True):
    """Probability of leaving a curved road after applying ``action``.

    Uses the same geometry as the straight-road case: the next lateral offset
    is ``y + v * sin(heading)`` with the heading (degrees) normal around
    ``action[1] + environment.theta`` with standard deviation ``sigma``. The
    result is weighted by the curve probability ``environment.p1``.

    Parameters:
    - action: (speed, steering angle in degrees); the speed must be non-zero
    - environment: object exposing ``Lmax``, ``y``, ``theta`` and ``p1``
    - sigma: standard deviation of the heading, in degrees
    - show: when True, print the weighted value

    Returns ``(value, term1, term2)`` where ``term1`` is P(y > Lmax), ``term2``
    is P(y < -Lmax) and ``value = p1 * (term1 + term2)``.
    """
    speed = action[0]
    # sin(heading) thresholds at which the vehicle reaches each road edge.
    upper_ratio = (environment.Lmax - environment.y) / speed
    lower_ratio = (- environment.Lmax - environment.y) / speed

    # Term 1 is the probability that y > Lmax
    if upper_ratio > 1:
        # Edge out of reach within one step: impossible.
        term1 = 0
    elif upper_ratio < -1:
        # More than one step beyond the edge already: certain.
        term1 = 1
    else:
        limit_deg = math.degrees(math.asin(upper_ratio))
        term1 = 1 - norm.cdf((1 / sigma) * (limit_deg - action[1] - environment.theta))

    # Term 2 is the probability that y < -Lmax
    if lower_ratio > 1:
        # More than one step beyond the negative edge already: certain.
        term2 = 1
    elif lower_ratio < -1:
        # Negative edge out of reach within one step: impossible.
        term2 = 0
    else:
        limit_deg = math.degrees(math.asin(lower_ratio))
        term2 = norm.cdf((1 / sigma) * (limit_deg - action[1] - environment.theta))

    # Weight by the probability that a curve occurs.
    value = environment.p1 * (term1 + term2)
    if show:
        print(f"Value: {value}")
    return value, term1, term2

# Probability of risking a crash in a straight road given an action 
def P3(action, environment, sigma, show = True):
    """Probability of risking a crash on a straight road after ``action``.

    Parameters:
    - action: (speed, steering angle in degrees); the speed must be non-zero
    - environment: object exposing ``l``, ``d``, ``theta``, ``p1`` and ``p2``
    - sigma: standard deviation of the heading, in degrees
    - show: when True, print the weighted value (not printed on the early
      ``return 0`` path)

    Returns 0 when the risk is impossible, otherwise
    ``p2 * (1 - p1) * P(risk)``.
    """
    # cos(heading) threshold above which the gap falls below the margin
    # speed * 3.6 / 2.
    # NOTE(review): presumably the "half the km/h reading" safe-distance rule — confirm.
    ratio = (1 / action[0]) * (environment.l + (((action[0] * 3.6) / -2) + environment.d))
    if ratio > 1:
        # No heading can close the gap enough: no risk.
        return 0
    if ratio < -1:
        # The gap is already below the margin for every heading. math.acos
        # would raise ValueError here, so saturate the probability instead.
        risk = 1.0
    else:
        limit_deg = math.degrees(math.acos(ratio))
        risk = norm.cdf((1 / sigma) * (limit_deg - math.fabs(action[1]) - math.fabs(environment.theta)))
    # Weight by "vehicle in front" and "road is straight".
    value = environment.p2 * (1 - environment.p1) * risk
    if show:
        print(f"Value: {value}")
    return value

# Probability of risking a crash in a curved road given an action
def P4(action, environment, sigma, show = True):
    """Probability of risking a crash on a curved road after ``action``.

    Parameters:
    - action: (speed, steering angle in degrees); the speed must be non-zero
    - environment: object exposing ``l``, ``d``, ``theta``, ``p1`` and ``p2``
    - sigma: standard deviation of the heading, in degrees
    - show: when True, print the weighted value (not printed on the early
      ``return 0`` path)

    Returns 0 when the risk is impossible, otherwise ``p2 * p1 * P(risk)``.
    """
    # cos(heading) threshold above which the gap falls below the margin
    # speed * 3.6 / 2.
    # NOTE(review): presumably the "half the km/h reading" safe-distance rule — confirm.
    ratio = (1 / action[0]) * (environment.l + (((action[0] * 3.6) / -2) + environment.d))
    if ratio > 1:
        # No heading can close the gap enough: no risk.
        return 0
    if ratio < -1:
        # The gap is already below the margin for every heading. math.acos
        # would raise ValueError here, so saturate the probability instead.
        risk = 1.0
    else:
        limit_deg = math.degrees(math.acos(ratio))
        risk = norm.cdf((1 / sigma) * (limit_deg - math.fabs(action[1]) - math.fabs(environment.theta)))
    # Weight by "vehicle in front" and "a curve occurs".
    value = environment.p2 * environment.p1 * risk
    if show:
        print(f"Value: {value}")
    return value

In [27]:
# Transition probabilities

def anyToG(action, environment):
    """Probability of moving to the good state G: on the road AND no crash risk."""
    # Heading spread on a straight road, and the wider spread used for curves.
    # NOTE(review): the curve spread adds sigmaC under a square root while the
    # straight spread is used as-is — confirm this mix is intended.
    straight_sigma = action[0]/4
    curve_sigma = math.sqrt(action[0]/4 + environment.sigmaC)

    off_straight = P1(action, environment, straight_sigma, False)[0]
    off_curve = P2(action, environment, curve_sigma, False)[0]
    crash_risk = (P3(action, environment, straight_sigma, False)
                  + P4(action, environment, curve_sigma, False))

    stays_on_road = 1 - (off_straight + off_curve)
    stays_safe = 1 - crash_risk
    return stays_on_road * stays_safe
    
def anyToX(action, environment):
    """Probability of moving to state X: out of the road AND no crash risk."""
    straight_sigma = action[0]/4
    curve_sigma = math.sqrt(action[0]/4 + environment.sigmaC)

    off_straight, _, _ = P1(action, environment, straight_sigma, False)
    off_curve, _, _ = P2(action, environment, curve_sigma, False)
    leaves_road = off_straight + off_curve

    crash_straight = P3(action, environment, straight_sigma, False)
    crash_curve = P4(action, environment, curve_sigma, False)
    stays_safe = 1 - (crash_straight + crash_curve)

    return leaves_road * stays_safe

def anyToI(action, environment):
    """Probability of moving to state I: on the road AND risking a crash."""
    straight_sigma = action[0]/4
    curve_sigma = math.sqrt(action[0]/4 + environment.sigmaC)

    off_straight, _, _ = P1(action, environment, straight_sigma, False)
    off_curve, _, _ = P2(action, environment, curve_sigma, False)
    stays_on_road = 1 - (off_straight + off_curve)

    crash_straight = P3(action, environment, straight_sigma, False)
    crash_curve = P4(action, environment, curve_sigma, False)
    risks_crash = crash_straight + crash_curve

    return stays_on_road * risks_crash

def anyToXI(action, environment):
    """Probability of moving to state XI: out of the road AND risking a crash."""
    straight_sigma = action[0]/4
    curve_sigma = math.sqrt(action[0]/4 + environment.sigmaC)

    leaves_road = (P1(action, environment, straight_sigma, False)[0]
                   + P2(action, environment, curve_sigma, False)[0])
    risks_crash = (P3(action, environment, straight_sigma, False)
                   + P4(action, environment, curve_sigma, False))
    return leaves_road * risks_crash

def transitionProbabilitiesMatrix(actions, environment):
    """Build the action-by-state transition matrix for ``environment``.

    Row ``i`` holds, for ``actions[i]``, the probabilities of landing in the
    states G, X, I and XI (in that column order). The probabilities do not
    depend on the state the vehicle starts from.
    """
    rows = []
    for action in actions:
        rows.append([
            anyToG(action, environment),
            anyToX(action, environment),
            anyToI(action, environment),
            anyToXI(action, environment),
        ])
    return np.array(rows)

In [31]:
# Reward for staying at the center of the road
def rewardCenterProbability(action, environment, ratio, show = True):
    """Probability of ending the step inside the band ``|y| < Lmax * ratio``.

    The next lateral offset is modelled as ``y + v * sin(heading)`` with the
    heading (degrees) normal around ``action[1] + environment.theta``. Two
    spreads are used: ``v / 4`` for a straight road and
    ``sqrt(v / 4 + sigmaC)`` for a curve; the two band probabilities are mixed
    with weights ``1 - p1`` and ``p1``.

    Parameters:
    - action: (speed, steering angle in degrees); the speed must be non-zero
    - environment: object exposing ``Lmax``, ``y``, ``theta``, ``p1``, ``sigmaC``
    - ratio: fraction of ``Lmax`` defining the half-width of the band
    - show: when True, print the resulting value
    """
    def _prob_below(bound):
        # Returns (straight, curve) probabilities that the next offset is below `bound`.
        sin_limit = (bound - environment.y) / action[0]
        if sin_limit > 1:
            # The bound is farther above than one step can travel: always below it.
            return 1, 1
        if sin_limit < -1:
            # The bound is farther below than one step can travel: never below it.
            return 0, 0
        offset = math.degrees(math.asin(sin_limit)) - action[1] - environment.theta
        curve = norm.cdf((1 / math.sqrt(action[0]/4 + environment.sigmaC)) * offset)
        straight = norm.cdf((1 / (action[0]/4)) * offset)
        return straight, curve

    # Term 1 is the probability that y < Lmax * ratio
    term1, term1C = _prob_below(environment.Lmax * ratio)
    # Term 2 is the probability that y < -Lmax * ratio
    term2, term2C = _prob_below(-environment.Lmax * ratio)

    # P(-Lmax*ratio < y < Lmax*ratio), mixed over straight road and curve.
    value = ((1 - environment.p1) * (term1 - term2)) + (environment.p1 * (term1C - term2C))
    if show:
        print(f"Value: {value}")
    return value

# Reward for going at the correct speed
def rewardSpeed(action, environment):
    """Speed penalty: minus half the absolute gap between ``action[0]`` and ``environment.l``."""
    speed_gap = math.fabs(environment.l - action[0])
    return -0.5 * speed_gap

# Reward for staying at the correct distance
def rewardDistanceProbability(action, environment):
    """Total probability of risking a crash: straight-road case plus curved-road case."""
    straight_sigma = action[0] / 4
    curve_sigma = math.sqrt(action[0]/4 + environment.sigmaC)
    # P3 covers the straight road, P4 the curved road.
    return (P3(action, environment, straight_sigma, False)
            + P4(action, environment, curve_sigma, False))

# Expected reward given an action
def expectedReward(action, environment, r1, r2, r3, r4, show = True):
    """Expected one-step reward of ``action`` in ``environment``.

    The reward is the sum of:
    - ``r1`` times the probability of staying within half of ``Lmax``,
    - ``r2`` times the probability of staying within a quarter of ``Lmax``,
    - the speed penalty from ``rewardSpeed``,
    - ``r3`` times the probability of NOT risking a crash,
    - ``r4`` times the probability of risking a crash.

    When ``show`` is True the total is printed as well as returned.
    """
    wide_band = r1 * rewardCenterProbability(action, environment, 0.5, False)
    narrow_band = r2 * rewardCenterProbability(action, environment, 0.25, False)
    speed_penalty = rewardSpeed(action, environment)
    # The crash-risk probability is deterministic, so one evaluation serves both terms.
    crash_risk = rewardDistanceProbability(action, environment)
    no_risk_term = r3 * (1 - crash_risk)
    risk_term = r4 * crash_risk

    total = wide_band + narrow_band + speed_penalty + no_risk_term + risk_term
    if show:
        print(f"Reward for action [{action[0]}, {action[1]}]: {total}")
    return total

def expectedRewardMatrix(action, environment, r1, r2, r3, r4):
    """Expected reward of every candidate action in ``environment``.

    Parameters:
    - action: iterable of actions (the name is kept for backward
      compatibility; it holds the whole action list, not a single action)
    - environment: model the rewards are evaluated against
    - r1, r2, r3, r4: reward weights forwarded to ``expectedReward``

    Returns a 1-D array with one expected reward per action, in input order.
    """
    # Use the arguments actually passed in. The previous body iterated the
    # module-level `actions` and evaluated the module-level `kinematicModel`,
    # so the `environment` argument had no effect on the result.
    rewardMatrix = np.array([
        expectedReward(candidate, environment, r1, r2, r3, r4, False)
        for candidate in action
    ])
    return rewardMatrix

In [29]:
# Initial environment:
# - Vehicle length               L = 1 m
# - At the center of the road    y = 0 m
# - Aligned with the road        theta = 0°
# - Road width 4 m               Lmax = 2 m
# - Road limit 90 km/h           l = 25 m/s
# - Distance from next vehicle   d = 40 m
# - Probability of a road curve  p1 = 5%
# - Variance of the curve        sigmaC = 5
# - Probability of a vehicle in front p2 = 30%
kinematicModel = KinematicModel(L=1, y=0, theta=0, Lmax=2, l=25, d=40,
                                p1=0.05, sigmaC=5, p2=0.3)

# State space
# 0 = S_G: good state
# 1 = S_X: out of road state
# 2 = S_I: risk of crash state
# 3 = S_XI: out of road and risk of crash state
states = ['G', 'X', 'I', 'XI']

# Action space: every (velocity, steering angle) pair, velocity-major order
anglesDeg = list(range(-5, 6))
velocities = [20, 25, 30]
actions = [
    (v, delta)
    for v in velocities
    for delta in anglesDeg
]
#print(f"{actions}")

# State-action transition probability matrix
T = transitionProbabilitiesMatrix(actions, kinematicModel)
#print(f"{T}")

# State-action expected reward matrix
r1, r2, r3, r4 = 50, 100, -1, 0
R = expectedRewardMatrix(actions, kinematicModel, r1, r2, r3, r4)
#print(f"{R}")

print(actions[0])

# Transition function for the MDP
def transition_func(s, a, T):
    """Return the ``(next_state, probability)`` pairs for action ``a``.

    ``s`` is accepted for the MDP interface but not used: the row of ``T`` is
    selected by the action only. Relies on the module-level ``actions`` and
    ``states`` lists for the row index and the state labels.
    """
    # Row of the transition matrix that belongs to this action.
    row = T[actions.index(a)]
    # Pair every state label with its probability, in `states` order.
    return list(zip(states, row))

#print(f"{transition_func('G', (20, -30), T)}")

# Reward function for the MDP
def reward_func(s, a, s_prime, R):
    """Return the expected reward stored in ``R`` for action ``a``.

    ``s`` and ``s_prime`` are accepted for the MDP interface but not used; the
    entry is looked up through the module-level ``actions`` list.
    """
    return R[actions.index(a)]

#print(f"{reward_func('G', (20, -30), 'G', R)}")

(20, -5)


In [None]:
def value_iteration(states, actions, transition_func, reward_func, gamma=0.9, theta=1e-4, max_iterations=None):
    """
    Perform value iteration to compute the optimal policy and value function.

    After every sweep over the states, a private simulation model is advanced
    with the last selected action and the transition/reward matrices are
    rebuilt from it. Because the model evolves randomly between sweeps,
    convergence is not guaranteed; pass ``max_iterations`` to bound the run.

    Parameters:
    - states: List of all states
    - actions: List of all possible actions
    - transition_func: Function to calculate transition probabilities
    - reward_func: Function to calculate rewards
    - gamma: Discount factor for future rewards
    - theta: Convergence threshold
    - max_iterations: Optional maximum number of sweeps (None = unbounded,
      which is the original behaviour)

    Returns:
    - (V, policy): dictionaries mapping each state to its value and best action
    """

    V = {s: 0 for s in states}  # Initialize V(s) = 0 for all states
    policy = {s: None for s in states} # Initialize an empty policy

    if not states:
        # Nothing to evaluate; the loop below would otherwise try to simulate
        # a step with no selected action.
        return V, policy

    # Environment starts:
    # - Vehicle length L = 1 m
    # - At center of the road y = 0 m
    # - Aligned with the road theta = 0°
    # - Road width 4 m Lmax = 2 m
    # - Road limit 90 km/h l = 25 m/s
    # - Distance from next vehicle d = 40 m
    # - Probability of a road curve p1 = 5%
    # - Variance of the curve sigmaC = 5
    # - Probability of a vehicle in front p2 = 30%
    kinematicModelSim = KinematicModel(1, 0, 0, 2, 25, 40, 0.05, 5, 0.3)

    r1 = 50
    r2 = 100
    r3 = -1
    r4 = 0

    # Build the initial matrices from the local simulation model rather than
    # from a module-level object, so the function does not depend on notebook state.
    T = transitionProbabilitiesMatrix(actions, kinematicModelSim) # Initial transition matrix
    R = expectedRewardMatrix(actions, kinematicModelSim, r1, r2, r3, r4) # Initial reward matrix

    i = 0
    while max_iterations is None or i < max_iterations:
        delta = 0
        best_action_update = None
        print(f"ITERAZIONE {i}")
        i += 1
        # Update V(s) for every state s
        for s in states:
            v_old = V[s]
            action_values = {}
            # Compute the expected value of every action
            for a in actions:
                expected_value = 0
                # transition_func(s, a, T) returns a list of (s_prime, prob) pairs
                for s_prime, p in transition_func(s, a, T):
                    expected_value += p * (reward_func(s, a, s_prime, R) + gamma * V[s_prime])
                action_values[a] = expected_value
            for value in action_values.items():
                print(f"Azione: {value[0]}, Valore atteso: {round(value[1],2)}")
            # Pick the action with the largest expected value
            best_action = max(action_values, key=action_values.get)
            best_action_value = action_values[best_action]
            best_action_update = best_action
            print(f"Best action for state {s} is: {best_action}")
            # Update the state value and the policy
            V[s] = best_action_value
            policy[s] = best_action
            # Track the largest change to check convergence
            delta = max(delta, abs(v_old - V[s]))

        # Advance the simulated model with the action chosen for the last state
        # of the sweep, then rebuild the matrices from its new state.
        kinematicModelSim.input(best_action_update)
        print(f"{kinematicModelSim}")
        T = transitionProbabilitiesMatrix(actions, kinematicModelSim)
        R = expectedRewardMatrix(actions, kinematicModelSim, r1, r2, r3, r4)

        if delta < theta:
            break

    return V, policy

# Solve the MDP with the module-level state/action spaces and callbacks;
# V maps each state to its value and policy to its selected action.
V, policy = value_iteration(states, actions, transition_func, reward_func, gamma=0.9, theta=1e-4)

ITERAZIONE 0
Azione: (20, -5), Valore atteso: 23.98
Azione: (20, -4), Valore atteso: 29.34
Azione: (20, -3), Valore atteso: 34.28
Azione: (20, -2), Valore atteso: 38.3
Azione: (20, -1), Valore atteso: 40.93
Azione: (20, 0), Valore atteso: 41.85
Azione: (20, 1), Valore atteso: 40.93
Azione: (20, 2), Valore atteso: 38.3
Azione: (20, 3), Valore atteso: 34.28
Azione: (20, 4), Valore atteso: 29.34
Azione: (20, 5), Valore atteso: 23.98
Azione: (25, -5), Valore atteso: 20.33
Azione: (25, -4), Valore atteso: 23.13
Azione: (25, -3), Valore atteso: 25.62
Azione: (25, -2), Valore atteso: 27.6
Azione: (25, -1), Valore atteso: 28.88
Azione: (25, 0), Valore atteso: 29.32
Azione: (25, 1), Valore atteso: 28.88
Azione: (25, 2), Valore atteso: 27.6
Azione: (25, 3), Valore atteso: 25.62
Azione: (25, 4), Valore atteso: 23.13
Azione: (25, 5), Valore atteso: 20.33
Azione: (30, -5), Valore atteso: 13.02
Azione: (30, -4), Valore atteso: 14.61
Azione: (30, -3), Valore atteso: 16.01
Azione: (30, -2), Valore att