# Q-Learning Prototype for Hurricane Evacuation

This notebook demonstrates a simplified reinforcement learning prototype for learning optimal evacuation routes during a hurricane event. The environment consists of five locations with varying hurricane severity, and the agent learns to avoid danger zones while reaching a safe location.


In [None]:
import numpy as np
import random
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import networkx as nx

: 

In [None]:
# Evacuation network as an adjacency list: position in the list is the node id,
# the entry is the list of node ids directly reachable from it.
graph = dict(enumerate([
    [1, 2],   # node 0: start
    [3],      # node 1
    [3, 4],   # node 2
    [4],      # node 3
    [],       # node 4: safe zone, terminal (no outgoing edges)
]))

# Human-readable label for each node id
location_names = dict(enumerate([
    "marsh_harbor",
    "road_a",
    "road_b",
    "junction",
    "nassua (safe zone)",
]))

# Hurricane severity level at each node id (0 = none)
hurricane_levels = dict(enumerate([4, 2, 3, 1, 0]))

# Reward penalty keyed by hurricane level (levels 1 through 5)
hurricane_impact_map = dict(zip(range(1, 6), (-1, -3, -5, -8, -10)))

# get_reward applies the mapped penalty only when the destination's level is
# at or above this value
evacuation_threshold = 3

In [None]:
# Q-learning hyperparameters
episodes = 500   # number of training episodes
alpha = 0.1      # learning rate
gamma = 0.8      # discount factor
epsilon = 0.2    # probability of taking a random (exploratory) action

# Q-table indexed as [state][action]. An action is the id of the node moved
# to, so the table is square: one row and one column per graph node.
q_table = np.zeros([len(graph)] * 2)

def get_reward(current, next):
    """Return the immediate reward for moving from `current` to `next`.

    Arriving at node 4 is worth +10. For any other destination, a hurricane
    level at or above `evacuation_threshold` yields the penalty stored in
    `hurricane_impact_map` for that level; otherwise the step costs -1.
    `current` is accepted but not used in the calculation.
    """
    if next == 4:
        return 10

    level = hurricane_levels[next]
    if level < evacuation_threshold:
        return -1
    return hurricane_impact_map[level]

In [None]:
# Training loop
# Seed the RNG that drives the epsilon-greedy choices and start from an
# all-zero Q-table. Without this, every run explores differently, and
# re-running this cell keeps training on top of the previous run's table
# instead of reproducing the result.
random.seed(42)
q_table = np.zeros((len(graph), len(graph)))

for ep in range(episodes):
    state = 0  # every episode starts at node 0
    while state != 4:  # node 4 is terminal
        if random.uniform(0, 1) < epsilon:
            # Explore: pick any reachable neighbor uniformly at random
            action = random.choice(graph[state])
        else:
            # Exploit: pick the reachable neighbor with the highest Q-value.
            # np.argmax returns the first maximum, so ties go to the
            # earliest-listed neighbor.
            q_vals = [q_table[state][a] for a in graph[state]]
            best_action_idx = np.argmax(q_vals)
            action = graph[state][best_action_idx]

        reward = get_reward(state, action)
        # Best value obtainable from the next node; a node with no outgoing
        # edges contributes 0.
        future = max([q_table[action][a] for a in graph[action]] or [0])
        # Q-learning update: move the estimate toward reward + gamma * future
        q_table[state][action] += alpha * (reward + gamma * future - q_table[state][action])

        state = action

In [None]:
# Report the learned action values, rounded to two decimals
print("Learned Q-Table:")
print(np.round(q_table, 2))

# Follow the greedy policy from node 0 until node 4 is reached, recording the
# name of every node visited along the way.
state, path = 0, [location_names[0]]
while state != 4:
    next_states = graph[state]
    # Highest-valued reachable neighbor; max() keeps the first one on ties
    next_state = max(next_states, key=q_table[state].__getitem__)
    path += [location_names[next_state]]
    state = next_state

print("\nOptimal route from marsh_harbor:")
print(" → ".join(path))

In [None]:
# Visualize the graph with hurricane levels
# Fix one node order and derive labels and colours from it, so the colour list
# lines up with the drawn nodes by construction rather than by the order in
# which edges happen to be inserted.
node_ids = list(graph)
node_labels = [location_names[n] for n in node_ids]
node_colors = [hurricane_levels[n] for n in node_ids]

G = nx.DiGraph()
# Add nodes explicitly so a location without any edges would still be drawn
G.add_nodes_from(node_labels)
for node, neighbors in graph.items():
    for neighbor in neighbors:
        G.add_edge(location_names[node], location_names[neighbor])

pos = nx.spring_layout(G, seed=42)

# One normalization shared by the node colours and the colorbar
norm = mcolors.Normalize(vmin=min(hurricane_levels.values()), vmax=max(hurricane_levels.values()))
sm = plt.cm.ScalarMappable(cmap=plt.cm.coolwarm, norm=norm)
sm.set_array([])  # Needed to avoid warning

fig, ax = plt.subplots()

# Draw graph; vmin/vmax pin the node colours to the same scale as the colorbar
nx.draw(G, pos, ax=ax, nodelist=node_labels, with_labels=True, node_color=node_colors,
        cmap=plt.cm.coolwarm, vmin=norm.vmin, vmax=norm.vmax, node_size=1500, font_size=10)

nx.draw_networkx_edge_labels(
    G,
    pos,
    edge_labels={(location_names[i], location_names[j]): f"to {location_names[j]}" for i in graph for j in graph[i]},
    ax=ax,
)

ax.set_title("Evacuation Graph with Hurricane Severity")

# `sm` is not attached to any Axes, so the colorbar must be told which Axes to
# take space from; recent Matplotlib versions raise an error without `ax=`.
fig.colorbar(sm, ax=ax, label="Hurricane Level")
plt.show()

## Discussion

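This prototype demonstrates the use of Q-learning to model adaptive agent behavior in response to environmental risk factors like hurricanes. With the reward structure defined above, the agent learns to avoid high-risk locations and prioritize safer routes: at convergence the Q-values favor marsh_harbor → road_a → junction → safe zone (Q ≈ 4.6 for the first step) over the shorter route through road_b (Q ≈ 3.0), because entering road_b (hurricane level 3) incurs a penalty of -5.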
