# Blog 9: AI-powered Maintenance Scheduling Optimizer using Reinforcement Learning

# In [None]:

# AI-powered Maintenance Scheduling Optimizer using Reinforcement Learning

## 1. Imports and Setup
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random

# Define the maintenance environment (custom MDP)
class MaintenanceEnv:
    """Three-state machine-health MDP used to learn a maintenance policy.

    States and actions are integer indices into ``state_space`` and
    ``action_space``:

    * states: 0 = Healthy, 1 = Degraded, 2 = Faulty
    * actions: 0 = DoNothing, 1 = Maintain

    Dynamics and rewards:

    * Maintain always returns the machine to Healthy; it costs -5 from
      Faulty and -1 from any other state.
    * DoNothing while Healthy earns +1 and degrades the machine with
      probability ``degrade_prob``.
    * DoNothing while Degraded costs -2 and breaks the machine with
      probability ``failure_prob``.
    * DoNothing while Faulty costs -10 and leaves the machine Faulty.

    Randomness comes from the module-level ``random`` generator, so
    ``random.seed(...)`` makes a run reproducible.
    """

    # Integer codes for the states and actions listed above.
    HEALTHY, DEGRADED, FAULTY = range(3)
    DO_NOTHING, MAINTAIN = range(2)

    def __init__(self, degrade_prob=0.3, failure_prob=0.5):
        """Create the environment and put it in the Healthy state.

        Args:
            degrade_prob: Probability of Healthy -> Degraded on DoNothing.
            failure_prob: Probability of Degraded -> Faulty on DoNothing.

        Raises:
            ValueError: If either probability lies outside [0, 1].
        """
        for label, prob in (("degrade_prob", degrade_prob),
                            ("failure_prob", failure_prob)):
            if not 0.0 <= prob <= 1.0:
                raise ValueError(f"{label} must be within [0, 1], got {prob!r}")
        self.state_space = ['Healthy', 'Degraded', 'Faulty']
        self.action_space = ['DoNothing', 'Maintain']
        self.degrade_prob = degrade_prob
        self.failure_prob = failure_prob
        self.reset()

    def reset(self):
        """Return the machine to Healthy and return that state index (0)."""
        self.state = self.HEALTHY
        return self.state

    def step(self, action):
        """Apply ``action`` and advance the machine by one time step.

        Args:
            action: 0 (DoNothing) or 1 (Maintain).

        Returns:
            A ``(state, reward, done, info)`` tuple. ``done`` is always
            ``False`` (the task never terminates on its own) and ``info``
            is an empty dict.

        Raises:
            ValueError: If ``action`` is neither 0 nor 1. The state is left
                untouched in that case.
        """
        # Reject unknown actions instead of silently treating them as Maintain.
        if action not in (self.DO_NOTHING, self.MAINTAIN):
            raise ValueError(
                f"invalid action {action!r}; expected 0 (DoNothing) or 1 (Maintain)"
            )

        if action == self.MAINTAIN:
            # Repairing a broken machine is dearer than preventive maintenance.
            if self.state in (self.HEALTHY, self.DEGRADED):
                reward = -1
            else:
                reward = -5
            self.state = self.HEALTHY
        elif self.state == self.HEALTHY:
            if random.random() < self.degrade_prob:
                self.state = self.DEGRADED
            reward = 1
        elif self.state == self.DEGRADED:
            if random.random() < self.failure_prob:
                self.state = self.FAULTY
            reward = -2
        else:
            # Faulty and left alone: the machine stays down.
            reward = -10

        done = False
        return self.state, reward, done, {}

## 2. Q-Learning Implementation
# Tabular Q-learning with an epsilon-greedy behaviour policy.
env = MaintenanceEnv()

# Size the table from the environment so it cannot drift out of sync with it.
n_states = len(env.state_space)
n_actions = len(env.action_space)
q_table = np.zeros((n_states, n_actions))  # rows: states, columns: actions

alpha = 0.1      # learning rate
gamma = 0.95     # discount factor
epsilon = 0.1    # probability of taking a random (exploratory) action
episodes = 500   # number of training episodes
max_steps = 20   # decision steps per episode

rewards = []  # total (undiscounted) reward collected in each episode

for ep in range(episodes):
    state = env.reset()
    total_reward = 0
    for _ in range(max_steps):
        # Explore with probability epsilon, otherwise act greedily.
        if random.random() < epsilon:
            action = random.choice(range(n_actions))
        else:
            action = int(np.argmax(q_table[state]))

        next_state, reward, done, _info = env.step(action)

        # A terminal transition has no future value to bootstrap from.
        future_value = 0.0 if done else gamma * np.max(q_table[next_state])
        q_table[state, action] += alpha * (reward + future_value - q_table[state, action])

        state = next_state
        total_reward += reward

        # Honour the environment's termination signal instead of ignoring it.
        if done:
            break
    rewards.append(total_reward)

## 3. Visualize Training Rewards
# Learning curve: total reward collected in each training episode.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(rewards)
ax.set_title("Total Rewards per Episode")
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
ax.grid(True)
plt.show()

## 4. Visualize Learned Policy
# Human-readable labels for the Q-table columns (actions) and rows (states).
actions = ['DoNothing', 'Maintain']
states = ['Healthy', 'Degraded', 'Faulty']

# The greedy policy picks, for every state, the action with the largest Q-value.
best_actions = q_table.argmax(axis=1)
policy = {
    state_label: actions[action_idx]
    for state_label, action_idx in zip(states, best_actions)
}
print("Learned Maintenance Policy:", policy)

# Annotated heatmap of the raw state-action values.
heatmap_ax = sns.heatmap(
    q_table,
    annot=True,
    cmap="YlGnBu",
    xticklabels=actions,
    yticklabels=states,
)
heatmap_ax.set_title("Q-Table (State-Action Values)")
heatmap_ax.set_xlabel("Action")
heatmap_ax.set_ylabel("State")
plt.show()
