In [2]:
import pandas as pd
import numpy as np
import random

In [3]:
# Load datasets
boiler_input = pd.read_csv("boilerinput.csv")
boiler_output = pd.read_csv("boileroutput.csv")
coal_data = pd.read_csv("coaldf.csv")
o2_data = pd.read_csv("O2_dataset.csv")

In [5]:
# Preprocess the timestamps
boiler_input['dates'] = pd.to_datetime(boiler_input['dates'])
boiler_output['dates'] = pd.to_datetime(boiler_output['dates'])

In [6]:
# Merge datasets based on timestamps
data = pd.merge_asof(boiler_input.sort_values('dates'), boiler_output.sort_values('dates'), on='dates')


In [7]:
# Select relevant features for states
states = list(zip(
    data["PRIMARY AIR FLOW"].round(1),
    data["SECOND AIR FLOW TOTAL"].round(1),
    data["Main Steam Flow"].round(1)
))

In [8]:
# Define actions
actions = ["increase_primary", "decrease_primary", "increase_secondary", "decrease_secondary"]


In [9]:
# Initialize Q-table
# Q_table = {state: {action: 0 for action in actions} for state in states}
# Initialize Q-table dynamically
Q_table = {}


In [10]:
# Define hyperparameters
alpha = 0.1  # Learning rate
gamma = 0.9  # Discount factor
epsilon = 0.1  # Exploration rate

In [11]:
# Simulated environment function
def get_next_state(state, action):
    primary, secondary, steam = state

    if action == "increase_primary":
        primary = min(primary + 2, data["PRIMARY AIR FLOW"].max())  
    elif action == "decrease_primary":
        primary = max(primary - 2, data["PRIMARY AIR FLOW"].min())  
    elif action == "increase_secondary":
        secondary = min(secondary + 2, data["SECOND AIR FLOW TOTAL"].max())  
    elif action == "decrease_secondary":
        secondary = max(secondary - 2, data["SECOND AIR FLOW TOTAL"].min())

    return (primary, secondary, steam)

In [12]:
# Reward function based on boiler output
def get_reward(state):
    primary, secondary, steam = state
    row = data[(data["PRIMARY AIR FLOW"] == primary) & (data["SECOND AIR FLOW TOTAL"] == secondary)]
    
    if row.empty:
        return -1  # Default penalty for unknown state
    
    efficiency = row["Boiler Efficiency"].values[0]
    total_loss = row["Loss Total"].values[0]

    reward = 0
    if efficiency >= 90:
        reward += 10  # Good efficiency
    elif efficiency < 85:
        reward -= 5  # Poor efficiency

    if total_loss > 12:
        reward -= 10  # High losses
    elif total_loss <= 10:
        reward += 5  # Low losses

    return reward

In [15]:
# Training loop
for episode in range(1000):
    state = random.choice(states)

    # Ensure the state is in Q-table
    if state not in Q_table:
        Q_table[state] = {action: 0 for action in actions}

    for _ in range(50):  
        if random.uniform(0, 1) < epsilon:
            action = random.choice(actions)  # Explore
        else:
            action = max(Q_table[state], key=Q_table[state].get)  # Exploit

        next_state = get_next_state(state, action)
        reward = get_reward(next_state)

        # Ensure next_state exists in Q_table
        if next_state not in Q_table:
            Q_table[next_state] = {action: 0 for action in actions}

        # Q-learning update
        best_next_action = max(Q_table[next_state], key=Q_table[next_state].get)
        Q_table[state][action] += alpha * (reward + gamma * Q_table[next_state][best_next_action] - Q_table[state][action])

        state = next_state  # Move to next state

In [28]:
# Print trained Q-table (first 10 entries for simplicity)
for state, actions in list(Q_table.items())[-3:]:
    print(f"State: {state}, Actions: {actions}")

State: (196.1, 525.1, 605.5), Actions: {'increase_primary': -0.1, 'decrease_primary': 0, 'increase_secondary': 0, 'decrease_secondary': 0}
State: (198.1, 525.1, 605.5), Actions: {'increase_primary': -0.1, 'decrease_primary': 0, 'increase_secondary': 0, 'decrease_secondary': 0}
State: (200.1, 525.1, 605.5), Actions: {'increase_primary': 0, 'decrease_primary': 0, 'increase_secondary': 0, 'decrease_secondary': 0}


In [27]:
# Simulate a real-time scenario
# initial_state = (229.0, 522.2, 537.3)
# initial_state = (231.2, 524.1, 547.2)
initial_state = (178.1, 527.1, 605.5)
print(f"Initial State: {initial_state}")

# Choose the best action from Q-table
best_action = max(Q_table[initial_state], key=Q_table[initial_state].get)
print(f"Agent Suggests Action: {best_action}")

# Apply action and get new state
new_state = get_next_state(initial_state, best_action)
print(f"New State After Action: {new_state}")

# Compute the reward
reward = get_reward(new_state)
print(f"New Reward (Efficiency - Loss): {reward}")

Initial State: (178.1, 527.1, 605.5)
Agent Suggests Action: increase_primary
New State After Action: (180.1, 527.1, 605.5)
New Reward (Efficiency - Loss): -1
