# Problem: Predict Optimal Daily Electricity Usage

This example uses reinforcement learning (tabular Q-learning) to determine the optimal daily electricity usage that minimizes cost, given varying energy prices and household demand.

In [1]:
import numpy as np

# Environment setup
# Usage levels the agent can pick: 0 = low, 1 = medium, 2 = high.
actions = [0, 1, 2]
# Per-unit energy price at each price level (arbitrary scale); the state is an
# index into this list.
prices = [5, 10, 20, 30]
# Energy demand in units (arbitrary scale).
# NOTE(review): not read by any code visible in this cell.
demand = [2, 4, 6, 8]
# Number of states: one per price level.
states = len(prices)

# Reward model: a fixed bonus per usage level minus the current price.
def get_reward(state, action):
    """Return the reward for choosing usage level ``action`` at price level ``state``.

    The reward is ``bonus - prices[state]`` where the bonus is 10, 20 or 30
    for actions 0 (low), 1 (medium) and 2 (high) respectively. Any other
    action yields -100 without looking up the price.

    NOTE: the price term is identical for every action in a given state, so
    a higher usage level always produces a larger reward regardless of price.
    """
    for usage_level, bonus in ((0, 10), (1, 20), (2, 30)):
        if action == usage_level:
            return bonus - prices[state]
    return -100  # Invalid

# Hyperparameters for tabular Q-learning
alpha = 0.1  # Step size applied to each temporal-difference error
gamma = 0.9  # Discount applied to the bootstrapped next-state value
epsilon = 1.0  # Probability of taking a random action (starts fully exploratory)
epsilon_decay = 0.995  # Multiplicative decay applied to epsilon after every episode
episodes = 1000
# One row per price level, one column per usage action; all estimates start at 0.
q_table = np.zeros((states, len(actions)))

# Training loop
for _ in range(episodes):
    # Each episode begins at a uniformly random price level and walks forward
    # through the levels until the index wraps around to 0.
    state = np.random.choice(states)
    done = False
    while not done:
        # Epsilon-greedy selection: random action with probability epsilon,
        # otherwise the action with the highest current Q-value.
        explore = np.random.rand() < epsilon
        action = np.random.choice(actions) if explore else np.argmax(q_table[state])

        # Deterministic transition to the next price level (cyclic).
        reward = get_reward(state, action)
        next_state = (state + 1) % states
        done = next_state == 0

        # Temporal-difference update. The target bootstraps from the best
        # Q-value of next_state on every step, including the final one.
        max_next_q = np.max(q_table[next_state])
        td_target = reward + gamma * max_next_q
        td_error = td_target - q_table[state, action]
        q_table[state, action] += alpha * td_error
        state = next_state

    # Shrink the exploration rate, but never below 0.1.
    epsilon = max(epsilon * epsilon_decay, 0.1)

# Greedy policy: the highest-valued action for each price level.
optimal_policy = np.argmax(q_table, axis=1)
print(f"Optimal Policy (action for each price level): {optimal_policy}")


Optimal Policy (action for each price level): [2 2 2 2]


## Metrics

- **Optimal Policy:** Check if the policy aligns with minimizing costs.
- **Convergence:** Ensure the Q-values stabilize.
- **Energy Cost:** Calculate the total cost for a simulated day using the learned policy.
