In [2]:
import numpy as np
import pandas as pd


In [5]:
# Load the data
df = pd.read_csv('../data/synthetic_microgrid_data.csv', parse_dates=['Timestamp'])

In [6]:
# Reinforcement Learning Setup
states = []
actions = ['charge', 'discharge', 'idle']
Q_table = {}


In [7]:
# Define state discretization
def get_state(solar, load, soc):
    return (int(solar // 0.5), int(load // 0.5), int(soc // 10))

In [8]:
# Training parameters
episodes = 1000
alpha = 0.1
gamma = 0.8
epsilon = 0.1
battery_capacity_kwh = 10
battery_efficiency = 0.95


for episode in range(episodes):
    battery_soc = 0.5 * battery_capacity_kwh
    for idx, row in df.iterrows():
        state = get_state(row['Solar_Power_Generated (kW)'], row['Load_Demand (kW)'], battery_soc * 10 / battery_capacity_kwh)
        if state not in Q_table:
            Q_table[state] = np.zeros(len(actions))

        if np.random.rand() < epsilon:
            action_idx = np.random.choice(len(actions))
        else:
            action_idx = np.argmax(Q_table[state])

        action = actions[action_idx]

        reward = 0
        if action == 'charge' and row['Solar_Power_Generated (kW)'] > row['Load_Demand (kW)']:
            charge_possible = min(row['Solar_Power_Generated (kW)'] - row['Load_Demand (kW)'], (battery_capacity_kwh - battery_soc))
            battery_soc += charge_possible * battery_efficiency
            reward = charge_possible
        elif action == 'discharge' and row['Load_Demand (kW)'] > row['Solar_Power_Generated (kW)']:
            discharge_possible = min(row['Load_Demand (kW)'] - row['Solar_Power_Generated (kW)'], battery_soc)
            battery_soc -= discharge_possible / battery_efficiency
            reward = discharge_possible
        elif action == 'idle':
            reward = -row['Grid_Usage (kW)']

        battery_soc = max(0, min(battery_soc, battery_capacity_kwh))

        next_state = get_state(row['Solar_Power_Generated (kW)'], row['Load_Demand (kW)'], battery_soc * 10 / battery_capacity_kwh)
        if next_state not in Q_table:
            Q_table[next_state] = np.zeros(len(actions))

        Q_table[state][action_idx] = Q_table[state][action_idx] + alpha * (reward + gamma * np.max(Q_table[next_state]) - Q_table[state][action_idx])

print("Training complete.")
print("Sample Q-values:")
for key in list(Q_table.keys())[:5]:
    print(f"State {key}: {Q_table[key]}")

Training complete.
Sample Q-values:
State (0, 1, 0): [ 5.45799464e-04  5.51607697e-04 -8.49344996e-01]
State (0, 5, 0): [6.11879817 7.72235338 5.26499814]
State (2, 4, 0): [ 4.57883001e-16  4.51192745e-16 -1.17356104e+00]
State (3, 5, 0): [ 1.59794255e-17  1.57754380e-17 -1.01479411e+00]
State (5, 4, 0): [2.46534543 2.00931684 1.99410891]


In [None]:
class MicrogridSimulator:
    def __init__(self):
        self.battery_soc = 5.0  # Start at 50%
        self.grid_usage = []
    
    def step(self, action, solar, load):
        if action == "charge":
            charge_amount = min(solar - load, 10 - self.battery_soc)
            self.battery_soc += charge_amount * 0.95  # 95% efficiency
        
        elif action == "discharge":
            discharge_amount = min(load - solar, self.battery_soc)
            self.battery_soc -= discharge_amount / 0.95
        
        grid_used = max(0, load - solar - (self.battery_soc if action=="discharge" else 0))
        self.grid_usage.append(grid_used)
        return self.battery_soc