<a href="https://colab.research.google.com/github/keerthana-nc/Reinforcement-learning-Actorcritic/blob/main/Actor_Critic_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas
!pip install torch




In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd

# Define the actor network
class Actor(nn.Module):
    """Policy network: one hidden layer followed by a softmax output.

    Args:
        input_size: Number of features in the state vector.
        output_size: Number of entries in the returned probability vector.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        hidden_units = 128
        self.fc1 = nn.Linear(input_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, output_size)

    def forward(self, x):
        """Return probabilities over the last dimension for state ``x``."""
        hidden = torch.relu(self.fc1(x))
        logits = self.fc2(hidden)
        # Softmax so the outputs along the last dimension sum to 1.
        return torch.softmax(logits, dim=-1)

# Define the critic network
class Critic(nn.Module):
    """Value network: one hidden layer followed by a single linear output.

    Args:
        input_size: Number of features in the state vector.
    """

    def __init__(self, input_size):
        super().__init__()
        hidden_units = 128
        self.fc1 = nn.Linear(input_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Return the scalar value estimate (last dimension of size 1) for ``x``."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

# Load the dataset from a CSV file
#data = pd.read_csv('predicted_power.csv')

from google.colab import files

# Ask for a file through the Colab upload widget and load the first uploaded
# file as the dataset.
uploaded = files.upload()
if not uploaded:
    # Without this guard, next(iter(...)) fails with a bare StopIteration.
    raise ValueError("No file was uploaded; a CSV file is required.")

filename = next(iter(uploaded))
data = pd.read_csv(filename)

# Initialize actor and critic with input size 1 (the single 'Active Power'
# feature). Of the actor's 201 outputs, entries 0 and 1 are read as the
# cut / reduce decision and the remaining 199 as allocation weights.
actor = Actor(input_size=1, output_size=201)
critic = Critic(input_size=1)

# Define optimizers
actor_optimizer = optim.Adam(actor.parameters(), lr=0.001)
critic_optimizer = optim.Adam(critic.parameters(), lr=0.001)

num_epochs = 100  # You can set this to your desired number of training epochs

# Scenario 2 settings. None of these change during training, so they are
# defined once here instead of on every loop iteration.
num_vvip = 50
num_vip = 50
num_high_priority = 50
num_medium_priority = 50
num_low_priority = 50
# NOTE(review): the five groups add up to 250 houses but only 200 allocations
# are sampled, so the low-priority slice below is always empty -- confirm the
# intended group sizes.
num_sampled_allocations = 200
required_power_per_vip = 100
gamma = 0.95  # weight of the critic's own estimate in the return target
log_eps = 1e-8  # lower bound applied before log() so a zero probability cannot yield -inf
mse_loss = nn.MSELoss()


def is_action_efficient(allocation_vector):
    """Return True if the VIP group reaches 80% of its power requirement.

    The VIP group is entries ``num_vvip:num_vvip + num_vip`` of
    ``allocation_vector``; their sum is compared against
    ``num_vip * required_power_per_vip``.
    """
    efficiency_threshold = 0.8
    vip_power = allocation_vector[num_vvip:num_vvip + num_vip].sum()
    vip_requirement = num_vip * required_power_per_vip
    return bool(vip_power / vip_requirement >= efficiency_threshold)


def reward(action_efficient):
    """Placeholder reward for Scenario 2: 1 for an efficient action, else 0."""
    return 1 if action_efficient else 0


# Training loop for actor-critic
for epoch in range(num_epochs):
    for active_power in data['Active Power']:  # one state per dataset row
        # Convert the row's feature to a PyTorch tensor of shape (1,)
        state = torch.tensor([active_power], dtype=torch.float32)

        # Compute the actor's action probabilities
        action_probs = actor(state)

        # Scenario 1: Decide whether to cut power (0) or reduce power supply (1)
        cut_action_prob = action_probs[0]
        reduce_action_prob = action_probs[1]

        # The two decisions are represented by their raw probabilities (no
        # sampling). They are rebuilt as fresh tensors from .item(), so no
        # gradient flows through these two entries.
        cut_action = torch.tensor([cut_action_prob.item()], dtype=torch.float32)
        reduce_action = torch.tensor([reduce_action_prob.item()], dtype=torch.float32)

        # Combine the two actions into a single action vector
        combined_action = torch.cat((cut_action, reduce_action, action_probs[2:]), dim=0)

        # Scenario 2: sample an allocation vector from the remaining outputs
        allocation_probs = action_probs[2:]
        sampled_allocation = torch.multinomial(
            allocation_probs, num_sampled_allocations, replacement=True
        )

        # Split the sampled allocation into priority groups
        vvip_end = num_vvip
        vip_end = vvip_end + num_vip
        high_end = vip_end + num_high_priority
        medium_end = high_end + num_medium_priority
        vvip_allocation = sampled_allocation[:vvip_end]
        vip_allocation = sampled_allocation[vvip_end:vip_end]
        high_priority_allocation = sampled_allocation[vip_end:high_end]
        medium_priority_allocation = sampled_allocation[high_end:medium_end]
        low_priority_allocation = sampled_allocation[medium_end:]

        # Evaluate the sampled allocation. The boolean result (not the
        # function object, which is always truthy) is what feeds the reward.
        action_efficient = is_action_efficient(sampled_allocation)

        # Compute the critic's value estimate
        critic_value = critic(state)

        # NOTE(review): the target bootstraps from the value of the same state
        # rather than a successor state -- confirm this is intended.
        expected_return = reward(action_efficient) + gamma * critic_value.detach()
        # Detached: the advantage only scales the actor loss, so the actor
        # backward pass does not need to traverse (or retain) the critic graph.
        advantage = expected_return - critic_value.detach()

        # NOTE(review): every probability's log is weighted by the same
        # advantage, rather than the log-probability of the sampled
        # allocation -- confirm this is the intended objective.
        actor_loss = -torch.log(torch.clamp(combined_action, min=log_eps)) * advantage
        mean_actor_loss = torch.mean(actor_loss)

        # Calculate critic loss using Mean Squared Error (MSE) loss
        critic_loss = mse_loss(critic_value, expected_return)

        # Update the actor, then the critic; their graphs are independent.
        actor_optimizer.zero_grad()
        mean_actor_loss.backward()
        actor_optimizer.step()

        critic_optimizer.zero_grad()
        critic_loss.backward()
        critic_optimizer.step()




Saving LSTM_pred.csv to LSTM_pred (11).csv


KeyboardInterrupt: ignored

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd

# Define the actor network
class Actor(nn.Module):
    """Two-layer policy network that outputs a probability vector.

    Args:
        input_size: Width of the input state.
        output_size: Length of the probability vector produced by ``forward``.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=output_size)

    def forward(self, x):
        """Apply fc1 -> ReLU -> fc2 -> softmax over the last dimension."""
        return torch.softmax(self.fc2(torch.relu(self.fc1(x))), dim=-1)

# Define the critic network
class Critic(nn.Module):
    """Two-layer value network that outputs one number per state.

    Args:
        input_size: Width of the input state.
    """

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=1)

    def forward(self, x):
        """Apply fc1 -> ReLU -> fc2 and return the result unchanged."""
        return self.fc2(torch.relu(self.fc1(x)))

# Load the dataset from a CSV file
# data = pd.read_csv('predicted_power.csv')

from google.colab import files

# Ask for a file through the Colab upload widget and load the first uploaded
# file as the dataset.
uploaded = files.upload()
if not uploaded:
    # Without this guard, next(iter(...)) fails with a bare StopIteration.
    raise ValueError("No file was uploaded; a CSV file is required.")

filename = next(iter(uploaded))
data = pd.read_csv(filename)

# This cell stops after loading the data; no actor or critic is created here.


Saving LSTM_pred.csv to LSTM_pred (12).csv


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd

# Define the actor network
class Actor(nn.Module):
    """Actor network: linear -> ReLU -> linear -> softmax.

    Args:
        input_size: Number of input features.
        output_size: Number of probabilities returned by ``forward``.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, output_size)

    def forward(self, x):
        """Map state ``x`` to probabilities that sum to 1 along the last axis."""
        activated = torch.relu(self.fc1(x))
        scores = self.fc2(activated)
        action_probs = torch.softmax(scores, dim=-1)
        return action_probs

# Define the critic network
class Critic(nn.Module):
    """Critic network: linear -> ReLU -> linear with a single output unit.

    Args:
        input_size: Number of input features.
    """

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        """Map state ``x`` to its value estimate (last dimension of size 1)."""
        activated = torch.relu(self.fc1(x))
        value = self.fc2(activated)
        return value

# Load the dataset from a CSV file
#data = pd.read_csv('predicted_power.csv')

from google.colab import files

# Ask for a file through the Colab upload widget and load the first uploaded
# file as the dataset.
uploaded = files.upload()
if not uploaded:
    # Without this guard, next(iter(...)) fails with a bare StopIteration.
    raise ValueError("No file was uploaded; a CSV file is required.")

filename = next(iter(uploaded))
data = pd.read_csv(filename)

# Initialize actor and critic with input size 1 (the single 'Active Power'
# feature). Of the actor's 201 outputs, entries 0 and 1 are read as the
# cut / reduce decision and the remaining 199 as allocation weights.
actor = Actor(input_size=1, output_size=201)
critic = Critic(input_size=1)

# Define optimizers
actor_optimizer = optim.Adam(actor.parameters(), lr=0.001)
critic_optimizer = optim.Adam(critic.parameters(), lr=0.001)

num_epochs = 10  # You can set this to your desired number of training epochs

# Scenario 2 settings. None of these change during training, so they are
# defined once here instead of on every loop iteration.
num_vvip = 50
num_vip = 50
num_high_priority = 50
num_medium_priority = 50
num_low_priority = 50
# NOTE(review): the five groups add up to 250 houses but only 200 allocations
# are sampled, so the low-priority slice below is always empty -- confirm the
# intended group sizes.
num_sampled_allocations = 200
required_power_per_vip = 100
gamma = 0.95  # weight of the critic's own estimate in the return target
log_eps = 1e-8  # lower bound applied before log() so a zero probability cannot yield -inf
mse_loss = nn.MSELoss()


def is_action_efficient(allocation_vector):
    """Return True if the VIP group reaches 80% of its power requirement.

    The VIP group is entries ``num_vvip:num_vvip + num_vip`` of
    ``allocation_vector``; their sum is compared against
    ``num_vip * required_power_per_vip``.
    """
    efficiency_threshold = 0.8
    vip_power = allocation_vector[num_vvip:num_vvip + num_vip].sum()
    vip_requirement = num_vip * required_power_per_vip
    return bool(vip_power / vip_requirement >= efficiency_threshold)


def reward(action_efficient):
    """Placeholder reward for Scenario 2: 1 for an efficient action, else 0."""
    return 1 if action_efficient else 0


# Training loop for actor-critic
for epoch in range(num_epochs):
    for active_power in data['Active Power']:  # one state per dataset row
        # Convert the row's feature to a PyTorch tensor of shape (1,)
        state = torch.tensor([active_power], dtype=torch.float32)

        # Compute the actor's action probabilities
        action_probs = actor(state)

        # Scenario 1: Decide whether to cut power (0) or reduce power supply (1)
        cut_action_prob = action_probs[0]
        reduce_action_prob = action_probs[1]

        # The two decisions are represented by their raw probabilities (no
        # sampling). They are rebuilt as fresh tensors from .item(), so no
        # gradient flows through these two entries.
        cut_action = torch.tensor([cut_action_prob.item()], dtype=torch.float32)
        reduce_action = torch.tensor([reduce_action_prob.item()], dtype=torch.float32)

        # Combine the two actions into a single action vector
        combined_action = torch.cat((cut_action, reduce_action, action_probs[2:]), dim=0)

        # Scenario 2: sample an allocation vector from the remaining outputs
        allocation_probs = action_probs[2:]
        sampled_allocation = torch.multinomial(
            allocation_probs, num_sampled_allocations, replacement=True
        )

        # Split the sampled allocation into priority groups
        vvip_end = num_vvip
        vip_end = vvip_end + num_vip
        high_end = vip_end + num_high_priority
        medium_end = high_end + num_medium_priority
        vvip_allocation = sampled_allocation[:vvip_end]
        vip_allocation = sampled_allocation[vvip_end:vip_end]
        high_priority_allocation = sampled_allocation[vip_end:high_end]
        medium_priority_allocation = sampled_allocation[high_end:medium_end]
        low_priority_allocation = sampled_allocation[medium_end:]

        # Evaluate the sampled allocation. The boolean result (not the
        # function object, which is always truthy) is what feeds the reward.
        action_efficient = is_action_efficient(sampled_allocation)

        # Compute the critic's value estimate
        critic_value = critic(state)

        # NOTE(review): the target bootstraps from the value of the same state
        # rather than a successor state -- confirm this is intended.
        expected_return = reward(action_efficient) + gamma * critic_value.detach()
        # Detached: the advantage only scales the actor loss, so the actor
        # backward pass does not need to traverse (or retain) the critic graph.
        advantage = expected_return - critic_value.detach()

        # NOTE(review): every probability's log is weighted by the same
        # advantage, rather than the log-probability of the sampled
        # allocation -- confirm this is the intended objective.
        actor_loss = -torch.log(torch.clamp(combined_action, min=log_eps)) * advantage
        mean_actor_loss = torch.mean(actor_loss)

        # Calculate critic loss using Mean Squared Error (MSE) loss
        critic_loss = mse_loss(critic_value, expected_return)

        # Update the actor, then the critic; their graphs are independent, so
        # neither backward pass needs retain_graph.
        actor_optimizer.zero_grad()
        mean_actor_loss.backward()
        actor_optimizer.step()

        critic_optimizer.zero_grad()
        critic_loss.backward()
        critic_optimizer.step()

# Print the summed entries of the last computed action vector in consecutive
# chunks of 50.
# The chunk count is derived from the action vector itself: sizing it from
# len(data) (the number of dataset rows) produced empty slices past the end of
# the action vector, each of which was reported as a meaningless 0.
batch_size = 50
final_action = combined_action.detach()  # reporting only; no gradient needed
num_houses = len(final_action)
num_batches = (num_houses + batch_size - 1) // batch_size  # include the partial last chunk
for batch in range(num_batches):
    start_idx = batch * batch_size
    end_idx = start_idx + batch_size
    power_allocation = final_action[start_idx:end_idx].sum().item()
    print(f"Power allocation for batch {batch + 1}: {power_allocation}")


Saving LSTM_pred.csv to LSTM_pred (2).csv
Power allocation for batch 1: 0.9999998211860657
Power allocation for batch 2: 4.84336304396038e-08
Power allocation for batch 3: 4.878986104017713e-08
Power allocation for batch 4: 1.7435893084893905e-07
Power allocation for batch 5: 6.280025943716794e-10
Power allocation for batch 6: 0
Power allocation for batch 7: 0
Power allocation for batch 8: 0
Power allocation for batch 9: 0
Power allocation for batch 10: 0
Power allocation for batch 11: 0
Power allocation for batch 12: 0
Power allocation for batch 13: 0
Power allocation for batch 14: 0
Power allocation for batch 15: 0
Power allocation for batch 16: 0
Power allocation for batch 17: 0
Power allocation for batch 18: 0
Power allocation for batch 19: 0
Power allocation for batch 20: 0
Power allocation for batch 21: 0
Power allocation for batch 22: 0
Power allocation for batch 23: 0
Power allocation for batch 24: 0
Power allocation for batch 25: 0
Power allocation for batch 26: 0
Power alloca