In [17]:
#
# Imports and settings
# 
import gymnasium as gym
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count

import matplotlib.pyplot as plt
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import threading
from Cryptodome.PublicKey import RSA
from Cryptodome.Signature import PKCS1_v1_5
from Cryptodome.Hash import SHA256
from secrets import SystemRandom

env = gym.make("CartPole-v1")

# set up matplotlib
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

plt.ion()

# if GPU is to be used
device = torch.device(
    "cuda" if torch.cuda.is_available() else
    "mps" if torch.backends.mps.is_available() else
    "cpu"
)

# Global variable


In [18]:
# n : number of processors
n = 150
# s - where 2^s is the number of committees
s = 4
# c - size of committee
c = 2
# D - difficulty level , leading bits of PoW must have D 0's (keep w.r.t to hex)
D = 1 
# r - number of bits in random string 
r = 5
# fin_num - final committee id
fin_num = 0
# identityNodeMap- mapping of identity object to Elastico node
identityNodeMap = dict()
# commitmentSet - set of commitments S
commitmentSet = set()
# ledger - ledger is the database that contains the set of blocks where each block comes after an epoch
ledger = []
# NtwParticipatingNodes - list of nodes those are the part of some committee
NtwParticipatingNodes = []
# network_nodes - list of all nodes 
network_nodes = []
# ELASTICO_STATES - states reperesenting the running state of the node
ELASTICO_STATES = {"NONE": 0, "PoW Computed": 1, "Formed Identity" : 2,"Formed Committee": 3, "RunAsDirectory": 4 ,"Receiving Committee Members" : 5,"Committee full" : 6 , "PBFT Finished" : 7, "Intra Consensus Result Sent to Final" : 8, "Final Committee in PBFT" : 9, "FinalBlockSent" : 10, "FinalBlockReceived" : 11, "RunAsDirectory after-TxnReceived" : 12, "RunAsDirectory after-TxnMulticast" : 13, "Final PBFT Start" : 14, "Merged Consensus Data" : 15, "PBFT Finished-FinalCommittee" : 16 , "CommitmentSentToFinal" : 17, "BroadcastedR" : 18, "ReceivedR" :  19, "FinalBlockSentToClient" : 20}

# Initialize DQN parameters
state_size = env.observation_space.shape[0] # size of state space
action_size = env.action_space.n # size of action space
replay_memory = deque(maxlen=1000)
epsilon = 0.1  # exploration rate

# Defining the Environment


## State


In [19]:
class State(object):
    def __init__(self, transmission_rate, computing_capabilities, consensus_history):
        self.transmission_rate = transmission_rate  # R(i,j) between nodes i and j
        self.computing_capabilities = computing_capabilities  # Computing power of nodes
        self.consensus_history = consensus_history  # H: Consensus validity history
        
    def compute_malicious_node_probability(self): 
        """
        Compute p̄ (malicious node probability) based on consensus history (H)
        """
        trust = self.compute_network_trust()
        return 1 - trust
        
    def compute_network_trust(self):
        """
        Normalize consensus history H and calculate trust level (binary encoding 1 = valid, 0 = invalid)
        """
        total_nodes = len(self.consensus_history)
        valid_consensus_count = sum(self.consensus_history)
        return valid_consensus_count / total_nodes

## Action


In [20]:
class Actions(object):
    def __init__(self, max_size, max_interval, max_shards):
        self.max_size = max_size
        self.max_interval = max_interval
        self.max_shards = max_shards

    def select_actions(self):
        """
        DQN selects block size, block interval, and number of shards
        """
        block_size = random.randint(1, self.max_size)
        block_interval = random.randint(1, self.max_interval)
        num_shards = random.randint(1, self.max_shards)
        return block_size, block_interval, num_shards

## Blockchain environment (BCenv)


## DQN Agent


In [21]:
class DQN(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_size, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, action_size)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# Replay Memory


In [22]:
Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward'))

class ReplayMemory(object):
    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

# Training

## Hyper parameters and utilities


In [23]:
# Hyperparameters
BATCH_SIZE = 128  # Reduced batch size for faster updates and less memory usage
GAMMA = 0.95  # Discount factor for future rewards, slightly reduced to prioritize immediate rewards
EPS_START = 1.0  # Start with full exploration
EPS_END = 0.05  # End with a small amount of exploration
EPS_DECAY = 5000  # Faster decay to reduce exploration over time
TAU = 0.01  # Increase soft update rate for target network
LR = 5e-4  # Learning rate, increased for faster learning

# Replay memory capacity
memory = ReplayMemory(5000)  # Reduced capacity to save memory

# Initialize networks
main_q_network = DQN(state_size, action_size).to(device)
target_q_network = DQN(state_size, action_size).to(device)
target_q_network.load_state_dict(main_q_network.state_dict())
target_q_network.eval()  # Set target network to evaluation mode

# Optimizer and loss function
optimizer = optim.Adam(main_q_network.parameters(), lr=LR)

# Training loop


$Q∗(S, A) = \max_π E[\sum_{t = 0}^{\infty} γ^tR(S^t, A^t) | S^0 = S, A^0 = A; π]$


In [24]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
steps_done = 0
def select_action(state):
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    if sample > eps_threshold:
        with torch.no_grad():
            return main_q_network(state).max(1)[1].view(1, 1)
    else:
        return torch.tensor([[random.randrange(action_size)]], device=device, dtype=torch.long)

def optimize_model(memory, BATCH_SIZE, GAMMA, main_q_network, target_q_network, optimizer):
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*transitions))

    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)), device=device, dtype=torch.bool)
    non_final_next_states = torch.cat([s for s in batch.next_state if s is not None])
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action)
    reward_batch = torch.cat(batch.reward)

    state_action_values = main_q_network(state_batch).gather(1, action_batch)

    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    with torch.no_grad():
        next_state_values[non_final_mask] = target_q_network(non_final_next_states).max(1)[0]
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch

    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))

    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_value_(main_q_network.parameters(), 100)
    optimizer.step()

# Initialize a list to store throughput data
throughput_data = []

def tps_throughput_optimization_model():
    num_episodes = 1000
    for i_episode in range(num_episodes):
        state = env.reset()
        total_reward = 0
        for t in count():
            action = select_action(state)
            next_state, reward, done, _ = env.step(action)
            reward = torch.tensor([reward], device=device)

            if done:
                next_state = None

            memory.push(state, action, next_state, reward)

            state = next_state
            total_reward += reward.item()

            optimize_model()

            target_net_state_dict = target_q_network.state_dict()
            policy_net_state_dict = main_q_network.state_dict()
            for key in policy_net_state_dict:
                target_net_state_dict[key] = policy_net_state_dict[key]*TAU + target_net_state_dict[key]*(1-TAU)
            target_q_network.load_state_dict(target_net_state_dict)

            if done:
                break

        # Calculate throughput (TPS) for the episode
        throughput = calculate_throughput(total_reward, t)
        throughput_data.append(throughput)

    print('Complete')
    env.render()
    env.close()

def calculate_throughput(total_reward, steps):
    if steps == 0:
        return 0
    return total_reward / steps  # Example calculation

# Run the model
tps_throughput_optimization_model()

# Plot the results
plt.figure(figsize=(10, 6))
plt.plot(throughput_data, label='DQN-based Scheme')
plt.xlabel('Episode')
plt.ylabel('Throughput (TPS)')
plt.title('TPS Performance and Convergence Trend Analysis')
plt.legend()
plt.show()

AssertionError: tensor([[1]]) (<class 'torch.Tensor'>) invalid