In [4]:
import numpy as np
import random
from collections import deque
import matplotlib.pyplot as plt

class DQRAP:
    def __init__(self, n_nodes, arrival_rate, minislots = 3):
        self.n_nodes = n_nodes
        self.nodes = list(range(n_nodes))
        self.minislots = minislots
        self.arrival_rate = arrival_rate
        self.VTQ = deque()   #vitual transmission queue
        self.VRQ = [deque() for _ in range(minislots)]  #vitual resolution queue
        self.physical_queue = deque()
        self.new_active_node = []
        self.successful_transmission = 0
        self.packet_entry_times = {} # A dictionary to track when each packet enters contention
        self.t_sar_entry_times = {} # To track when each packet successfull contention
        self.packet_delays = []  # To store delays for successfully transmitted packets
        self.current_round = 0
        self.unique_counter = 0
        self.unique_counter_t_sar = 0
        self.delay_per_timeslot = []
        self.t_sar_per_timeslot = [] 
        self.count_transmitted_node = 0
        self.total_contentions = 0      # Tổng số lần all nodes tham gia tranh chấp (cả thành công và thất bại)
        self.total_successful_transmission = 0  # Tổng số lần truyền thành công


    def generation_node(self):
        new_arrivals = []
        for node in self.nodes:
            if random.random() < self.arrival_rate:
                new_arrivals.append(node)
        for node in new_arrivals:
            while True:
                unique_key = (node, self.unique_counter)
                if unique_key not in self.packet_entry_times:
                    self.packet_entry_times[unique_key] = self.current_round
                    self.unique_counter += 1
                    break  # Đã gán thành công key mới, thoát vòng lặp
                else:
                    self.unique_counter += 1  
        return new_arrivals


    def contention_phase(self):
        active_nodes = []
        if all((len(VRQ)) == 0 for VRQ in self.VRQ):
            active_nodes.extend(self.physical_queue)
            self.physical_queue.clear()
            new_nodes = self.generation_node()
            active_nodes.extend(new_nodes)
            #print(f"Active nodes: {active_nodes}")    
        else:
             new_nodes = self.generation_node()
             self.physical_queue.extend(new_nodes)
        for i in range(self.minislots):
            if self.VRQ[i]:
                active_nodes.extend(self.VRQ[i].popleft())
                break
        slot_map = {i:[] for i in range(self.minislots)}
        for node in active_nodes:
            slot = random.randint(0, (self.minislots - 1))
            slot_map[slot].append(node)
        #print(f"Minislot: {slot_map}")
        for slot, node_list in slot_map.items():
            #print(f"Minislot {slot}: {node_list}")
            if len(node_list) == 1:
                self.VTQ.append(node_list[0])
                if node_list[0] not in self.t_sar_entry_times:
                    unique_key_t_sar = (node_list[0], self.unique_counter_t_sar)
                    self.t_sar_entry_times[unique_key_t_sar] = self.current_round
                    self.unique_counter_t_sar += 1
                #print(f"Node {node_list[0]} successful transmissted.")
            elif len(node_list) > 1:
                self.VRQ[slot].appendleft(node_list)
                #print(f"Minislot {slot}: {len(node_list)} collision nodes: {node_list}")
            #else:
                #print("No node appeared!")
        #print(f"VRQ: {self.VRQ}")
        #print(f"VTQ: {self.VTQ}")
        return len(self.VTQ), sum(len(VRQ) for VRQ in self.VRQ), active_nodes

    def resolve_collision_phase(self):
        for i in range(self.minislots):
            if self.VRQ[i]:
                #print(f"VRQ {i}: {self.VRQ[i]}")
                colliding_nodes = self.VRQ[i].popleft()
                if isinstance(colliding_nodes, list) and len(colliding_nodes) > 1:
                    self.VRQ[i].appendleft(colliding_nodes)
                    #print(f"VRQ {i} after resolved: {self.VRQ[i]}")
                else:
                    self.VTQ.append(colliding_nodes[0] if isinstance(colliding_nodes, list) else colliding_nodes)
                    #print(f"VTQ: {self.VTQ}")
                break

    def transmission_phase(self):
        current_slot_delay = 0
        current_slot_t_sar = 0
        transmitted_node = 0
        num_transmitted = 0
        num_trans = 0
        if self.VTQ:
            transmitted_node = self.VTQ.popleft()
            self.count_transmitted_node += 1
            self.successful_transmission = 1
            for key in list(self.packet_entry_times.keys()):
                if key[0] == transmitted_node:
                    delay = self.current_round - self.packet_entry_times[key]
                    self.packet_delays.append(delay)
                    #print(f'node {transmitted_node} start at timeslot {self.packet_entry_times[key]}  and contend successfully at timeslot {self.current_round}')
                    del self.packet_entry_times[key]  # remove packet from list
                    current_slot_delay = delay
                    num_transmitted = 1
                    #print(f"Delay: {current_slot_delay}")
                    break
            for key in list(self.t_sar_entry_times.keys()):
                if key[0] == transmitted_node:
                    t_sar = self.current_round - self.t_sar_entry_times[key]
                    # print(f'timeslot successfull contention: {self.t_sar_entry_times[key]}')
                    del self.t_sar_entry_times[key]  # remove packet from list
                    current_slot_t_sar = t_sar
                    num_trans = 1
                    #print(f"T_sar: {current_slot_t_sar}")
                    break
        # Store average delay for this timeslot
        avg_slot_delay = current_slot_delay / num_transmitted if num_transmitted > 0 else 0
        avg_slot_t_sar = current_slot_t_sar / num_trans if num_trans > 0 else 0
        #print(f"Avg_slot_delay: {avg_slot_delay}; avg_slot_t_sar: {avg_slot_t_sar}")

        self.delay_per_timeslot.append(avg_slot_delay)
        self.t_sar_per_timeslot.append(avg_slot_t_sar)  
        return self.count_transmitted_node, transmitted_node # One packet transmitted  
        
    def reset(self):
        self.VTQ.clear()
        self.physical_queue.clear()
        for VRQ in self.VRQ:
            VRQ.clear()
        self.successful_transmission = 0
        self.packet_entry_times.clear()
        self.t_sar_entry_times.clear()
        self.current_round = 0
        self.successful_transmission = 0
        self.packet_delays.clear()
        self.delay_per_timeslot.clear()
        self.t_sar_per_timeslot.clear()
        self.count_transmitted_node = 0
        # # thêm dòng dưới để reset delay sau mỗi trial:
        # self.current_slot_delay = 0
        # self.current_slot_t_sar = 0

    def vrq_status(self):
        minislot_groups = [[] for _ in range(self.minislots)]
        for i in range(self.minislots):
            minislot_groups[i] = list(self.VRQ[i])
        return minislot_groups

    def run_simulation(self):
        successful_contentions, collisions, active_nodes = self.contention_phase()
        self.resolve_collision_phase()
        num_transmitted, transitted_node = self.transmission_phase()

        # Get current delay for this timeslot
        current_delay = self.delay_per_timeslot[-1] if self.delay_per_timeslot else 0   # Retrieve the delay value from the most recent timeslot;
        #print(f"Current delay: {current_delay}")

        # Get current t_sar for this timeslot
        current_t_sar = current_delay - self.t_sar_per_timeslot[-1] if self.t_sar_per_timeslot else 0
        #print(f"Current t_sar: {current_t_sar}")

        if transitted_node:
            current_delay = current_delay + 1
            current_t_sar = current_t_sar + 1

        # check delay for current timeslot again
        self.current_round += 1
        self.total_contentions += len(active_nodes)
        self.total_successful_transmission += num_transmitted

        if len(self.physical_queue) == 0:
            self.new_active_node.extend(active_nodes)
        return successful_contentions, collisions, num_transmitted, active_nodes, current_delay, current_t_sar, self.total_contentions, self.total_successful_transmission

    def get_avg_contentions_per_success(self):
        # Tính số lần tranh chấp tbinh trước khi truyền thành công (n_CP)
        if self.total_successful_transmission == 0:
            return 0
        n_CP = self.total_contentions / self.total_successful_transmission
        return n_CP
    
    def run_multiple_trials(self, num_trials, num_timeslot):
        pass       

# if __name__ == "__main__":
#     random.seed(42)
#     dqrap = DQRAP(n_nodes=100, minislots=3, arrival_rate=1)
#     num_trials = 100
#     num_timeslots = 1000

#     throughput_per_timeslot = [[] for _ in range(num_timeslots)]
#     delay_per_timeslot = [[] for _ in range(num_timeslots)]

#     for value in dqrap.arrival_rate:
#         for trial in range(num_trials):
#             dqrap.reset()
#             for i in range(num_timeslots):
#                 #print(f"======== Timeslot {i+1} ========")
#                 successful_contentions, collisions, num_transmitted, active_nodes, current_delay, current_t_sar = dqrap.run_simulation()
#                 #dqrap.unique_counter += 1 
#                 throughput = dqrap.successful_transmission
#                 throughput_per_timeslot[i].append(throughput)       
#                 delay_per_timeslot[i].append(current_delay)          # delay trung bình tại timeslot i
#                 #print(f"delay_per_timeslot: {delay_per_timeslot}")
#         # Tính trung bình
#         avg_throughput_per_timeslot = [sum(values) / len(values) for values in throughput_per_timeslot]
#         avg_delay_per_timeslot = [sum(values) / len(values) for values in delay_per_timeslot]

#         # Vẽ biểu đồ
#     import matplotlib.pyplot as plt

#     time_slots = list(range(1, num_timeslots + 1))
#     #plt.figure(figsize=(12, 5))

#     # plt.subplot(1, 2, 1)
#     plt.figure(figsize=(10, 5))
#     plt.plot(time_slots, avg_throughput_per_timeslot, 'k-', linewidth=2)
#     plt.xlabel("Timeslot")
#     plt.ylabel("Throughput")
#     plt.title("Average Throughput per Timeslot")

#     # plt.subplot(1, 2, 2)
#     plt.figure(figsize=(10, 5))
#     plt.plot(time_slots, avg_delay_per_timeslot, color='orange', marker='x')
#     plt.xlabel("Timeslot")
#     plt.ylabel("Average Delay")
#     plt.title("Average Delay per Timeslot")

#     plt.tight_layout()
#     plt.show()


# # Plot figure
# plt.figure(figsize=(12, 6))
# plt.plot(time_slots, average_throughput_per_timeslot, 'kx', markersize=1)
# plt.xlabel('Timeslot')
# plt.ylabel('Throughput')
# plt.title('Simulation')
# plt.grid(True)
# plt.tight_layout()
# plt.show()


In [5]:
import numpy as np
import random
import matplotlib.pyplot as plt
from collections import defaultdict
class QL_DQRAP:
        def __init__(self, env, learning_rate=0.1, discount_factor=0.9, epsilon=1, d=1):
                self.env = env
                self.learning_rate = learning_rate
                self.discount_factor = discount_factor
                self.epsilon = epsilon
                self.d = d  # Variation in number of contention nodes
                self.q_table = self.initialize_q_table(env.n_nodes, 100)
                self.new_state_node = 0
                self.new_dtq_length = 0
                self.avg_delay = 0
                self.throughput_1 = 0
                self.a = self.initialize_q_table(env.n_nodes,100)
                
        def initialize_q_table(self, max_nodes, dtq_length):
                q_table = np.zeros((max_nodes + 1, dtq_length + 1, 3))  # Three possible actions Xt-d, Xt, Xt+d
                for Yt in range(dtq_length+1):
                        for Xt in range(max_nodes + 1):
                                if Xt == 0:
                                        q_table[Xt][Yt][0] = -20000  # Invalid, should never visit
                                elif Xt == max_nodes:
                                        q_table[Xt][Yt][2] = -20000  # Invalid, should never visit
                return q_table

        def get_state(self):
                return self.new_state_node
        
        def get_dtq_length(self):
                return self.new_dtq_length
        
        def choose_action(self, state_node, dtq_length):
                if random.uniform(0, 1) < self.epsilon:
                        action = random.choice([- self.d, 0, self.d])
                else:
                        action = np.argmax(self.q_table[state_node][dtq_length]) - 1
                return action
        
        def update_q_table(self, state, dtq_length, action, reward, next_state, next_dtq_length):
                action_idx = action + 1  # Convert action (-1, 0, 1) to index (0, 1, 2)
                current_q = self.q_table[state,dtq_length,action_idx]
                max_next_q = max(self.q_table[next_state,next_dtq_length, :])
                new_q = current_q + self.learning_rate * (reward + self.discount_factor * max_next_q - current_q)
                self.q_table[state,dtq_length,action_idx] = new_q

        def calculate_reward(self, dtq_length, crq_length, delay, transmitted, throughput_1, throughput_2):
                reward = transmitted * 100 # High reward for successful transmission
                reward -= crq_length *10 # Penalty for unresolved collisions
                reward -= dtq_length * 5 # Penalty for long data transmission queue
                reward -= delay * 0.1
                return reward
        
        def decay_epsilon(self, current_round):
                min_epsilon = 0.1
                decay_rate = 0.01
                self.epsilon = min_epsilon + (self.epsilon - min_epsilon) * np.exp(-decay_rate * current_round)
                return self.epsilon
                        
        def train(self, rounds=1000):
                metrics = {
                'throughput': np.zeros(rounds),
                'dtq_length': np.zeros(rounds),
                'crq_length': np.zeros(rounds),
                'reward': np.zeros(rounds),
                'delay': np.zeros(rounds),
                'n_CP': np.zeros(rounds)
                }
                for round_num in range(rounds):
                        delay = 0
                        state = self.get_state()
                        dtq_length = self.get_dtq_length()
                        action = self.choose_action(state,dtq_length)
                        new_node_count = max(0,min(100,state+action))
                        # print(f'state:{state} action:{action}')
                        self.new_state_node = new_node_count
                        successful_contentions, collisions, transmitted, active_nodes, current_delay, current_t_sar, total_cont, total_succ = self.env.run_simulation()
                        self.new_dtq_length = len(self.env.VTQ)
                        for i,j in enumerate(active_nodes):
                                delay += i
                        throughput = transmitted / (round_num + 1)
                        next_state = self.get_state()
                        next_dtq_length = self.get_dtq_length()
                        reward = self.calculate_reward(len(self.env.VTQ), sum(len(VRQ) for VRQ in self.env.VRQ), delay, transmitted,throughput, self.throughput_1)
                        self.update_q_table(state,len(self.env.VTQ), action, reward, next_state, next_dtq_length) #affect to transmission rate
                        self.env.n_nodes = new_node_count 
                        # print(f'n_nodes:{self.env.n_nodes}')
                        self.decay_epsilon(round_num)
                        self.throughput_1 = transmitted / (round_num + 1)

                        current_n_CP = self.env.get_avg_contentions_per_success()
                        metrics['throughput'][round_num] = throughput
                        metrics['dtq_length'][round_num] = len(self.env.dtq)
                        metrics['crq_length'][round_num] = sum(len(crq) for crq in self.env.crqs)
                        metrics['delay'][round_num] = delay
                        metrics['reward'][round_num] = reward
                        metrics['n_CP'][round_num] = current_n_CP

                f = open('data.txt','w')
                f.write(str(dict((self.q_table))))
                
                for state, dtq_dict in self.q_table.items():
                        print(f"State (Xt): {state}")
                for dtq_length, actions in dtq_dict.items():
                        print(f"  DTQ Length (Yt): {dtq_length} -> Actions: {actions}")
                print("\n")
                # Calculate the average number of new arrivals per slot
                avg_new_arrivals_per_slot = len(self.env.new_active_node) / (rounds * rounds )

                # Calculate the input rate: ratio of average new arrivals to transmission capacity
                self.env.arrival_rate = avg_new_arrivals_per_slot
                # total_delay = self.env.calculate_total_delay(self.env.input_rate)
                return metrics

        def result(self, rounds=1000):
                metrics = {
                        'n_CP': np.zeros(rounds) # Mảng để lưu giá trị n_CP sau mỗi round
                }
                
                for round_num in range(rounds):
                        # Chạy mô phỏng 1 round
                        successful_contentions, collisions, transmitted, active_nodes, current_delay, current_t_sar, total_cont, total_succ = self.env.run_simulation()
                        
                        # Tính n_CP cho round hiện tại
                        current_n_CP = self.env.get_avg_contentions_per_success()
                        metrics['n_CP'][round_num] = current_n_CP
                        
                return metrics
        
        def run_multiple_trials(self, rounds=1000, trials=5):
                avg_metrics = {
                        'n_CP': np.zeros(rounds) # Mảng để lưu giá trị n_CP trung bình qua các trials
                }
                
                # Mảng để lưu giá trị n_CP CUỐI CÙNG của mỗi trial
                final_n_CP_values = np.zeros(trials)
                
                for trial in range(trials):                        
                        # Reset môi trường và Q-table cho mỗi trial
                        self.env.reset()
                        self.env.total_contentions = 0          # Reset bộ đếm tranh chấu
                        self.env.total_successful_transmissions = 0  # Reset bộ đếm truyền thành công
                        self.q_table = self.initialize_q_table(self.env.n_nodes, 100)
                        
                        # Chạy evaluation và thu thập metrics
                        trial_metrics = self.result(rounds=rounds)
                        
                        # Lưu giá trị n_CP cuối cùng của trial này
                        final_n_CP_values[trial] = trial_metrics['n_CP'][-1]
                        
                        # Cộng dồn metrics để tính trung bình sau này
                        for key in avg_metrics:
                                avg_metrics[key] += trial_metrics[key]
                
                # Tính giá trị trung bình của các metrics qua tất cả trials
                for key in avg_metrics:
                        avg_metrics[key] /= trials
                
                return avg_metrics, final_n_CP_values

# # Khởi tạo môi trường và agent
# env = DQRAP(n_nodes=50, arrival_rate=0.05)
# agent = QL_DQRAP(env)

# # Chạy multiple trials để có kết quả ổn định
# rounds = 10000
# trials = 5
# avg_metrics, final_n_cp_values = agent.run_multiple_trials(rounds=rounds, trials=trials)

# # In kết quả n_CP cuối cùng của từng trial
# # print("n_CP values from each trial:", final_n_cp_values)
# # print("Average final n_CP:", np.mean(final_n_cp_values))

# # Vẽ biểu đồ n_CP qua các round (sử dụng giá trị trung bình)
# plt.figure(figsize=(10, 6))
# plt.plot(avg_metrics['n_CP'], label='Average n_CP')
# plt.xlabel('Simulation Rounds')
# plt.ylabel('Average Contentions per Success (n_CP)')
# plt.title('Evolution of n_CP over Time')
# plt.legend()
# plt.grid(True)
# plt.show()

# Chạy song song 2 mô phỏng: 1 có agent, 1 không có agent
env_ql = DQRAP(n_nodes=100, arrival_rate=0.1)  # Môi trường cho QL-DQMAC
env_traditional = DQRAP(n_nodes=100, arrival_rate=0.1)  # Môi trường cho Traditional

agent = QL_DQRAP(env_ql)
rounds = 1000

# Chạy Traditional DQMAC (không có agent)
traditional_n_cp = []
for round_num in range(rounds):
    successful_contentions, collisions, transmitted, active_nodes, current_delay, current_t_sar, total_cont, total_succ = env_traditional.run_simulation()
    current_n_CP = env_traditional.get_avg_contentions_per_success()
    traditional_n_cp.append(current_n_CP)

# Chạy QL-DQMAC
metrics = agent.train(rounds=rounds)

# Vẽ biểu đồ so sánh
plt.figure(figsize=(12, 6))
plt.plot(traditional_n_cp, 'r-', label='Traditional DQMAC', alpha=0.8)
plt.plot(metrics['n_CP'], 'b-', label='QL-based DQMAC', linewidth=1.5)
plt.xlabel('Simulation Rounds')
plt.ylabel('Average Contentions per Success (n_CP)')
plt.title('Comparison of MAC Contention Level (100 IoT Nodes)')
plt.grid(True, linestyle='--', alpha=0.7)
plt.legend()
plt.show()

AttributeError: 'DQRAP' object has no attribute 'dtq'