WF_RL.m
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Simulation of power allocation in a multichannel communication system.
% Solves the water-filling problem with reinforcement learning, comparing
% independent learning (IL) against cooperative learning (CL).
%
% Original function signature (this script version fixes its parameters):
% function PA_CL(Npower, bs_count, BS_Max, bs_permutation, NumRealization, saveNum, CL)
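%
% NOTE: this script assumes three helpers are on the MATLAB path; their
% interfaces are inferred from the calls below and may differ in the repo:
%   agent(id, noise_level)              - per-user learner (see sketch further down)
%   Reward(P, sum_p, Pmax, noise_level) - scalar reward for a power choice
%   textprogressbar(...)                - console progress bar (a common File Exchange utility)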
%% Initialization
clc;
clear;
total = tic;
%% Parameters
Pmin = 0;    % minimum transmit power (dBm)
Pmax = 6;    % maximum transmit power (dBm)
Npower = 31; % number of discrete power levels
%% Minimum Rate Requirements for users
% q_ue = 10.0;
%% Q-Learning variables
% Actions: Npower equally spaced power levels in [Pmin, Pmax]
actions = linspace(Pmin, Pmax, Npower);
% States (defined for reference but unused: the learning below is stateless)
states = [1,2]; % states = (dMUE , dBS)
% Q-Tables (stateless, so one 1 x Npower row per agent)
Q_init = zeros(1, Npower); % initial per-agent Q-table
Q1 = inf(1, Npower);       % previous summed Q-table, for the convergence test
sumQ = zeros(1, Npower);   % sum of all agents' Q-tables (the CL signal)
alpha = 0.5; gamma = 0.9; epsilon = 0.1; Iterations = 50000; % learning rate, discount, exploration rate, episode budget
CL = 0; % 0 = independent learning (IL), 1 = cooperative learning (CL)
%% Main Loop
count = 0;                          % consecutive near-converged episodes
errorVector = zeros(1, Iterations); % per-episode change in the summed Q-table
% Four power-allocation (PA) agents; the second argument is each agent's
% noise level.
agents = cell(1,4);
agents{1} = agent(1, 1.0);
agents{2} = agent(2, 2.0);
agents{3} = agent(3, 5.0);
agents{4} = agent(4, 3.0);
for i = 1:size(agents,2)
    PA = agents{i};
    PA = PA.setQTable(Q_init); % seed each agent with the zero Q-table
    agents{i} = PA;
end
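% A minimal sketch of the assumed agent class (agent.m), inferred from how
% it is used in this script; the repo's actual class may differ:
%
%   classdef agent
%       properties
%           id            % agent identifier
%           noise_level   % per-agent noise level
%           Q             % 1 x Npower Q-table
%           index         % index of the last selected action
%           P             % last selected transmit power
%       end
%       methods
%           function obj = agent(id, noise_level)
%               obj.id = id;
%               obj.noise_level = noise_level;
%           end
%           function obj = setQTable(obj, Q)
%               obj.Q = Q;
%           end
%       end
%   end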
extra_time = 0.0; % overhead to subtract from the reported runtime (never accumulated in this version)
textprogressbar('calculating outputs: ');
for episode = 1:Iterations
    textprogressbar((episode/Iterations)*100);
    % Recompute the sum of all agents' Q-tables (shared by the CL method)
    sumQ = zeros(1, Npower);
    for j = 1:size(agents,2)
        PA = agents{j};
        sumQ = sumQ + PA.Q;
    end
    if (episode/Iterations)*100 < 80
        % First 80% of episodes: epsilon-greedy action selection
        for j = 1:size(agents,2)
            PA = agents{j};
            if rand < epsilon
                % Explore: pick a random power level
                index = randi(Npower);
                PA.index = index;
                PA.P = actions(index);
            else
                % Exploit: pick the best-known power level
                if CL == 1
                    [~, index] = max(sumQ); % CL: act on the summed Q-tables
                else
                    [~, index] = max(PA.Q); % IL: act on the agent's own Q-table
                end
                PA.index = index;
                PA.P = actions(index);
            end
            agents{j} = PA;
        end
    else
        % Last 20% of episodes: pure exploitation, no exploration
        for j = 1:size(agents,2)
            PA = agents{j};
            if CL == 1
                [~, index] = max(sumQ); % CL: act on the summed Q-tables
            else
                [~, index] = max(PA.Q); % IL: act on the agent's own Q-table
            end
            PA.index = index;
            PA.P = actions(index);
            agents{j} = PA;
        end
    end
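    % Design note: the 80/20 split is a crude exploration schedule; epsilon
    % stays fixed at 0.1 during the first phase rather than decaying. An
    % alternative (not in the original) would be a decaying rate such as
    % epsilon = max(0.01, 1/episode).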
    % Calculate Reward: total transmitted power across all agents is an
    % input to the reward function below
    sum_p = 0;
    for j = 1:size(agents,2)
        PA = agents{j};
        sum_p = sum_p + PA.P;
    end
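    % Reward(P, sum_p, Pmax, noise_level) is external to this file. A
    % plausible water-filling-style form (an illustration only, not
    % necessarily the repo's implementation) would treat sum_p - P as
    % interference and reward the achievable rate, e.g.:
    %   R = log2(1 + P / (noise_level + (sum_p - P))); % hypothetical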
    for j = 1:size(agents,2)
        PA = agents{j};
        index = PA.index;
        R = Reward(PA.P, sum_p, Pmax, PA.noise_level);
        % Stateless Q-update: move Q(index) toward the observed reward
        PA.Q(index) = PA.Q(index) + alpha*(R - PA.Q(index));
        agents{j} = PA;
    end
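    % Note: because there is no next state, this is a bandit-style update
    % Q <- Q + alpha*(R - Q); the discount factor gamma declared above is
    % never used.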
    % Convergence test: stop once the summed Q-table changes by less than
    % 0.001 for more than 1000 consecutive episodes (and is non-trivial)
    errorVector(episode) = sum(abs(Q1 - sumQ));
    if errorVector(episode) < 0.001 && any(sumQ > 0)
        if count > 1000
            break % converged; episode holds the last episode index
        else
            count = count + 1; % extend the streak of small deviations
        end
    else
        Q1 = sumQ;
        count = 0; % large deviation: reset the streak
    end
end
%% Collect and save results
answer.Q = sumQ;
answer.Error = errorVector;
answer.agents = agents;
answer.episode = episode;
tt = toc(total);
answer.time = tt - extra_time;
QFinal = answer;
save(sprintf('DATA/WF_RL/pro_%d.mat', Npower), 'QFinal');
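% Added suggestion (not in the original): save errors if DATA/WF_RL does
% not exist; if adopted, place this guard above the save call:
% if ~exist('DATA/WF_RL', 'dir')
%     mkdir('DATA/WF_RL');
% end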