-
Notifications
You must be signed in to change notification settings - Fork 2
/
thesis.tex
274 lines (232 loc) · 10.3 KB
/
thesis.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
\documentclass[12pt]{strath_thesis}
\usepackage{apacite}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage[mathscr]{eucal}
\usepackage{ifpdf}
\usepackage{multirow}
\usepackage{dcolumn}
\usepackage{eurosym}
\usepackage{rotating}
\usepackage{pdflscape}
\usepackage{paralist}
\usepackage{ifthen}
\usepackage{soul} % defines an \ul command
%\usepackage{pslatex}
%\usepackage{palatino}
% \ifpdf
% \usepackage[pdftex]{graphicx}
% \graphicspath{{./pdf/}{./jpg/}}
% \DeclareGraphicsExtensions{.pdf,.jpg,.png}
% \else
% \usepackage[dvips]{graphicx}
% \graphicspath{{./eps/}}
% \DeclareGraphicsExtensions{.eps}
% \fi
\usepackage[ruled]{algorithm}
\usepackage{algorithmic}
\usepackage{ulem}
\usepackage{tikz}
\usetikzlibrary{arrows,calc}
\usetikzlibrary{shadows}
\usetikzlibrary{decorations,decorations.pathmorphing}
\usetikzlibrary{positioning,shapes}
% Global TikZ keys, set once in the preamble.
\tikzset{%
  % Default arrow tip.
  >=stealth',
  % Style `axis': arrow heads at both ends.
  axis/.style={<->},
  % Named line styles; all are thick and differ only in their dash pattern.
  help lines/.style={dashed, thick},
  generation lines/.style={thick},
  transmission lines/.style={dotted, thick},
  distribution lines/.style={densely dotted, thick},
  important line/.style={thick},
  connection/.style={thick, dotted},
}
% PGF/TikZ sequence diagrams.
\usepackage[underline=true,rounded corners=false]{pgf-umlsd}
% Decimal-aligned column types built on dcolumn's
% D{<input separator>}{<output separator>}{<decimal places>}.
% d{<places>}: numbers are typed with `.' and printed with \cdot as the mark.
\newcolumntype{d}[1]{D{.}{\cdot}{#1}}
% .{<places>}: numbers are typed and printed with `.' as the mark.
\newcolumntype{.}[1]{D{.}{.}{#1}}
% Roman-numeral helpers.
%   \rmnum{<number>}  lower-case roman numeral, e.g. \rmnum{4} -> iv
%   \Rmnum{<number>}  upper-case roman numeral, e.g. \Rmnum{4} -> IV
% \makeatletter is needed because \Rmnum calls the kernel-internal
% \@slowromancap and passes it `@' as the end-of-input sentinel.
\makeatletter
% \numexpr ... \relax delimits the number explicitly, so TeX neither keeps
% scanning into digits that follow the macro call nor swallows the space
% after it.
\newcommand{\rmnum}[1]{\romannumeral\numexpr #1\relax}
\newcommand{\Rmnum}[1]{\expandafter\@slowromancap\romannumeral #1@}
\makeatother
% --- Notation and naming shorthands ---
% \abs{x}: x between \lvert and \rvert.
\newcommand{\abs}[1]{\lvert#1\rvert}
% \diag: bold ``diag'' wrapped in \mathop so that it is spaced as an operator.
\newcommand{\diag}{\mathop{\mathbf{diag}}}
% Software names.  Each expansion ends in a tie (~), so the macro already
% supplies the (non-breaking) space before the next word; directly before
% punctuation that tie would leave an unwanted gap.
\newcommand{\matpower}{\textsc{Matpower~}}
\newcommand{\matlab}{Matlab~}
\newcommand{\psat}{\textsc{Psat~}}
\newcommand{\pylon}{Pylon~}
% NOTE(review): \stable and \unstable currently expand to the same symbol
% ($\bullet$); confirm whether two different markers were intended.
\newcommand{\stable}{$\bullet$}
\newcommand{\unstable}{$\bullet$}
% \e{n}: ``\times 10^{n}''; \ensuremath lets it be used in text or in math.
% \providecommand leaves any existing definition of \e untouched.
\providecommand{\e}[1]{\ensuremath{\times 10^{#1}}}
% \newcommand{\CC}{C\nolinebreak\hspace{-.05em}\raisebox{.4ex}{\tiny\bf
% +}\nolinebreak\hspace{-.10em}\raisebox{.4ex}{\tiny\bf +}}
% C++ logo: a capital C followed by a tiny, raised, bold ``++'', with a line
% break between the two parts discouraged as strongly as \nolinebreak allows.
% \normalfont\bfseries replaces the obsolete two-letter switch \bf (which means
% exactly \normalfont\bfseries in the standard classes), so the macro no longer
% relies on the document class providing \bf.
\def\CC{{C\nolinebreak[4]\hspace{-.05em}\raisebox{.4ex}{\tiny\normalfont\bfseries ++}}}
% Optional argument for the parenthesis in bordermatrix.
\makeatletter
% Redefinition of \bordermatrix with two optional extras in front of the
% matrix body:
%   \bordermatrix{<rows>}             delimiters default to ( and )
%   \bordermatrix[<left><right>]{..}  the two delimiter tokens to use instead
%   \bordermatrix*...                 starred variant, recorded in
%                                     \if@borderstar, which switches the kerns
%                                     used when the pieces are assembled
% NOTE(review): the visual difference produced by the starred variant is not
% described anywhere in this file -- confirm by compiling both forms.
\newif\if@borderstar
% Entry point: note whether a * follows, then continue with \@bordermatrix@i,
% which always expects a literal * (one is supplied when the user gave none).
\def\bordermatrix{\@ifnextchar*{%
\@borderstartrue\@bordermatrix@i}{\@borderstarfalse\@bordermatrix@i*}%
}
% Consume the *, then look for a bracketed delimiter pair; fall back to [()].
\def\@bordermatrix@i*{\@ifnextchar[{\@bordermatrix@ii}{\@bordermatrix@ii[()]}}
% Worker.  #1 = the delimiter pair (split by \@firstoftwo / \@secondoftwo and
% handed to \left / \right), #2 = the matrix rows.  The rows are set with
% \ialign into box 0 and the delimited result is assembled from boxes 0-2, all
% inside a group.
% NOTE(review): the box handling looks derived from plain TeX's \bordermatrix;
% it is order-sensitive and has deliberately been left untouched.
\def\@bordermatrix@ii[#1]#2{%
\begingroup
\m@th\@tempdima8.75\p@\setbox\z@\vbox{%
\def\cr{\crcr\noalign{\kern 2\p@\global\let\cr\endline }}%
\ialign {$##$\hfil\kern 2\p@\kern\@tempdima & \thinspace %
\hfil $##$\hfil && \quad\hfil $##$\hfil\crcr\omit\strut %
\hfil\crcr\noalign{\kern -\baselineskip}#2\crcr\omit %
\strut\cr}}%
\setbox\tw@\vbox{\unvcopy\z@\global\setbox\@ne\lastbox}%
\setbox\tw@\hbox{\unhbox\@ne\unskip\global\setbox\@ne\lastbox}%
\setbox\tw@\hbox{%
$\kern\wd\@ne\kern -\@tempdima\left\@firstoftwo#1%
\if@borderstar\kern2pt\else\kern -\wd\@ne\fi%
\global\setbox\@ne\vbox{\box\@ne\if@borderstar\else\kern 2\p@\fi}%
\vcenter{\if@borderstar\else\kern -\ht\@ne\fi%
\unvbox\z@\kern-\if@borderstar2\fi\baselineskip}%
\if@borderstar\kern-2\@tempdima\kern2\p@\else\,\fi\right\@secondoftwo#1 $%
}\null \;\vbox{\kern\ht\@ne\box\tw@}%
\endgroup
}
\makeatother
% Remove figures to speed up build (should be possible using 'draft').
% The flag is only declared and set here (\newboolean / \setboolean come from
% the ifthen package).  NOTE(review): presumably the chapter files test it --
% no use of it is visible in this file.
\newboolean{includefigures}
\setboolean{includefigures}{true}
% Select chapters to build.
% Only \include files named in this list are processed; remove entries to
% build a subset.  The names match the \include commands in the document body.
\includeonly{
introduction,
background,
related_work,
method,
results,
conclusion,
appendix
}
% PDF document-information entries; only emitted when \ifpdf (from the ifpdf
% package) reports PDF output.
\ifpdf
  \pdfinfo{%
    /Author (Richard W. Lincoln)
    /Title (Learning to Trade Power)
    /Subject (Electrical Power Engineering)%
  }
\fi
% Title and author as picked up by \maketitle.
\title{Learning to Trade Power}
%\title{Learning Power Trade}
\author{Richard W. Lincoln}
% Select the `plain' page style.
\pagestyle{plain}
\begin{document}
% Title page.
\maketitle
% Number the front matter in lower-case roman numerals, starting from i.
% NOTE(review): \pagenumbering already resets the page counter to 1, so the
% explicit \setcounter looks redundant -- confirm before removing it.
\setcounter{page}{1}
\pagenumbering{roman}
% NOTE(review): \declaration is not defined in this file; presumably it is
% provided by the strath_thesis class.
\declaration
\begin{acknowledgements}
I wish to thank Professor Jim McDonald for giving me the opportunity to study
at The Institute for Energy and Environment and for permitting me the freedom
to pursue my own research interests. I also wish to thank my supervisors,
Professor~Graeme~Burt and Dr~Stuart~Galloway, for their guidance and
scholarship. Most of all, I wish to thank my parents, my big brother and my
little sister for all of their support throughout my PhD.
This thesis leverages several open source software projects developed by
researchers from other institutions. I wish to thank the researchers from
Cornell University involved in the development of the optimal power flow
formulations in \textsc{Matpower}, most especially Dr~Ray~Zimmerman. I am
similarly grateful for the work by researchers at the Dalle Molle Institute for
Artificial Intelligence (IDSIA) and the Technical University of Munich on
reinforcement learning algorithm and artificial neural network implementations.
This research was funded by the United Kingdom Engineering and Physical Sciences
Research Council through the Supergen Highly Distributed Power Systems
consortium under grant GR/T28836/01.
\end{acknowledgements}
\begin{abstract}
In electrical power engineering, learning algorithms can be used to model
the strategies of electricity market participants. The objective of this thesis
is to establish if policy gradient reinforcement learning algorithms
can be used to create participant models superior to those using previously
applied value function based methods.
% Electricity is a unique commodity. It must be supplied at the precise moment
% that it is demanded, as there are no practical methods for significant levels
% of storage in most systems. There are also few options for controlling the
% direction in which power flows around networks, so generation or consumption
% at one point in the system has an effect at all other points.
Supply of electricity involves technology, money, people, natural resources and
the environment. All of these aspects are changing and electricity market
architectures must be suitably researched to ensure that those used are fit for
purpose. In this thesis electricity markets are modelled as non-linear
constrained optimisation problems, which are solved using a primal-dual interior
point method. Policy gradient reinforcement learning algorithms are used to
adjust the parameters of multi-layer feed-forward artificial neural networks
that approximate each market participant's policy for selecting power quantities
and prices that are offered in the simulated marketplace. Traditional
reinforcement learning methods, which learn a value function, have been
previously applied in simulated electricity trade, but they are mostly
restricted to use with discrete representations of a market environment. Policy
gradient methods have been shown to offer convergence guarantees in continuous
multi-dimensional environments and avoid many of the problems that mar value
function based methods.
This thesis presents the first application of unsupervised policy gradient
reinforcement methods in simulated electricity trade. It also presents the
first use of a non-linear AC optimal power flow formulation in agent-based
electricity market simulation. Policy gradient methods are found to be a valid
option for modelling participant strategies in complex and continuous
multivariate market environments. They are outperformed by traditional
action-value function based algorithms in many of the tests conducted, but
several possibilities for improving the approach taken are identified. Further
development of this research could lead to unsupervised learning algorithms
being used in new decision support applications and in automated energy trade.
% Five types of learning algorithm are compared in a series of Nash equilibrium
% and constraint exploitation simulations. Policy gradient methods are found to
% be a valid option for modelling the strategies of electricity market
% participants, but they are outperformed by a traditional action-value function
% algorithm in all of the tests. Further development of this research could
% provide opportunities for advanced learning algorithms to be used in decision
% support and automated energy trade applications.
% The benefits of using policy gradient methods in electricity market simulation
% are explored and the results demonstrate their superior trading ability when
% operating in large constrained networks. By introducing advanced unsupervised
% learning methods to electricity market simulation, previous research in the
% field may be revisited and enhanced and an opportunity to use policy gradient
% methods in decision support and automated energy trading applications is
% opened.
% Reinforcement learning methods that use connectionist systems for value
% function approximation offer few convergence guarantees, even in simple
% systems. Table-based value function reinforcement learning methods have been
% used previously for the simulation of electricity markets, but they operate
% only in discrete action and sensor domains. If learning algorithms are to
% deliver on their potential for application in operational settings then it
% will be necessary for them to operate in continuous domains. The principal
% contribution of this thesis is the demonstration of policy-gradient
% reinforcement learning algorithms being applied to continuous representations
% of electricity trading problems, showing that superior use of sensor data
% results in improved overall performance when compared with previously applied
% value-function methods. From this it follows that learning methods which
% search directly in the policy space will be better suited to decision support
% applications and automated electric power trade.
\end{abstract}
% Front-matter lists.
\tableofcontents
\newpage
% Each \addcontentsline is issued on the fresh page *before* its list, so the
% contents entry records the page on which the list starts.  Issued after the
% list it records the page on which the list ends, which is wrong as soon as
% the list runs over more than one page.
% NOTE(review): if the class is used two-sided with chapters opening on
% right-hand pages, use \cleardoublepage in place of \newpage before each
% \addcontentsline.
\addcontentsline{toc}{chapter}{List of Figures}
\listoffigures
\newpage
\addcontentsline{toc}{chapter}{List of Tables}
\listoftables
\newpage
% Main matter: restart page numbering in arabic numerals and switch to
% one-and-a-half line spacing (\onehalfspacing is not defined in this file;
% presumably it comes from the class or a package it loads).
\pagenumbering{arabic}
\setcounter{page}{1}
\onehalfspacing
% Chapters.  Every \include starts its file on a new page and is skipped
% unless the file is named in the \includeonly list in the preamble.
% Set the scene and problem statement. Introduce structure of thesis, state
% contributions (3-5).
\include{introduction}
% Demonstrate wider appreciation (context). Provide motivation.
\include{background}
% Survey and critical assessment. Relation to own work.
\include{related_work}
% Analysis, design, implementation and interpretation of results.
\include{method}
% Critical assessment of own work.
\include{results}
% State hypothesis. Further Work. Restate contribution.
\include{conclusion}
% References: formatted with the apacite style from the `literature' BibTeX
% database; \bibname is set to ``Bibliography''.
\bibliographystyle{apacite}
\renewcommand{\bibname}{Bibliography}
\bibliography{literature}
%\addcontentsline{toc}{chapter}{Bibliography}
% Everything after \appendix is numbered as an appendix.
\appendix
\include{appendix}
\end{document}