
adjustments to lfa-rl

1 parent 54cba97 · commit 8db19dc9132528cdeaf91eddf9e537195ba11da0 · @schaul committed Jan 26, 2011
Showing with 13 additions and 12 deletions.
  1. +2 −4 docs/tutorials/rl.py
  2. +1 −3 examples/rl/valuebased/td.py
  3. +10 −5 pybrain/rl/learners/valuebased/linearfa.py

docs/tutorials/rl.py
@@ -16,16 +16,14 @@
components from PyBrain:
"""
-from scipy import *
-import sys, time
+from scipy import * #@UnusedWildImport
import pylab
from pybrain.rl.environments.mazes import Maze, MDPMazeTask
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.agents import LearningAgent
-from pybrain.rl.learners import Q, SARSA
+from pybrain.rl.learners import Q, SARSA #@UnusedImport
from pybrain.rl.experiments import Experiment
-from pybrain.rl.environments import Task
"""

examples/rl/valuebased/td.py
@@ -6,8 +6,7 @@
fully observable MDP maze task. The goal point is the top right free
field. """
-from scipy import *
-import sys, time
+from scipy import * #@UnusedWildImport
import pylab
from pybrain.rl.environments.mazes import Maze, MDPMazeTask
@@ -16,7 +15,6 @@
from pybrain.rl.learners import Q, QLambda, SARSA #@UnusedImport
from pybrain.rl.explorers import BoltzmannExplorer #@UnusedImport
from pybrain.rl.experiments import Experiment
-from pybrain.rl.environments import Task
# create the maze with walls (1)

pybrain/rl/learners/valuebased/linearfa.py
@@ -30,7 +30,7 @@ class LinearFALearner(ValueBasedLearner):
learningRate = 0.5 # aka alpha: make sure this is being decreased by calls from the learning agent!
learningRateDecay = 100 # aka n_0, but counting decay-calls
- randomInit = False
+ randomInit = True
rewardDiscount = 0.99 # aka gamma
@@ -44,7 +44,7 @@ def __init__(self, num_actions, num_features, **kwargs):
self.num_actions = num_actions
self.num_features = num_features
if self.randomInit:
- self._theta = randn(self.num_actions, self.num_features) / 100.
+ self._theta = randn(self.num_actions, self.num_features) / 10.
else:
self._theta = zeros((self.num_actions, self.num_features))
self._additionalInit()
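
Note on the two hunks above (commentary, not part of the commit): with randomInit now defaulting to True and the scaling relaxed from /100 to /10, a freshly built learner starts from small Gaussian weights instead of zeros. A minimal standalone sketch of that initialization, using made-up sizes (4 actions, 16 features) and plain numpy in place of the wildcard scipy import:

from numpy import zeros
from numpy.random import randn

num_actions, num_features = 4, 16   # illustrative sizes, not from the commit
random_init = True                  # the new default

if random_init:
    # small Gaussian weights, scaled by 1/10 rather than the old 1/100
    theta = randn(num_actions, num_features) / 10.
else:
    theta = zeros((num_actions, num_features))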
@@ -83,11 +83,12 @@ def _boltzmannProbs(qvalues, temperature=1.):
return tmp / sum(tmp)
def reset(self):
- ValueBasedLearner.reset(self)
- self.newEpisode()
+ ValueBasedLearner.reset(self)
self._callcount = 0
+ self.newEpisode()
- def _decayLearningRate(self):
+ def newEpisode(self):
+ ValueBasedLearner.newEpisode(self)
self._callcount += 1
self.learningRate *= ((self.learningRateDecay + self._callcount)
/ (self.learningRateDecay + self._callcount + 1.))
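
The decay that used to live in _decayLearningRate is now applied by newEpisode itself, and reset() zeroes the call counter before calling newEpisode(), which now both counts episodes and decays the rate. Each call shrinks learningRate by the factor (n_0 + k) / (n_0 + k + 1), so after k episodes the rate is alpha_0 * (n_0 + 1) / (n_0 + k + 1). A small self-contained sketch of that schedule with the class defaults (commentary, not part of the diff):

learning_rate = 0.5          # alpha_0, the class default
learning_rate_decay = 100    # n_0, the class default
callcount = 0

def new_episode():
    # mirrors LinearFALearner.newEpisode(): one decay step per episode
    global learning_rate, callcount
    callcount += 1
    learning_rate *= ((learning_rate_decay + callcount)
                      / (learning_rate_decay + callcount + 1.))

for _ in range(1000):
    new_episode()
print(learning_rate)   # 0.5 * 101 / 1101, roughly 0.046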
@@ -99,6 +100,9 @@ class Q_LinFA(LinearFALearner):
def _updateWeights(self, state, action, reward, next_state):
""" state and next_state are vectors, action is an integer. """
td_error = reward + self.rewardDiscount * max(dot(self._theta, next_state)) - dot(self._theta[action], state)
+ #print action, reward, td_error,self._theta[action], state, dot(self._theta[action], state)
+ #print self.learningRate * td_error * state
+ #print
self._theta[action] += self.learningRate * td_error * state
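
The commented-out prints are debugging aids around the standard Q-learning update with linear function approximation shown here: td_error = reward + gamma * max_a(theta_a . next_state) - theta_action . state, followed by theta_action += alpha * td_error * state. A self-contained sketch of the same update outside the class, with made-up feature vectors (commentary, not part of the commit):

import numpy as np

gamma, alpha = 0.99, 0.5     # rewardDiscount and learningRate defaults
theta = np.zeros((4, 16))    # one weight row per action; sizes are illustrative

def update_weights(state, action, reward, next_state):
    # same TD update as Q_LinFA._updateWeights above
    td_error = reward + gamma * np.max(theta.dot(next_state)) - theta[action].dot(state)
    theta[action] += alpha * td_error * state

rng = np.random.default_rng(0)
s, s_next = rng.random(16), rng.random(16)
update_weights(s, action=2, reward=1.0, next_state=s_next)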
@@ -109,6 +113,7 @@ class QLambda_LinFA(LinearFALearner):
def newEpisode(self):
""" Reset eligibilities after each episode. """
+ LinearFALearner.newEpisode(self)
self._etraces = zeros((self.num_actions, self.num_features))
def _updateEtraces(self, state, action, responsibility=1.):
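
QLambda_LinFA.newEpisode now calls the base-class newEpisode first, so the learning rate also decays once per episode before the eligibility traces are cleared. The body of _updateEtraces is not shown in this diff; purely as an illustration of a common accumulating-trace scheme (not necessarily what PyBrain does), a trace update could look like:

import numpy as np

gamma, trace_decay = 0.99, 0.9   # trace_decay (lambda) is an assumed value, not from the diff
etraces = np.zeros((4, 16))      # same shape as _theta; sizes are illustrative

def update_etraces(state, action, responsibility=1.):
    # decay every trace, then accumulate the features of the action actually taken
    global etraces
    etraces *= gamma * trace_decay * responsibility
    etraces[action] += state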
