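'''
Environment for a battery/solar energy-management RL agent.

State per hour: [net load, battery energy level, electricity price].
Action: battery power (positive = charge, negative = discharge).
Reward: negative cost of grid energy, with a fixed penalty for actions
that would push grid power below zero. (Summary derived from the code
below.)
'''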
from copy import deepcopy

import pandas as pd

debug = False  # set True to enable debug printing
class Environment:
    def __init__(self, Gamma, eta, day_chunk, total_years):
        self.eta = eta              # battery charging efficiency
        self.Gamma = Gamma          # discount factor
        self.start = 0              # offset into the data (picks the season)
        self.day_chunk = day_chunk  # number of days used per training cycle
        self.df_solar = pd.read_csv('./agrregate_solar.csv')
        self.df_load = pd.read_csv('./load_data_peak6.csv')
        self.training_time = total_years
        # Net load = load - solar, restricted to the first `day_chunk` days
        # (change by Siddharth). The deprecated `.ix` indexer is replaced by
        # `.iloc`; the exclusive end selects the same rows.
        self.diff = (self.df_load.iloc[0:self.day_chunk]
                     - self.df_solar.iloc[0:self.day_chunk])
        # Tile the chunk once per training year so episodes wrap around.
        self.net_load = pd.concat([self.diff] * self.training_time,
                                  ignore_index=True).values.tolist()
        self.currentState = None
    def setCurrentState(self, episode_number, E_init):
        '''
        Sets the initial state (0th hour) for `episode_number`.

        episode_number -- index of the episode/day
        E_init         -- initial battery energy level, passed by the agent
        Returns the initial state, which also becomes self.currentState.
        '''
        net_load = float(self.net_load[episode_number][0])
        energy_level = E_init
        price = self.getPrice(0)
        initialState = [net_load, energy_level, price]
        self.currentState = initialState
        return initialState
    def nextStep(self, episode_number, time_step, action_sequence, k, FA, agent):
        '''
        Perform constraint checking (energy, grid) and assign the
        penalty/reward.

        Output: next state, k-step target (cumulative reward plus discounted
        bootstrap value), and a boolean that is True when the grid constraint
        was violated.

        Note (Siddharth): self.currentState is kept coherent between the
        learningAgent, main() and Environment classes.
        '''
        lastState, cumulativeReward = self.getCumulativeReward(
            episode_number, time_step, k, action_sequence)
        self.currentState, reward, violated = self.getNextState(
            episode_number, time_step, self.currentState, action_sequence[0])
        if lastState is None:
            # A bad action in the sequence (P_grid < 0): return the penalty
            # without bootstrapping.
            return self.currentState, cumulativeReward, violated
        qValueLastState = FA.predictQvalue(
            lastState, agent, agent.getLegalActions(lastState))
        # k-step target: R1 + Gamma*R2 + ... + Gamma^k*R_{k+1}
        #                + Gamma^(k+1) * Q(lastState)
        return (self.currentState,
                cumulativeReward + (self.Gamma ** (k + 1)) * qValueLastState,
                violated)
    def getCumulativeReward(self, episode_number, time_step, k, actions):
        '''
        Roll the environment forward k+1 steps under `actions` and return
        (lastState, discounted cumulative reward). Returns (None, penalty)
        as soon as an action violates the grid constraint.
        '''
        gamma = 1
        state = deepcopy(self.currentState)
        cr = 0
        for i in range(0, k + 1):
            lastState, reward, violated = self.getNextState(
                episode_number, time_step, state, actions[i])
            time_step += 1
            if violated:
                # Constraint broken: propagate the penalty, no valid state.
                return None, reward
            cr += gamma * reward
            gamma *= self.Gamma
            state = deepcopy(lastState)
        return state, cr  # target prefix: R1 + Gamma*R2 + ... (Q added by caller)
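    # Example (illustrative, hypothetical numbers): with k = 1 and
    # Gamma = 0.9, two valid steps with rewards R1 = -0.2 and R2 = -0.1
    # give cr = -0.2 + 0.9 * (-0.1) = -0.29, and nextStep() then adds
    # Gamma**2 * Q(lastState) to complete the target.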
    def getP_grid(self, state, action):
        '''Grid power drawn for `action` in `state` (net load + battery flow).'''
        if action >= 0:
            P_charge, P_discharge = action, 0.0
        else:
            P_charge, P_discharge = 0.0, action  # action < 0 means discharging
        P_grid = state[0] + P_charge + P_discharge
        return P_grid
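    # Example (hypothetical numbers): net load 2.0 with a charging action of
    # +1.0 gives P_grid = 3.0 (buy from the grid); a discharging action of
    # -1.0 gives P_grid = 1.0. P_grid < 0 is treated as a violation below.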
    def getNextState(self, episode_number, time_step, state_k, action_k):
        '''
        Apply `action_k` in `state_k` and return (nextState, reward, violated).
        `violated` is True when the action would push grid power below zero;
        the next state is still computed so callers keep a coherent
        currentState.
        '''
        current_netload = state_k[0]
        current_energy = state_k[1]
        if action_k >= 0:
            P_charge, P_discharge = action_k, 0.0
        else:
            P_charge, P_discharge = 0.0, action_k
        # Charging is scaled by the battery efficiency; discharging
        # (negative power) drains the battery directly.
        E_next = current_energy + self.eta * P_charge + P_discharge
        P_grid = current_netload + P_charge + P_discharge
        violated = (P_grid < 0)
        reward = -P_grid * self.getPrice(time_step)  # cost of grid energy
        if violated:
            reward = -10  # fixed penalty for breaking the grid constraint
        price = self.getPrice(time_step + 1)
        nextState = [self.getNetload(episode_number, time_step + 1), E_next, price]
        return nextState, reward, violated
    def getMaxQvalue(self, state, agent, model):
        '''
        Return ([optimalAction], maxQ) over the legal actions in `state`.
        `model` (the function approximator) is now an explicit parameter;
        the original body referenced an undefined global of that name.
        '''
        legalActions = agent.getLegalActions(state, self)
        QValue = None
        optimalAction = None
        for action in legalActions:
            q = model.predictQvalue(state, action)
            if QValue is None or q > QValue:
                QValue = q
                optimalAction = action
        return [optimalAction], QValue
    def getNetload(self, episode_number, timeStep):
        '''Net load for the given hour, wrapping into the next day past
        hour 23 (change by Siddharth).'''
        if timeStep > 23:
            episode_number += 1
            episode_number %= self.day_chunk * self.training_time
            timeStep %= 24
        return self.net_load[episode_number][timeStep]
    def getPrice(self, timeStep):
        '''Time-of-use electricity price (per unit energy) for the given hour.'''
        price = [.040, .040, .040, .040, .040, .040, .080, .080, .080, .080,
                 .040, .040, .080, .080, .080, .040, .040, .120, .120, .040,
                 .040, .040, .040, .040]
        if timeStep > 23:
            timeStep %= 24
        return price[timeStep]
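    # The tariff above is off-peak at 0.040, mid-peak at 0.080 (hours 6-9
    # and 12-14), and peaks at 0.120 for hours 17-18.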
    def getSolar(self, episode_number, timeStep):
        '''Solar generation for the given hour of `episode_number`.'''
        if timeStep > 23:
            episode_number += 1
            episode_number %= self.day_chunk * self.training_time
            timeStep %= 24
        solar_chunk = self.df_solar.iloc[self.start:self.start + self.day_chunk].reset_index(drop=True)
        # NOTE: the original indexed row 0 here rather than episode_number;
        # using episode_number mirrors getLoad() and is the likely intent.
        return solar_chunk.iloc[episode_number, timeStep]
    def getLoad(self, episode_number, timeStep):
        '''Load demand for the given hour of `episode_number`.'''
        if timeStep > 23:
            episode_number += 1
            episode_number %= self.day_chunk * self.training_time
            timeStep %= 24
        load_chunk = self.df_load.iloc[self.start:self.start + self.day_chunk].reset_index(drop=True)
        return load_chunk.iloc[episode_number, timeStep]
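
# --- Minimal usage sketch (illustrative only) ---
# Assumes the two CSV files exist next to this script; the numbers below
# (Gamma, eta, E_init, the charging action) are hypothetical. The full loop
# additionally needs the agent/FA objects constructed elsewhere.
if __name__ == '__main__':
    env = Environment(Gamma=0.9, eta=0.95, day_chunk=15, total_years=10)
    state = env.setCurrentState(episode_number=0, E_init=2.0)
    print('initial state [net_load, energy, price]:', state)
    # One manual transition with a charging action of 0.5:
    nextState, reward, violated = env.getNextState(0, 0, state, 0.5)
    print('next state:', nextState, 'reward:', reward, 'violated:', violated)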