In [1]:
from math import exp
import numpy as np
from gym import spaces
from collections import defaultdict
import gym

In [2]:
# states is defined by the current level of the batterie and the time
# actions are defined by the charing rate (None, low, medium, high; 0-35)
alpha = [0.5, 0.3, 0.6, 0.2, 0.5, 0.8, 0.2, 0.1]
cap = 0 
cap_max = 80
p_mi = 0 
p_max = 35
p = 0
𝜇= 30
𝜎 = 5
penalty = -5000

In [3]:
def cost_func(t, p):
    '''
    This method returns the costs for charging
    @param t:  time
    @param p:  charging rate
    '''
    return alpha[t] * exp(p)

In [4]:
def power_consumed(𝜇= 30, 𝜎 = 5, n=1):
    '''
    This method returns power consumed by the taxi driver
    @param 𝜇: mean
    @param 𝜎: standard deviation
    @param n: number of random variables
    '''
    return int(np.random.normal(𝜇,𝜎,n)[0])

In [5]:
print(power_consumed())

34


In [6]:
class SmartCharging(gym.Env):
    def __init__(self, actions, cap_max, penalty, loading_rate):
        super(SmartCharging, self).__init__()
        
        self.action_space = spaces.Discrete(actions)
        
        self.observation_space = spaces.Dict({
            'cap': spaces.Discrete(cap_max+1),
            'time': spaces.Discrete(8)
        })
        
        self.state = {
            'cap': 0,
            'time': 0
        }
        self.done = False
        
    def reset(self,cap):
        self.state = {
            'cap' : cap,
            'time' : 0
        }
        self.done = False
        return self.state
        
    def step(self,action):
        # translate action into kwh loaded in 15min
        loaded = loading_rate(action)
        reward = 0
        if loaded + cap > cap_max:
            print("1")
            # if charing would exceed totally capacity cap is set to cap_max
            
            reward -= cost_func(self.state['time'], cap_max - cap)
            self.state['cap'] = cap_max
            self.state['time'] += 1
        else:
            reward -= cost_func(self.state['time'], action)
            
            self.state['cap'] += loaded
            self.state['time'] += 1
        if self.state['time'] == 8:
            # if the final stage is reached reset the state

            consumed = power_consumed()
            if self.state['cap'] - consumed > 0:
                
                self.reset(self.state['cap'] - consumed)
            else:
                self.reset(0)
                reward += penalty
            self.done = True
                
        return reward, self.done 

In [None]:
def plot_q_values(q_table):
    

In [7]:
def loading_rate(action):
    if action == 0:
        return 0
    elif action == 1:
        return 3
    elif action == 2:
        return 6
    elif action == 3:
        return 10

In [8]:
class MC_agent():

    def __init__(self, epsilon, alpha, action_space, env):
        self.env = env
        self.q_table= defaultdict(lambda: np.zeros(action_space))
        self.epsilon = epsilon
        self.alpha = alpha
        self.action_space = action_space

    def pick_action(self, state):
        if (state['time'], state['cap']) in self.q_table.keys():
            actions = self.q_table[(state['time'],state['cap'])] 
            return self.epsilon_greedy(actions)
        else:
            return env.action_space.sample()
            
    def epsilon_greedy(self, actions):
        
        action_space = self.action_space
        epsilon = self.epsilon
        
        policy_s = np.ones(action_space) * epsilon / action_space
        best_a = np.argmax(actions)
        policy_s[best_a] = 1 - epsilon + (epsilon / action_space)

        return np.random.choice(np.arange(action_space), p=policy_s)

    def update_q_values(self, state, action, goal):
        time = state['time']
        cap = state['cap']
        self.q_table[time, cap][action] = self.q_table[time, cap][action] + self.alpha* (goal - self.q_table[time, cap][action])
        #print(f" New q-value for state: {state} action: {action} and goal: {goal} : {self.q_table[time, cap][action]}")

        

In [9]:
env = SmartCharging(4,cap_max, penalty, loading_rate)
agent = MC_agent(0.1, 0.01, 4, env)

In [11]:
for i in range(50000):
    state = env.reset(cap)
    episode = []
    rate = 0
    done = False
    # create episode
    while not done:
        # pick action
        action = agent.pick_action(state)
        #print(f"Currently in state: {state}")
        #print(f"Agent took action: {action}")
        # receive reward
        reward, done = env.step(action)
        #rint(f"And received reward: {reward}")
        # add the reward, state and action to the episode
        if reward < -1000:
            rate +=1
            print(f"emptied batterie rate: {rate/i}")
        episode.append({
            "reward": reward,
            "state": dict(state),
            "action": action
        })
        # update state
        state = env.state

    goal = 0
    # go through the episode from the end
    for observation in reversed(range(len(episode))):
        goal += episode[observation]["reward"]
        agent.update_q_values(
            state=episode[observation]["state"], 
            action=episode[observation]["action"], 
            goal = goal
        )
        
        

emptied batterie rate: 0.1111111111111111
emptied batterie rate: 0.047619047619047616
emptied batterie rate: 0.03571428571428571
emptied batterie rate: 0.03125
emptied batterie rate: 0.024390243902439025
emptied batterie rate: 0.023809523809523808
emptied batterie rate: 0.02127659574468085
emptied batterie rate: 0.020833333333333332
emptied batterie rate: 0.018867924528301886
emptied batterie rate: 0.01694915254237288
emptied batterie rate: 0.01639344262295082
emptied batterie rate: 0.014925373134328358
emptied batterie rate: 0.0136986301369863
emptied batterie rate: 0.012658227848101266
emptied batterie rate: 0.011764705882352941
emptied batterie rate: 0.010752688172043012
emptied batterie rate: 0.01
emptied batterie rate: 0.008771929824561403
emptied batterie rate: 0.007246376811594203
emptied batterie rate: 0.007042253521126761
emptied batterie rate: 0.006711409395973154
emptied batterie rate: 0.006289308176100629
emptied batterie rate: 0.005780346820809248
emptied batterie rate: 0.