In [20]:
import numpy as np 
import os
import random
import tensorflow
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.models import load_model
from tqdm import tqdm

# Building the Environment

In [21]:
class Environment(object):
    def __init__(self, optimal_temperature = [18.0,24.0], initial_month = 0, 
              initial_number_users = 10, initial_rate_data = 60):
        
        self.initial_month = initial_month
        self.monthly_atmospheric_temperatures = [1.0, 5.0, 7.0, 10.0, 11.0, 20.0, 23.0, 24.0, 22.0, 10.0, 5.0, 1.0]
        self.atmospheric_temperature = self.monthly_atmospheric_temperatures[initial_month]
        self.optimal_temperature = optimal_temperature
        self.min_temperature = -20
        self.max_temperature = 80
        
        self.min_number_users = 10
        self.max_number_users = 100
        self.max_update_users = 5
        self.initial_number_users = initial_number_users
        self.current_number_users = initial_number_users
        
        self.min_rate_data = 20
        self.max_rate_data = 300
        self.max_update_data = 10
        self.initial_rate_data = initial_rate_data
        self.current_rate_data = initial_rate_data
        
        self.intrinisic_temperature = self.atmospheric_temperature + 1.25 * self.current_number_users + 1.25 * self.current_rate_data
        self.temperature_ai = self.intrinisic_temperature
        self.temperature_no_ai = (self.optimal_temperature[0] + self.optimal_temperature[1]) / 2.0  # mid of optimal range
        
        self.total_energy_ai = 0.0
        self. total_energy_no_ai = 0.0
        
        self.reward = 0.0
        self.train_over = 0
        self.train = 1 # train or inference mode
        
    # Update environment after AI plays an action
    def update_env(self, direction, energy_ai, month):
        # direction = change of temperature by AI incr or decr +1 or -1
        
        # GETTING THE REWARD
        # Computing the energy spent by the server's cooling system when there is no AI
        energy_no_ai = 0
        if(self.temperature_no_ai < self.optimal_temperature[0]):
            energy_no_ai = self.optimal_temperature[0] - self.temperature_no_ai
            self.temperature_no_ai = self.optimal_temperature[0]
        elif (self.temperature_no_ai > self.optimal_temperature[1]):
            energy_no_ai = self.temperature_no_ai - self.optimal_temperature[1]
            self.temperature_no_ai = self.optimal_temperature[1]
        # Computing the reward and scaling the reward
        self.reward = energy_no_ai - energy_ai
        self.reward = 1e-3 * self.reward
        
        # GETTING NEXT STATE
        # Updating the atmospheric temperature
        self.atmospheric_temperature = self.monthly_atmospheric_temperatures[month]
        # Updating the number of users between the min / max range
        self.current_number_users += np.random.randint(-self.max_update_users, self.max_update_users)
        if(self.current_number_users > self.max_number_users):
            self.current_number_users = self.max_number_users
        elif(self.current_number_users < self.min_number_users):
            self.current_number_users = self.min_number_users
        # Updating the rate of data between the min/max range
        self.current_rate_data += np.random.randint(-self.max_update_data, self.max_update_data)
        if(self.current_rate_data > self.max_rate_data):
            self.current_rate_data = self.max_rate_data
        elif(self.current_rate_data < self.min_rate_data):
            self.current_rate_data = self.min_rate_data
        # Computing the Delta of Intrinsic Temperature
        past_intrinsic_temperature = self.intrinisic_temperature  #T° of server before action
        self.intrinisic_temperature = self.atmospheric_temperature + 1.25 * self.current_number_users \
            + 1.25 * self.current_rate_data #T° of server before action
        delta_intrinsic_temperature = self.intrinisic_temperature - past_intrinsic_temperature
        # Computing the Delta of temperature caused by the AI Action
        if(direction == -1):
            delta_temperature_ai = -energy_ai  # energy cost = abs delta of T° change by assumption
        elif(direction == 1):
            delta_temperature_ai = energy_ai
        # Updating the new Server's Temperature when there is the AI
        self.temperature_ai += delta_intrinsic_temperature + delta_temperature_ai
        # Updating the new Server's Temperature when tehre is no AI
        self.temperature_no_ai += delta_intrinsic_temperature
        
        # GETTING TRAIN OVER (allows to end of an epoch if T° out of bound during training)
        if(self.temperature_ai < self.min_temperature):
            if self.train == 1:
                self.train_over = 1
            else : 
                self.total_energy_ai += self.optimal_temperature[0] - self.temperature_ai
                self.temperature_ai = self.optimal_temperature[0]
        elif (self.temperature_ai > self.max_temperature):
            if self.train == 1:
                self.train_over = 1
            else : 
                self.total_energy_ai += self.temperature_ai - self.optimal_temperature[1]
                self.temperature_ai = self.optimal_temperature[1]
        
        # UPDATING THE SCORES
        self.total_energy_ai += energy_ai
        self.total_energy_no_ai += energy_no_ai
        
        # NORMALIZE NEXT STATE (state vector to be fed to neural network)
        scaled_temperature_ai = (self.temperature_ai - self.min_temperature) / \
            (self.max_temperature - self.min_temperature)
        scaled_number_users = (self.current_number_users - self.min_number_users) / \
            (self.max_number_users - self.min_number_users)
        scaled_rate_data = (self.current_rate_data - self.min_rate_data) / \
            (self.max_rate_data - self.min_rate_data)    
        # Create vector for updated state
        next_state = np.matrix([scaled_temperature_ai, scaled_number_users, scaled_rate_data])
        return next_state, self.reward, self.train_over
    
    # METHOD THAT RESETS THE ENVIRONMENT
    def reset(self, new_month):
        self.atmospheric_temperature = self.monthly_atmospheric_temperatures[new_month]
        self.initial_month = new_month
        self.current_number_users = self.initial_number_users
        self.current_rate_data = self.initial_rate_data
        self.intrinisic_temperature = self.atmospheric_temperature + 1.25 * self.current_number_users \
            + 1.25 * self.current_rate_data
        self.temperature_ai = self.intrinisic_temperature
        self.temperature_no_ai = (self.optimal_temperature[0] + self.optimal_temperature[1]) / 2.0
        self.total_energy_ai = 0.0
        self.total_energy_no_ai = 0.0
        self.reward = 0.0
        self.train_over = 0
        self.train = 1
        
    # METHOD PROVIDING CURRENT STATE, LAST REWARD AND WHETHER THE TRAIN IS OVER
    def observe(self):
        scaled_temperature_ai = (self.temperature_ai - self.min_temperature) / \
            (self.max_temperature - self.min_temperature)
        scaled_number_users = (self.current_number_users - self.min_number_users) / \
            (self.max_number_users - self.min_number_users)
        scaled_rate_data = (self.current_rate_data - self.min_rate_data) / \
            (self.max_rate_data - self.min_rate_data)
        # calc vector of current state
        current_state = np.matrix([scaled_temperature_ai, scaled_number_users, scaled_rate_data])
        return current_state, self.reward, self.train_over

# Building Neural Network

In [22]:
class Neural_Network(object):
    def __init__(self, learning_rate = 0.001, number_actions = 5):
        self.learning_rate = learning_rate
        self.number_actions = number_actions
        states = Input(shape = (3,))
        x = Dense(units = 64, activation = 'sigmoid') (states)
        # x = Dropout(rate = 0.1)(x)
        x = Dense(units = 32, activation = 'sigmoid') (x)
        # x = Dropout(rate = 0.1)(x)
        q_values = Dense(units = self.number_actions, activation = 'softmax') (x)
        
        self.model = Model(inputs = states, outputs = q_values)
        self.model.compile(loss = 'mse', optimizer = Adam(learning_rate = self.learning_rate))

# Implement Deep Q-Learning with Experience Replay

In [23]:
class DQN(object):
    
    # INITIALIZE ALL THE PARAMETERS AND VARIABLES OF THE DQN
    def __init__(self, max_memory = 100, discount = 0.9):
        self.memory = list()
        self.max_memory = max_memory
        self.discount = discount     # discount factor used in calculating the targets Q
        
    # METHOD THAT BUILDS MEMORY IN EXPERIENCE REPLAY
    def remember(self, transition, train_over):
        """arguments:
        transition: tuple of 4 elemnts (current state, action played, reward received, next state)
        train_over : 0 or 1"""
        self.memory.append([transition, train_over])
        if len(self.memory) > self.max_memory:
            del self.memory[0]                   # delete first memory element (FIFO)
            
    # CONSTRUCT BATCHES OF INPUTS AND TARGETS BY EXTRACTING TRANSITIONS FROM THE MEMORY
    def get_batch(self, model, batch_size = 10):
        len_memory = len(self.memory)
        num_inputs = self.memory[0][0][0].shape[1]  # select first elmnt of transition tuple, ie shape of state vector
        num_outputs = model.output_shape[-1]
        
        # initialize the batches
        inputs = np.zeros((min(len_memory, batch_size), num_inputs))   # typically batch_size x 3
        targets = np.zeros((min(len_memory, batch_size), num_outputs)) # typically batch_size x 5
        
        # extract random transitions from memory and populate input states and outputs Q-values
        for i, idx in enumerate(np.random.randint(0, len_memory, size = min(len_memory, batch_size))):
            current_state, action, reward, next_state = self.memory[idx][0]
            train_over = self.memory[idx][1]
            inputs[i] = current_state
            targets[i] = model.predict(current_state)[0]  # predict returns 2 elements, Q-values is first
            Q_sa = np.max(targets[i])
            if train_over:
                targets[i, action] = reward
            else:
                targets[i, action] = reward + self.discount * Q_sa
        
        return inputs, targets


# Training Phase

In [None]:
# Setting Seeds for Reproducibility
os.environ['PYTHONSAM'] = '0'
np.random.seed(42)
random.seed(12345)

# SETTING UP THE PARAMETERS
epsilon = .3    # exploration vs exploitation ratio. Here 30% exploration (random selection)
number_actions = 5
direction_boundary = (number_actions - 1) / 2   # boundary separating direction of T° change actions
number_epochs = 100
max_memory = 3000
batch_size = 512
temperature_step = 1.5

# BUILDING THE ENVIRONMENT BY CREATING AN OBJECT OF THE ENVIRONMENT CLASS
env = Environment(optimal_temperature = (18.0, 24.0), initial_month = 0, \
                  initial_number_users = 20, initial_rate_data = 30)

# BUILDING THE NEURAL NETWORK OBJECT USING NEURAL_NETWORK CLASS
neural_network = Neural_Network(learning_rate = 0.00001, number_actions = number_actions)

# BUILDING THE DQN MODEL
dqn = DQN(max_memory = max_memory, discount = 0.9)

# CHOOSING THE MODE
train = True

# TRAINING THE AI
env.train = train
model = neural_network.model
early_stopping = True
patience = 10
best_total_reward = -np.inf
patience_count = 0

if (env.train):
    
    # STARTING THE LOOP OVER ALL THE EPOCHS (1 Epoch = 5 Months)
    for epoch in range(1, number_epochs):
        
        # INITIALIZING ALL THE VARIABLES OF BOTH THE ENVIRONMENT AND THE TRAINING LOOP
        total_reward = 0
        loss = 0.
        new_month = np.random.randint(0, 12)
        env.reset(new_month = new_month)
        train_over = False
        current_state, _, _ = env.observe()
        timestep = 0
        
        # STARTING THE LOOP OVER ALL THE TIMESTEPS (1 Timestep = 1 Minute) IN ONE EPOCH
        while ((not train_over) and timestep <= 5 * 30 * 24 * 60):
            
            # PLAYING THE NEXT ACTION BY EXPLORATION
            if np.random.rand() <= epsilon:   # random pick bw [0,1] below epsilon threshold ?
                action = np.random.randint(0, number_actions)  # action bw 0 to 4, boundary = 2
                if (action - direction_boundary < 0):
                    direction = -1
                else:
                    direction = 1
                energy_ai = abs(action - direction_boundary) * temperature_step
                
            # PLAYING THE NEXT ACTION BY INFERENCE
            else:
                q_values = model.predict(current_state)
                action = np.argmax(q_values[0])
                if (action - direction_boundary < 0):
                    direction = -1
                else:
                    direction = 1
                energy_ai = abs(action - direction_boundary) * temperature_step
            
            # UPDATING THE ENVIRONMENT AND REACHING THE NEXT STATE
            next_state, reward, train_over = env.update_env(direction, energy_ai, \
                                                           int(timestep / (30 * 24 * 60)))  # month [1,5]
            total_reward += reward
            
            # STORING THIS NEW TRANSITION INTO THE MEMORY
            dqn.remember([current_state, action, reward, next_state], train_over)
            
            # GATHERING IN TWO SEPARATE BATCHES THE INPUTS AND THE TARGETS
            inputs, targets = dqn.get_batch(model, batch_size = batch_size)
            
            # COMPUTING THE LOSS OVER THE TWO WHOLE BATCHES OF INPUTS AND TARGETS
            loss += model.train_on_batch(inputs, targets)  # keras method training a minibatch with gr descent
            timestep += 1
            current_state = next_state                     # update the current state
            
        # PRINTING THE TRAINING RESULTS FOR EACH EPOCH
        print("Epoch: {:03d}/{:03d}".format(epoch, number_epochs))
        print("Total Energy spent with an AI: {:.0f}".format(env.total_energy_ai))
        print("Total Energy spent with no AI: {:.0f}".format(env.total_energy_no_ai))
        
        # EARLY STOPPING
        if (early_stopping):
            if (total_reward <= best_total_reward):
                patience_count += 1
            elif (total_reward > best_total_reward):
                best_total_reward = total_reward
                patience_count = 0
            if (patience_count >= patience):
                print("Early Stopping")
                break
        
        # SAVING THE MODEL
        model.save("model.h5")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18



Epoch: 001/100
Total Energy spent with an AI: 69
Total Energy spent with no AI: 124
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



Epoch: 002/100
Total Energy spent with an AI: 104
Total Energy spent with no AI: 171
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



Epoch: 003/100
Total Energy spent with an AI: 9
Total Energy spent with no AI: 40
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1



Epoch: 004/100
Total Energy spent with an AI: 98
Total Energy spent with no AI: 260
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

Exception ignored in: <function WeakKeyDictionary.__init__.<locals>.remove at 0x000001CAFDDF59D0>
Traceback (most recent call last):
  File "c:\Users\rafiq\anaconda3\envs\TF\lib\weakref.py", line 370, in remove
    def remove(k, selfref=ref(self)):
KeyboardInterrupt: 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29

# Evaluating Energy Management Model Performance

In [None]:
print('Evaluating one year of Energy management...')

# BUILDING THE ENVIRONMENT BY CREATING AN OBJECT OF THE ENVIRONMENT CLASS
env = Environment(optimal_temperature = (18.0, 24.0), initial_month = 0, initial_number_users = 20, initial_rate_data = 30)

# LOAD PRE-TRAINED MODEL
model = load_model("model.h5")

# CHOOSING THE MODE
train = False

# RUNNING 1 YEAR SIMULATION INFERENCE MODE
env.train = train
current_state, _, _ = env.observe()

# STARTING THE LOOP OVER 1 YEAR
for timestep in tqdm(range(12*30*24*60)):
    q_values = model.predict(current_state)
    action = np.argmax(q_values[0])
    if(action - direction_boundary < 0):
        direction = -1
    else:
        direction = 1
    energy_ai = abs(action - direction_boundary) * temperature_step
    # UPDATING ENVIRONMENT AND REACHING THE STATE
    next_state, _, _ = env.update_env(direction, energy_ai, \
        int(timestep / (30*24*60))) # month [0,11]  
    current_state = next_state    # update the current state
    
# PRINTING THE RESULTS FOR 1 YEAR
print("Total Energy spent with an AI: {:.0f}".format(env.total_energy_ai))
print("Total Energy spent with no AI: {:.0f}".format(env.total_energy_no_ai))
print("ENERGY SAVED WITH AI: {:.0f}%".format((env.total_energy_no_ai - env.total_energy_ai)/env.total_energy_no_ai*100))