<a href="https://colab.research.google.com/github/scoobiii/DeepEnergy/blob/master/Inmemory_optimize_energy_consumption_using_deep_q_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

deep_memory_


In [6]:
# Criar o script de inicialização
script = """
#!/bin/bash
# Cria o diretório /mnt/ramdisk
mkdir -p /mnt/ramdisk

# Monta o ramdisk de 10 GB utilizando tmpfs
mount -t tmpfs -o size=10G tmpfs /mnt/ramdisk

# Verifica se o ramdisk foi montado corretamente
df -h /mnt/ramdisk
"""

# Escrever o script no arquivo /root/mount_ramdisk.sh
with open('/root/mount_ramdisk.sh', 'w') as f:
    f.write(script)

# Torna o script executável
!chmod +x /root/mount_ramdisk.sh


In [1]:
# Artificial Intelligence for Business
import numpy as np
import os
import random
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.models import load_model
from tqdm import tqdm

# Building the environment
- Use a class to create different Environment objects

In [2]:
class Environment(object):
    # introduce and initialize all paramaters and variables of the environment
    def __init__(self, optimal_temperature = [18.0, 24.0], initial_month= 0, \
                 initial_number_users = 10, initial_rate_data = 60):

        self.initial_month = initial_month

        self.monthly_atmospheric_temperatures = [1.0, 5.0, 7.0, 10.0, 11.0, 20.0,
                                                 23.0, 24.0, 22.0, 10.0, 5.0, 1.0]
        self.atmospheric_temperature = self.monthly_atmospheric_temperatures[initial_month]
        self.optimal_temperature = optimal_temperature
        self.min_temperature = -20
        self.max_temperature = 80

        self.min_number_users = 10
        self.max_number_users = 100
        self.max_update_users = 5
        self.initial_number_users = initial_number_users
        self.current_number_users = initial_number_users

        self.min_rate_data = 20
        self.max_rate_data = 300
        self.max_update_data = 10
        self.initial_rate_data = initial_rate_data
        self.current_rate_data = initial_rate_data

        self.intrinsic_temperature = self.atmospheric_temperature + 1.25 * self.current_number_users \
                                    + 1.25 * self.current_rate_data
        self.temperature_ai = self.intrinsic_temperature
        self.temperature_noai = (self.optimal_temperature[0] + self.optimal_temperature[1]) / 2.0 # mid of optimal range

        self.total_energy_ai = 0.0
        self.total_energy_noai = 0.0

        self.reward = 0.0
        self.game_over = 0
        self.train = 1        # train or inference mode

    # method to update environment after AI plays an action
    def update_env(self, direction, energy_ai, month):
        """ variables:
         - direction :  change of temperature by AI incr or decr +1 or -1 """

        # GETTING THE REWARD
        # Computing the energy spent by the server's cooling system when there is no AI
        energy_noai = 0
        if (self.temperature_noai < self.optimal_temperature[0]):
            energy_noai = self.optimal_temperature[0] - self.temperature_noai
            self.temperature_noai = self.optimal_temperature[0]
        elif (self.temperature_noai > self.optimal_temperature[1]):
            energy_noai = self.temperature_noai - self.optimal_temperature[1]
            self.temperature_noai = self.optimal_temperature[1]
        # Computing the Reward and Scaling the Reward
        self.reward = energy_noai - energy_ai
        self.reward = 1e-3 * self.reward

        # GETTING NEXT STATE
        # Updating the atmospheric temperature
        self.atmospheric_temperature = self.monthly_atmospheric_temperatures[month]
        # Updating the number of users between the min / max range
        self.current_number_users += np.random.randint(-self.max_update_users, self.max_update_users)
        if (self.current_number_users > self.max_number_users):
            self.current_number_users = self.max_number_users
        elif (self.current_number_users < self.min_number_users):
            self.current_number_users = self.min_number_users
        # Updating the rate of data between the min / max range
        self.current_rate_data += np.random.randint(-self.max_update_data, self.max_update_data)
        if (self.current_rate_data > self.max_rate_data):
            self.current_rate_data = self.max_rate_data
        elif (self.current_rate_data < self.min_rate_data):
            self.current_rate_data = self.min_rate_data
        # Computing the Delta of Intrinsic Temperature
        past_intrinsic_temperature = self.intrinsic_temperature       # T° of server before action
        self.intrinsic_temperature = self.atmospheric_temperature + 1.25 * self.current_number_users \
                                     + 1.25 * self.current_rate_data  # T° of server updated
        delta_intrinsic_temperature = self.intrinsic_temperature - past_intrinsic_temperature
        # Computing the Delta of Temperature caused by the AI action
        if (direction == -1):
            delta_temperature_ai = -energy_ai  # energy cost = abs delta of T° change by assumption
        elif (direction == 1):
            delta_temperature_ai = energy_ai
        # Updating the new Server's Temperature when there is the AI
        self.temperature_ai += delta_intrinsic_temperature + delta_temperature_ai
        # Updating the new Server's Temperature when there is no AI
        self.temperature_noai += delta_intrinsic_temperature

        # GETTING GAME OVER (allows to end of an epoch if T° out of bound during training)
        if (self.temperature_ai < self.min_temperature):
            if self.train == 1:
                self.game_over = 1
            else:
                self.total_energy_ai += self.optimal_temperature[0] - self.temperature_ai
                self.temperature_ai = self.optimal_temperature[0]
        elif (self.temperature_ai > self.max_temperature):
            if self.train == 1:
                self.game_over = 1
            else:
                self.total_energy_ai += self.temperature_ai - self.optimal_temperature[1]
                self.temperature_ai = self.optimal_temperature[1]

        # UPDATING THE SCORES
        self.total_energy_ai += energy_ai
        self.total_energy_noai += energy_noai

        # NORMALIZE NEXT STATE (state vector to be fed to neural network)
        scaled_temperature_ai = (self.temperature_ai - self.min_temperature) / \
                                (self.max_temperature - self.min_temperature)
        scaled_number_users = (self.current_number_users - self.min_number_users) / \
                              (self.max_number_users - self.min_number_users)
        scaled_rate_data = (self.current_rate_data - self.min_rate_data) / \
                           (self.max_rate_data - self.min_rate_data)
        # create vector for updated state
        next_state = np.matrix([scaled_temperature_ai, scaled_number_users, scaled_rate_data])

        return next_state, self.reward, self.game_over

    # METHOD THAT RESETS THE ENVIRONMENT
    def reset(self, new_month):
        self.atmospheric_temperature = self.monthly_atmospheric_temperatures[new_month]
        self.initial_month = new_month
        self.current_number_users = self.initial_number_users
        self.current_rate_data = self.initial_rate_data
        self.intrinsic_temperature = self.atmospheric_temperature + 1.25 * self.current_number_users \
                                     + 1.25 * self.current_rate_data
        self.temperature_ai = self.intrinsic_temperature
        self.temperature_noai = (self.optimal_temperature[0] + self.optimal_temperature[1]) / 2.0
        self.total_energy_ai = 0.0
        self.total_energy_noai = 0.0
        self.reward = 0.0
        self.game_over = 0
        self.train = 1

    # METHOD PROVIDING CURRENT STATE, LAST REWARD AND WHETHER THE GAME IS OVER
    def observe(self):
        scaled_temperature_ai = (self.temperature_ai - self.min_temperature) / \
                                (self.max_temperature - self.min_temperature)
        scaled_number_users = (self.current_number_users - self.min_number_users) / \
                              (self.max_number_users - self.min_number_users)
        scaled_rate_data = (self.current_rate_data - self.min_rate_data) / \
                           (self.max_rate_data - self.min_rate_data)
        # calc vector of current state
        current_state = np.matrix([scaled_temperature_ai, scaled_number_users, scaled_rate_data])

        return current_state, self.reward, self.game_over

# Building the Neural network
- Fully connected NN with 2 hidden layers (64 then 32 nodes)
- Input : state vector (server T°, number of users, rate of data)
- Output : Q-values of AI actions to regulate T° ( reduce by 3°C or 1.5°, maintain T°, Incr. by 1.5° or 3°C)

In [3]:
class Brain(object):
    def __init__(self, learning_rate = 0.001, number_actions = 5):
        self.learning_rate = learning_rate
        self.number_actions = number_actions
        states = Input(shape = (3,))
        x = Dense(units = 64, activation = 'sigmoid')(states)
        #x = Dropout(rate = 0.1)(x)
        x = Dense(units = 32, activation = 'sigmoid')(x)
        #x = Dropout(rate = 0.1)(x)
        q_values = Dense(units = self.number_actions, activation = 'softmax')(x)

        self.model = Model(inputs = states, outputs = q_values)
        self.model.compile(loss='mse', optimizer = Adam(lr=self.learning_rate))


# Implement Deep Q-Learning with Experience Replay

In [4]:
class DQN(object):

    # INITIALIZE ALL THE PARAMETERS AND VARIABLES OF THE DQN
    def __init__(self, max_memory = 100, discount = 0.9):
        self.memory = list()
        self.max_memory = max_memory
        self.discount = discount     # discount factor used in calculating the targets Q

    # METHOD THAT BUILDS THE MEMORY IN EXPERIENCE REPLAY
    def remember(self, transition, game_over):
        """arguments:
        transition: tuple of 4 elemnts (current state, action played, reward received, next state)
        game_over : 0 or 1"""
        self.memory.append([transition, game_over])
        if len(self.memory) > self.max_memory:
            del self.memory[0]                   # delete first memory element (FIFO)

    # CONSTRUCT BATCHES OF INPUTS AND TARGETS BY EXTRACTING TRANSITIONS FROM THE MEMORY
    def get_batch(self, model, batch_size = 10):
        len_memory = len(self.memory)
        num_inputs = self.memory[0][0][0].shape[1]  # select first elmnt of transition tuple, ie shape of state vector
        num_outputs = model.output_shape[-1]

        # initialize the batches
        inputs = np.zeros((min(len_memory, batch_size), num_inputs))   # typically batch_size x 3
        targets = np.zeros((min(len_memory, batch_size), num_outputs)) # typically batch_size x 5

        # extract random transitions from memory and populate input states and outputs Q-values
        for i, idx in enumerate(np.random.randint(0, len_memory, size = min(len_memory, batch_size))):
            current_state, action, reward, next_state = self.memory[idx][0]
            game_over = self.memory[idx][1]
            inputs[i] = current_state
            targets[i] = model.predict(current_state)[0]  # predict returns 2 elements, Q-values is first
            Q_sa = np.max(targets[i])
            if game_over:
                targets[i, action] = reward
            else:
                targets[i, action] = reward + self.discount * Q_sa

        return inputs, targets

# Training Phase

In [5]:
# Configurando sementes para reprodutibilidade
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(42)
random.seed(12345)

# CONFIGURAÇÃO DOS PARÂMETROS
epsilon = 0.3   # Taxa de exploração vs. exploração. Aqui, 30% de exploração (seleção aleatória)
numero_acoes = 5
limite_direcao = (numero_acoes - 1) / 2   # Limite separando direção das ações de mudança de temperatura
numero_epocas = 100
memoria_maxima = 3000
tamanho_batch = 512
passo_temperatura = 1.5

# CONSTRUINDO O AMBIENTE CRIANDO UM OBJETO DA CLASSE ENVIRONMENT
env = Environment(temperatura_otima=(18.0, 24.0), mes_inicial=0,
                  numero_inicial_usuarios=20, taxa_inicial_dados=30)

# CONSTRUINDO O OBJETO DA REDE NEURAL USANDO A CLASSE BRAIN
cerebro = Brain(taxa_aprendizado=0.00001, numero_acoes=numero_acoes)

# CONSTRUINDO O MODELO DQN
dqn = DQN(memoria_maxima=memoria_maxima, desconto=0.9)

# ESCOLHENDO O MODO
treinar = True

# TREINANDO A IA
env.treinar = treinar
modelo = cerebro.modelo
parada_precoce = True
paciencia = 10
melhor_recompensa_total = -np.inf
contagem_paciencia = 0

if env.treinar:
    # INICIANDO O LOOP DE TODAS AS ÉPOCAS (1 Época = 5 Meses)
    for epoca in range(1, numero_epocas):
        # INICIALIZANDO TODAS AS VARIÁVEIS DO AMBIENTE E DO LOOP DE TREINAMENTO
        recompensa_total = 0
        perda = 0.0
        novo_mes = np.random.randint(0, 12)
        env.reset(novo_mes=novo_mes)
        jogo_terminado = False
        estado_atual, _, _ = env.observar()
        passo_tempo = 0

        # INICIANDO O LOOP DE TODOS OS TIMESTEPS (1 Timestep = 1 Minuto) EM UMA ÉPOCA
        while not jogo_terminado and passo_tempo <= 5 * 30 * 24 * 60:
            # EXECUTANDO A PRÓXIMA AÇÃO POR EXPLORAÇÃO
            if np.random.rand() <= epsilon:   # escolha aleatória dentro de [0,1] abaixo do limite epsilon
                acao = np.random.randint(0, numero_acoes)  # ação entre 0 a 4, limite = 2
                direcao = -1 if (acao - limite_direcao < 0) else 1
                energia_ia = abs(acao - limite_direcao) * passo_temperatura

            # EXECUTANDO A PRÓXIMA AÇÃO POR INFERÊNCIA
            else:
                valores_q = modelo.predict(estado_atual)
                acao = np.argmax(valores_q[0])
                direcao = -1 if (acao - limite_direcao < 0) else 1
                energia_ia = abs(acao - limite_direcao) * passo_temperatura

            # ATUALIZANDO O AMBIENTE E ALCANÇANDO O PRÓXIMO ESTADO
            proximo_estado, recompensa, jogo_terminado = env.atualizar_env(
                direcao, energia_ia, int(passo_tempo / (30 * 24 * 60))
            )
            recompensa_total += recompensa

            # ARMAZENANDO ESSA NOVA TRANSIÇÃO NA MEMÓRIA
            dqn.lembrar([estado_atual, acao, recompensa, proximo_estado], jogo_terminado)

            # COLETANDO EM DOIS LOTES SEPARADOS OS INPUTS E TARGETS
            entradas, alvos = dqn.obter_lote(modelo, tamanho_batch=tamanho_batch)

            # COMPUTANDO A PERDA SOBRE OS DOIS LOTES INTEIROS DE INPUTS E TARGETS
            perda += modelo.train_on_batch(entradas, alvos)
            passo_tempo += 1
            estado_atual = proximo_estado  # atualizar o estado atual

        # IMPRIMINDO OS RESULTADOS DO TREINAMENTO PARA CADA ÉPOCA
        print(f"Época: {epoca:03d}/{numero_epocas:03d}")
        print(f"Total de Energia gasta com IA: {env.energia_total_ia:.0f}")
        print(f"Total de Energia gasta sem IA: {env.energia_total_sem_ia:.0f}")

        # PARADA PRECOCE
        if parada_precoce:
            if recompensa_total <= melhor_recompensa_total:
                contagem_paciencia += 1
            elif recompensa_total > melhor_recompensa_total:
                melhor_recompensa_total = recompensa_total
                contagem_paciencia = 0
            if contagem_paciencia >= paciencia:
                print("Parada Precoce")
                break

        # SALVANDO O MODELO
        modelo.save("modelo.h5")


TypeError: Environment.__init__() got an unexpected keyword argument 'temperatura_otima'

# Evaluating Energy management model performance

In [None]:
print('Evaluating one year of energy management...')

# BUILDING THE ENVIRONMENT BY CREATING AN OBJECT OF THE ENVIRONMENT CLASS
env = Environment(optimal_temperature = (18.0, 24.0), initial_month = 0, \
                  initial_number_users = 20, initial_rate_data = 30)

# LOAD PRE-TRAINED MODEL
model = load_model("model.h5")

# CHOOSING THE MODE
train = False

# RUNNING 1 YEAR SIMULATION INFERENCE MODE
env.train = train
current_state, _, _ = env.observe()

# STARTING THE LOOP OVER 1 YEAR
for timestep in tqdm(range(12 * 30 * 24 * 60)):
    q_values = model.predict(current_state)
    action = np.argmax(q_values[0])
    if (action - direction_boundary < 0):
        direction = -1
    else:
        direction = 1
    energy_ai = abs(action - direction_boundary) * temperature_step
    # UPDATING ENVIRONMENT AND REACHING THE NEXT STATE
    next_state, _, _ = env.update_env(direction, energy_ai, \
                                                int(timestep / (30 * 24 * 60)))  # month [0,11]
    current_state = next_state    # update the current state

# PRINTING THE RESULTS FOR 1 YEAR
print("Total Energy spent with an AI: {:.0f}".format(env.total_energy_ai))
print("Total Energy spent with no AI: {:.0f}".format(env.total_energy_noai))
print("ENERGY SAVED WITH AI: {:.0f}%".format((env.total_energy_noai - env.total_energy_ai)/env.total_energy_noai*100))