In [7]:
import numpy as np

class Environment:
    """
    Server Temperature Control Environment

    This environment simulates a server cooling system where:
    - AI can take actions to cool down or heat up the server
    - The goal is to maintain optimal temperature while minimizing energy consumption
    - External factors like atmospheric temperature, number of users, and data rate affect the server temperature

    Two server temperatures are tracked side by side: ``temperature_ai`` (driven
    by the agent's actions) and ``temperature_noai`` (a baseline controller that
    snaps the temperature back to the nearest bound of the optimal range and
    pays the distance as energy). The reward is the energy the agent saves
    relative to that baseline.
    """

    def __init__(self, optimal_temperature=(18.0, 24.0), initial_month=0,
                 initial_number_users=10, initial_rate_data=60):
        """
        Initialize the environment

        Parameters:
        - optimal_temperature: [min_optimal, max_optimal] temperature range in Celsius
        - initial_month: starting month (0-11, where 0=January)
        - initial_number_users: initial number of users on the server
        - initial_rate_data: initial data processing rate
        """

        # === TEMPORAL PARAMETERS ===
        self.initial_month = initial_month

        # Monthly atmospheric temperatures (January to December)
        self.monthly_atmospheric_temperatures = [1.0, 5.0, 7.0, 10.0, 11.0, 20.0,
                                                 23.0, 24.0, 22.0, 10.0, 5.0, 1.0]
        self.atmospheric_temperature = self.monthly_atmospheric_temperatures[initial_month]

        # === TEMPERATURE PARAMETERS ===
        # Stored as a private copy so that mutating one environment's range can
        # never leak into the default or into another instance.
        self.optimal_temperature = list(optimal_temperature)  # Target temperature range
        self.min_temperature = -20  # Absolute minimum (causes game over if reached)
        self.max_temperature = 80   # Absolute maximum (causes game over if reached)

        # === USER PARAMETERS ===
        self.min_number_users = 10
        self.max_number_users = 100
        self.max_update_users = 5   # Maximum change in users per step
        self.initial_number_users = initial_number_users
        self.current_number_users = initial_number_users

        # === DATA RATE PARAMETERS ===
        self.min_rate_data = 20
        self.max_rate_data = 300
        self.max_update_data = 10   # Maximum change in data rate per step
        self.initial_rate_data = initial_rate_data
        self.current_rate_data = initial_rate_data

        # === TEMPERATURE CALCULATIONS ===
        self.intrinsic_temperature = self._compute_intrinsic_temperature()

        # Server temperature with AI control
        self.temperature_ai = self.intrinsic_temperature

        # Server temperature without AI (baseline - starts at middle of optimal range)
        self.temperature_noai = (self.optimal_temperature[0] + self.optimal_temperature[1]) / 2.0

        # === ENERGY TRACKING ===
        self.total_energy_ai = 0.0      # Total energy consumed by AI system
        self.total_energy_noai = 0.0    # Total energy consumed by non-AI system

        # === GAME STATE ===
        self.reward = 0.0
        self.game_over = 0
        self.train = 1  # 1 for training mode, 0 for inference mode

    def _compute_intrinsic_temperature(self):
        """Return atmospheric temperature plus the heat from users and data processing."""
        return (self.atmospheric_temperature +
                1.25 * self.current_number_users +
                1.25 * self.current_rate_data)

    @staticmethod
    def _random_step(value, max_step, lower, upper):
        """Move `value` by a uniform random integer in [-max_step, max_step], clipped to [lower, upper]."""
        # np.random.randint excludes its upper bound, hence the "+ 1": without
        # it the walk would be biased downward.
        step = np.random.randint(-max_step, max_step + 1)
        return np.clip(value + step, lower, upper)

    def _scaled_state(self):
        """Return the 1x3 state matrix [temperature, users, data rate], min-max scaled."""
        # Each component is scaled by its configured min/max. The temperature
        # component can leave [0, 1] because temperature_ai is not clamped to
        # [min_temperature, max_temperature].
        scaled_temperature_ai = ((self.temperature_ai - self.min_temperature) /
                                 (self.max_temperature - self.min_temperature))
        scaled_number_users = ((self.current_number_users - self.min_number_users) /
                               (self.max_number_users - self.min_number_users))
        scaled_rate_data = ((self.current_rate_data - self.min_rate_data) /
                            (self.max_rate_data - self.min_rate_data))

        # np.matrix is kept so existing callers still receive a (1, 3) object.
        return np.matrix([scaled_temperature_ai, scaled_number_users, scaled_rate_data])

    def update_env(self, direction, energy_ai, month):
        """
        Update the environment after AI takes an action

        Parameters:
        - direction: +1 (heating) or -1 (cooling)
        - energy_ai: amount of energy the AI uses for the action
        - month: current month (0-11)

        Returns:
        - next_state: normalized state vector for neural network
        - reward: reward for the action taken
        - game_over: whether the episode should end

        Raises:
        - ValueError: if direction is neither +1 nor -1 (checked before any
          state is modified)
        """
        # Validate up front so a bad action cannot leave the environment
        # half-updated.
        if direction not in (-1, 1):
            raise ValueError(
                "direction must be +1 (heating) or -1 (cooling), got %r" % (direction,))

        lower_optimal, upper_optimal = self.optimal_temperature[0], self.optimal_temperature[1]

        # === STEP 1: CALCULATE REWARD ===
        # First, simulate what a non-AI system would do: snap back to the
        # nearest bound of the optimal range and pay the distance as energy.
        energy_noai = 0
        if self.temperature_noai < lower_optimal:
            # Too cold - need to heat up
            energy_noai = lower_optimal - self.temperature_noai
            self.temperature_noai = lower_optimal
        elif self.temperature_noai > upper_optimal:
            # Too hot - need to cool down
            energy_noai = self.temperature_noai - upper_optimal
            self.temperature_noai = upper_optimal

        # Reward = energy saved compared to non-AI system (scaled down)
        self.reward = 1e-3 * (energy_noai - energy_ai)

        # === STEP 2: UPDATE ENVIRONMENTAL FACTORS ===
        # Update atmospheric temperature based on month
        self.atmospheric_temperature = self.monthly_atmospheric_temperatures[month]

        # Randomly update number of users (simulates varying server load)
        self.current_number_users = self._random_step(
            self.current_number_users, self.max_update_users,
            self.min_number_users, self.max_number_users)

        # Randomly update data processing rate
        self.current_rate_data = self._random_step(
            self.current_rate_data, self.max_update_data,
            self.min_rate_data, self.max_rate_data)

        # === STEP 3: UPDATE TEMPERATURES ===
        # Calculate change in intrinsic temperature
        past_intrinsic_temperature = self.intrinsic_temperature
        self.intrinsic_temperature = self._compute_intrinsic_temperature()
        delta_intrinsic_temperature = self.intrinsic_temperature - past_intrinsic_temperature

        # Temperature change from AI action: sign comes from the direction
        delta_temperature_ai = direction * energy_ai

        # Update AI-controlled temperature
        self.temperature_ai += delta_intrinsic_temperature + delta_temperature_ai

        # Update non-AI temperature (only affected by environmental changes)
        self.temperature_noai += delta_intrinsic_temperature

        # === STEP 4: CHECK GAME OVER CONDITIONS ===
        if self.temperature_ai < self.min_temperature:
            if self.train == 1:
                self.game_over = 1  # End episode during training
            else:
                # During inference, apply corrective energy cost
                self.total_energy_ai += lower_optimal - self.temperature_ai
                self.temperature_ai = lower_optimal
        elif self.temperature_ai > self.max_temperature:
            if self.train == 1:
                self.game_over = 1  # End episode during training
            else:
                # During inference, apply corrective energy cost
                self.total_energy_ai += self.temperature_ai - upper_optimal
                self.temperature_ai = upper_optimal

        # === STEP 5: UPDATE ENERGY TOTALS ===
        self.total_energy_ai += energy_ai
        self.total_energy_noai += energy_noai

        # === STEP 6: CREATE NORMALIZED STATE VECTOR ===
        next_state = self._scaled_state()

        return next_state, self.reward, self.game_over

    def reset(self, new_month):
        """
        Reset the environment to initial state for a new episode

        Note that this also switches the environment back to training mode
        (train = 1).

        Parameters:
        - new_month: starting month for the new episode (0-11)
        """
        self.atmospheric_temperature = self.monthly_atmospheric_temperatures[new_month]
        self.initial_month = new_month
        self.current_number_users = self.initial_number_users
        self.current_rate_data = self.initial_rate_data

        # Recalculate intrinsic temperature
        self.intrinsic_temperature = self._compute_intrinsic_temperature()

        # Reset temperatures
        self.temperature_ai = self.intrinsic_temperature
        self.temperature_noai = (self.optimal_temperature[0] + self.optimal_temperature[1]) / 2.0

        # Reset tracking variables
        self.total_energy_ai = 0.0
        self.total_energy_noai = 0.0
        self.reward = 0.0
        self.game_over = 0
        self.train = 1

    def observe(self):
        """
        Get the current state of the environment

        Returns:
        - current_state: normalized state vector [temperature, users, data_rate]
        - reward: last reward received
        - game_over: whether the episode has ended
        """
        return self._scaled_state(), self.reward, self.game_over


In [8]:

if __name__ == "__main__":
    # Walk-through of the Environment API: build it, observe it, take two
    # actions, inspect the bookkeeping, then reset to another month.
    print("=== SERVER COOLING ENVIRONMENT DEMONSTRATION ===\n")

    # Environment starting in January (month 0)
    env = Environment(
        optimal_temperature=[18.0, 24.0],
        initial_month=0,
        initial_number_users=20,
        initial_rate_data=80,
    )

    # --- Section 1: raw attributes right after construction ---
    print("1. INITIAL STATE:")
    print(
        f"   Atmospheric temperature: {env.atmospheric_temperature}°C\n"
        f"   Number of users: {env.current_number_users}\n"
        f"   Data rate: {env.current_rate_data}\n"
        f"   Intrinsic temperature: {env.intrinsic_temperature:.2f}°C\n"
        f"   AI temperature: {env.temperature_ai:.2f}°C\n"
        f"   No-AI temperature: {env.temperature_noai:.2f}°C\n"
        f"   Optimal range: {env.optimal_temperature[0]}-{env.optimal_temperature[1]}°C"
    )

    # --- Section 2: the normalized view handed to the agent ---
    state, reward, game_over = env.observe()
    print("\n2. INITIAL OBSERVATION:")
    print(
        f"   Normalized state: {state}\n"
        f"   Reward: {reward}\n"
        f"   Game over: {game_over}"
    )

    # --- Section 3: one cooling step in January, one heating step in February ---
    print("\n3. TAKING ACTIONS:")
    demo_actions = (
        ("ACTION 1: Cool down (direction=-1, energy=2.0)", -1, 2.0, 0),
        ("ACTION 2: Heat up (direction=+1, energy=1.5)", 1, 1.5, 1),
    )
    for headline, direction, energy, month in demo_actions:
        print("\n   " + headline)
        print(f"   Before action - AI temp: {env.temperature_ai:.2f}°C")

        next_state, reward, game_over = env.update_env(
            direction=direction, energy_ai=energy, month=month
        )

        print(
            f"   After action - AI temp: {env.temperature_ai:.2f}°C\n"
            f"   Reward: {reward:.6f}\n"
            f"   New state: {next_state}\n"
            f"   Game over: {game_over}"
        )

    # --- Section 4: what the two steps did to the surroundings ---
    print("\n4. ENVIRONMENTAL CHANGES:")
    print(
        f"   Atmospheric temp: {env.atmospheric_temperature}°C (changed to February)\n"
        f"   Number of users: {env.current_number_users} (randomly updated)\n"
        f"   Data rate: {env.current_rate_data} (randomly updated)\n"
        f"   Total energy - AI: {env.total_energy_ai:.2f}\n"
        f"   Total energy - No AI: {env.total_energy_noai:.2f}"
    )

    # --- Section 5: start a fresh episode in July ---
    print("\n5. RESETTING ENVIRONMENT:")
    env.reset(new_month=6)
    state, reward, game_over = env.observe()
    print(
        f"   Reset to July - Atmospheric temp: {env.atmospheric_temperature}°C\n"
        f"   AI temperature: {env.temperature_ai:.2f}°C\n"
        f"   Reset state: {state}"
    )

    print("\n=== END OF DEMONSTRATION ===")

=== SERVER COOLING ENVIRONMENT DEMONSTRATION ===

1. INITIAL STATE:
   Atmospheric temperature: 1.0°C
   Number of users: 20
   Data rate: 80
   Intrinsic temperature: 126.00°C
   AI temperature: 126.00°C
   No-AI temperature: 21.00°C
   Optimal range: 18.0-24.0°C

2. INITIAL OBSERVATION:
   Normalized state: [[1.46       0.11111111 0.21428571]]
   Reward: 0.0
   Game over: 0

3. TAKING ACTIONS:

   ACTION 1: Cool down (direction=-1, energy=2.0)
   Before action - AI temp: 126.00°C
   After action - AI temp: 124.00°C
   Reward: -0.002000
   New state: [[1.44       0.12222222 0.21071429]]
   Game over: 1

   ACTION 2: Heat up (direction=+1, energy=1.5)
   Before action - AI temp: 124.00°C
   After action - AI temp: 123.25°C
   Reward: -0.001500
   New state: [[1.4325     0.16666667 0.17857143]]
   Game over: 1

4. ENVIRONMENTAL CHANGES:
   Atmospheric temp: 5.0°C (changed to February)
   Number of users: 25 (randomly updated)
   Data rate: 70 (randomly updated)
   Total energy - AI: 3.5