In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import warnings
from requests import get
from io import StringIO

# Download the energy dataset as CSV text and load it into a DataFrame.
url = 'https://raw.githubusercontent.com/sedhha/bits-ai-ml-practice-problems/main/2825/assignment_2/dataset/energydata_complete.csv'
# NOTE(security): verify=False disables TLS certificate validation, so the
# downloaded content is not authenticated. Presumably a workaround for a local
# CERTIFICATE_VERIFY_FAILED error -- prefer fixing the CA bundle and removing it.
# The timeout keeps the script from hanging forever on a stalled connection.
response = get(url, verify=False, timeout=30)
# Fail fast on HTTP errors (404, 5xx, ...) instead of parsing an error page as CSV.
response.raise_for_status()

data = pd.read_csv(StringIO(response.text))
data['date'] = pd.to_datetime(data['date'])

# Fill missing values in every numeric column with that column's mean.
numeric_columns = data.select_dtypes(include=[np.number]).columns
data[numeric_columns] = data[numeric_columns].fillna(data[numeric_columns].mean())

# Model inputs: nine temperature columns, nine humidity columns, then four
# further weather columns. The regression/reward signal is 'Appliances'.
features = ['T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'T8', 'T9',
            'RH_1', 'RH_2', 'RH_3', 'RH_4', 'RH_5', 'RH_6', 'RH_7', 'RH_8', 'RH_9',
            'Visibility', 'Tdewpoint', 'Press_mm_hg', 'Windspeed']
target = 'Appliances'

X = data[features]
y = data[target]

# Standardize the features; the fitted scaler is reused later to map states
# between scaled and original units.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Shuffled 80/20 split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=0)

# Define Actor and Critic models
class Actor(nn.Module):
    """Policy network: maps a state vector to a probability per action.

    Two 64-unit ReLU hidden layers followed by a softmax output layer of
    size ``action_dim``.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        hidden_units = 64
        # Layers are created in fc1 -> fc2 -> fc3 order.
        self.fc1 = nn.Linear(state_dim, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_dim)

    def forward(self, x):
        """Return action probabilities (softmax over the last dimension)."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        logits = self.fc3(hidden)
        return logits.softmax(dim=-1)

class Critic(nn.Module):
    """Value network: maps a state vector to a single scalar estimate.

    Two 64-unit ReLU hidden layers followed by a linear output of size 1.
    """

    def __init__(self, state_dim):
        super().__init__()
        hidden_units = 64
        # Layers are created in fc1 -> fc2 -> fc3 order.
        self.fc1 = nn.Linear(state_dim, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Return the unbounded value estimate for ``x``."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

# Environment simulation
def simulate_environment(state, action, scaler, data, current_index,
                         target_temp=22.0, num_temp_features=9):
    """Apply a temperature action to a scaled state and score the result.

    Args:
        state: 1-D array holding the current state in scaled units.
        action: Integer action; ``action - 1`` is added to every temperature
            feature (0 -> decrease by 1, 1 -> maintain, 2 -> increase by 1).
        scaler: Object providing ``transform`` / ``inverse_transform`` for
            converting between scaled and original units.
        data: DataFrame with an ``'Appliances'`` column, indexed by position.
        current_index: Row position in ``data`` used for the energy term.
        target_temp: Temperature that the comfort penalty is measured against.
        num_temp_features: Number of leading state entries that are
            temperatures and therefore affected by the action.

    Returns:
        ``(next_state_scaled, reward)`` where ``next_state_scaled`` is the
        adjusted state in scaled units and ``reward`` is the negative summed
        absolute deviation of the new temperatures from ``target_temp`` plus
        the drop in ``'Appliances'`` from ``current_index`` to the next row
        (zero energy term on the last row).
    """
    # Work in original units so that "+/- 1" is a real temperature step.
    orig_state = scaler.inverse_transform(state.reshape(1, -1)).flatten()
    temp_adjustment = action - 1  # -1 for decrease, 0 for maintain, 1 for increase
    new_temps = orig_state[:num_temp_features] + temp_adjustment

    next_state = orig_state.copy()
    next_state[:num_temp_features] = new_temps
    next_state_scaled = scaler.transform(next_state.reshape(1, -1)).flatten()

    # Select the column first: data.iloc[i]['Appliances'] would materialize an
    # entire row Series on every call just to read one value.
    energy = data['Appliances']
    energy_before = energy.iloc[current_index]
    if current_index + 1 < len(data):
        energy_after = energy.iloc[current_index + 1]
    else:
        # No following row: treat energy use as unchanged.
        energy_after = energy_before

    comfort_penalty = np.sum(np.abs(new_temps - target_temp))
    reward = -comfort_penalty + (energy_before - energy_after)
    return next_state_scaled, reward

# Training function
def train_function(X_train, y_train, state_dim, action_dim, scaler, data,
                   episodes=500, gamma=0.99, lr=0.001):
    """Train an actor-critic pair with one-step TD updates.

    Every episode starts from ``X_train[0]`` and runs ``len(X_train) - 1``
    steps; each step samples an action from the actor, advances the state
    with ``simulate_environment`` and updates both networks.

    Args:
        X_train: Indexable collection of scaled state vectors; only its first
            row and its length are used.
        y_train: Unused; kept so existing callers keep working.
        state_dim: Size of the state vector fed to both networks.
        action_dim: Number of discrete actions the actor chooses between.
        scaler: Passed through to ``simulate_environment``.
        data: Passed through to ``simulate_environment``.
        episodes: Number of training episodes.
        gamma: Discount factor applied to the next-state value.
        lr: Learning rate for both Adam optimizers.

    Returns:
        ``(actor, critic, episode_rewards, actor_losses, critic_losses)``;
        rewards hold one entry per episode, losses one entry per step.
    """
    warnings.filterwarnings("ignore", category=UserWarning, message="X does not have valid feature names, but StandardScaler was fitted with feature names")

    actor = Actor(state_dim, action_dim)
    critic = Critic(state_dim)
    actor_optimizer = optim.Adam(actor.parameters(), lr=lr)
    critic_optimizer = optim.Adam(critic.parameters(), lr=lr)
    # Stateless loss module: build it once instead of once per step.
    mse_loss = nn.MSELoss()

    actor_losses = []
    critic_losses = []
    episode_rewards = []
    steps_per_episode = len(X_train) - 1

    # The context manager closes the progress bar even if training raises.
    with tqdm(total=episodes, desc="Training Progress") as pbar:
        for episode in range(episodes):
            state = torch.FloatTensor(X_train[0])
            total_reward = 0
            print(f"Starting episode {episode + 1}")

            for step in range(steps_per_episode):
                if step % 1000 == 0:
                    print(f"  Step {step}")

                action_probs = actor(state)
                action = torch.multinomial(action_probs, 1).item()

                # NOTE(review): `step` is used as the row position in `data`,
                # while the state evolves from X_train[0] through simulated
                # transitions -- confirm this pairing is intended.
                next_state, reward = simulate_environment(state.numpy(), action, scaler, data, step)
                next_state = torch.FloatTensor(next_state)
                # Plain float avoids mixing a numpy scalar with tensors below.
                reward = float(reward)
                total_reward += reward

                critic_value = critic(state)
                # The bootstrap target is a constant for both losses, so it
                # does not need an autograd graph.
                with torch.no_grad():
                    critic_value_next = critic(next_state)

                target_value = reward + gamma * critic_value_next
                critic_loss = mse_loss(critic_value, target_value)

                # TD error, taken before the critic is updated this step.
                advantage = target_value.item() - critic_value.item()
                actor_loss = -torch.log(action_probs[action]) * advantage

                critic_optimizer.zero_grad()
                critic_loss.backward()
                critic_optimizer.step()

                actor_optimizer.zero_grad()
                actor_loss.backward()
                actor_optimizer.step()

                state = next_state
                actor_losses.append(actor_loss.item())
                critic_losses.append(critic_loss.item())

            episode_rewards.append(total_reward)
            if (episode + 1) % 10 == 0:
                print(f"Episode {episode + 1}/{episodes}, Reward: {total_reward}")

            pbar.update(1)
            pbar.set_postfix({"Reward": f"{total_reward:.2f}"})

    print("Training complete!")

    return actor, critic, episode_rewards, actor_losses, critic_losses

# Initiate training
# Derive the state size from the training matrix instead of hard-coding it,
# so it cannot drift out of sync with the feature list.
state_dim = X_train.shape[1]
action_dim = 3  # Decrease, Maintain, Increase

# Run the full training loop and keep the trained networks plus the
# per-episode rewards and per-step losses.
actor, critic, episode_rewards, actor_losses, critic_losses = train_function(X_train, y_train, state_dim, action_dim, scaler, data, episodes=500)

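# Captured cell output (appears to come from an earlier run: it reports a urlopen
# failure, whereas the code above downloads the file with requests):
# URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1000)>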