## 0. Dataset and DataLoader

In [None]:
from sklearn.metrics import mean_absolute_error,r2_score, mean_squared_error
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import DataLoader, Dataset, Subset
from sklearn.preprocessing import MinMaxScaler
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
import torch.optim as optim  
import copy
from metrics import predict_and_evaluate

In [None]:

class WindPowerDataset(Dataset):
    """Sliding-window wind-power dataset read from a CSV file.

    The CSV is down-sampled by keeping every ``sample_step``-th row. Column 2
    (by position) is treated as wind power and columns 4..11 as eight weather
    features; both groups are min-max scaled in place inside ``self.data``.

    Sample ``idx`` is a triple of float32 tensors:

    * wind power of rows ``idx .. idx + history_len - 1``,
      shape ``(history_len,)``
    * weather of rows ``idx + history_len .. idx + history_len + horizon - 1``,
      shape ``(horizon, 8)``
    * wind power of row ``idx + history_len + horizon`` (scalar target)

    The defaults (288 / 24 / 5) follow the original author's note: one day of
    history and a two-hour-ahead forecast after 5x down-sampling (presumably
    of 1-minute rows -- confirm against the data).

    NOTE(review): scalers created here are fitted on the whole file, i.e.
    including rows that callers later put into their test split.

    Args:
        csv_file: Path (or buffer) handed to ``pandas.read_csv``.
        wind_power_scaler: Already fitted scaler for the wind-power column,
            or ``None`` to fit a new ``MinMaxScaler`` on this file.
        weather_scaler: Already fitted scaler for the weather columns, or
            ``None`` to fit a new ``MinMaxScaler`` on this file.
        save_scalers: When true and at least one scaler was fitted here,
            pickle both scalers to ``wind_power_scaler.pkl`` and
            ``weather_scaler.pkl`` (relative paths).
        history_len: Number of wind-power rows in the history window.
        horizon: Number of weather rows between history and target.
        sample_step: Keep every ``sample_step``-th row of the CSV.

    Raises:
        ValueError: If ``history_len``, ``horizon`` or ``sample_step`` is
            smaller than 1.
    """

    # Class-level defaults; ``__init__`` stores the per-instance values.
    history_len = 288
    horizon = 24

    def __init__(self, csv_file, wind_power_scaler=None, weather_scaler=None, save_scalers=False,
                 history_len=288, horizon=24, sample_step=5):
        if history_len < 1 or horizon < 1 or sample_step < 1:
            raise ValueError("history_len, horizon and sample_step must be positive integers")
        self.history_len = history_len
        self.horizon = horizon

        self.data = pd.read_csv(csv_file)

        # Down-sample: keep every ``sample_step``-th row.
        self.data = self.data.iloc[::sample_step, :].reset_index(drop=True)

        # Remember the spread of the raw (unscaled) values.
        self.original_wind_power_std = self.data.iloc[:, 2].std()
        self.original_weather_std = self.data.iloc[:, 4:12].std()

        # Each scaler is handled on its own, so a caller-supplied scaler is
        # always honoured even when the other one has to be fitted here.
        fit_wind = wind_power_scaler is None
        fit_weather = weather_scaler is None
        self.wind_power_scaler = MinMaxScaler() if fit_wind else wind_power_scaler
        self.weather_scaler = MinMaxScaler() if fit_weather else weather_scaler

        # Normalize the wind power column (scalers expect a 2-D input).
        wind_values = self.data.iloc[:, 2].values.reshape(-1, 1)
        if fit_wind:
            scaled_wind = self.wind_power_scaler.fit_transform(wind_values)
        else:
            scaled_wind = self.wind_power_scaler.transform(wind_values)
        self.data.iloc[:, 2] = scaled_wind.squeeze()

        # Normalize the weather columns.
        weather_values = self.data.iloc[:, 4:12]
        if fit_weather:
            scaled_weather = self.weather_scaler.fit_transform(weather_values)
        else:
            scaled_weather = self.weather_scaler.transform(weather_values)
        self.data.iloc[:, 4:12] = scaled_weather

        if save_scalers and (fit_wind or fit_weather):
            with open('wind_power_scaler.pkl', 'wb') as f:
                pickle.dump(self.wind_power_scaler, f)
            with open('weather_scaler.pkl', 'wb') as f:
                pickle.dump(self.weather_scaler, f)

    def __len__(self):
        """Return the number of complete windows (0 if the data is too short)."""
        # The last usable start index needs history + horizon rows plus the target row.
        return max(0, len(self.data) - (self.history_len + self.horizon))

    def __getitem__(self, idx):
        """Return ``(wind_history, future_weather, future_wind_power)`` for window ``idx``.

        Negative indices count from the end. Raises ``IndexError`` when
        ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            idx += size
        if not 0 <= idx < size:
            raise IndexError("WindPowerDataset index out of range")

        history_end = idx + self.history_len
        target_row = history_end + self.horizon

        wind_power_history = self.data.iloc[idx:history_end, 2].values.astype(float)
        future_weather = self.data.iloc[history_end:target_row, 4:12].values.astype(float)
        future_wind_power = self.data.iloc[target_row, 2]
        return torch.tensor(wind_power_history, dtype=torch.float32), \
               torch.tensor(future_weather, dtype=torch.float32), \
               torch.tensor(future_wind_power, dtype=torch.float32)

    def get_original_stds(self):
        """Return the pre-scaling standard deviations of wind power and weather columns."""
        return {
            'original_wind_power_std': self.original_wind_power_std,
            'original_weather_std': self.original_weather_std.to_dict()
        }


# Build the dataset once; save_scalers=True also pickles the fitted scalers.
dataset = WindPowerDataset('CAISO_zone_1_.csv', save_scalers=True)
# Pre-scaling standard deviations (wind power and weather columns).
weather_stds = dataset.get_original_stds()

# Chronological 80/20 split: the first windows train, the remainder test.
total_size = len(dataset)
train_size = int(total_size * 0.8)
test_size = total_size - train_size

train_idx = [*range(train_size)]
test_idx = [*range(train_size, total_size)]

train_dataset = Subset(dataset, train_idx)
test_dataset = Subset(dataset, test_idx)

# Neither loader shuffles, so batches follow the time order of the file.
batch_size = 32
train_loader = DataLoader(train_dataset, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Kept at module level for later cells.
wind_power_scaler = dataset.wind_power_scaler
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## 1. Train surrogate model

### GRU

In [None]:
class WindPowerPredictor(nn.Module):
    """Two-branch GRU regressor (surrogate model).

    One 2-layer GRU reads the wind-power history (one feature per step), a
    second 2-layer GRU reads the future weather (eight features per step).
    The final hidden states of the top layers are concatenated and mapped
    through a linear layer and a sigmoid to one value in (0, 1) per sample.
    """

    def __init__(self):
        super().__init__()
        hidden = 50
        self.gru_wind = nn.GRU(input_size=1, hidden_size=hidden, num_layers=2, batch_first=True)
        self.gru_weather = nn.GRU(input_size=8, hidden_size=hidden, num_layers=2, batch_first=True)
        self.fc = nn.Linear(2 * hidden, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, wind_history, weather_future):
        """Return predictions of shape ``(batch, 1)``.

        ``wind_history`` is ``(batch, steps)``; ``weather_future`` is
        ``(batch, steps, 8)``.
        """
        # Add the trailing feature axis the GRU expects: (batch, steps, 1).
        _, wind_state = self.gru_wind(wind_history[..., None])
        _, weather_state = self.gru_weather(weather_future)
        # Index -1 selects the hidden state of the top GRU layer.
        fused = torch.cat([wind_state[-1], weather_state[-1]], dim=1)
        return self.sigmoid(self.fc(fused))
  
def train_model(csv_file, epochs=10, batch_size=32, learning_rate=0.001, test_split=0.2, device='cpu'):
    """Train a ``WindPowerPredictor`` and checkpoint the best epoch.

    The dataset is rebuilt from ``csv_file`` (``save_scalers=True``, so the
    scalers are pickled again) and split chronologically: the last
    ``test_split`` fraction of windows is held out. After each epoch the mean
    per-batch MSE on the held-out part is printed and the weights of the
    epoch with the lowest value are remembered.

    The model class is looked up by the module-level name
    ``WindPowerPredictor`` at call time.

    Args:
        csv_file: CSV path passed to ``WindPowerDataset``.
        epochs: Number of passes over the training subset.
        batch_size: Batch size of both data loaders.
        learning_rate: Adam learning rate.
        test_split: Fraction of windows (taken from the end) held out.
        device: Device the model and the batches are moved to.

    Returns:
        The ``state_dict`` captured at the best epoch (also written to
        ``'wind_gru_caiso_sigmoid.pth'``). It is ``None`` when no epoch
        produced a loss below ``inf``, e.g. ``epochs=0``.
    """
    dataset = WindPowerDataset(csv_file, save_scalers=True)
    test_size = int(len(dataset) * test_split)
    train_size = len(dataset) - test_size
    train_dataset = Subset(dataset, list(range(train_size)))
    test_dataset = Subset(dataset, list(range(train_size, len(dataset))))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = WindPowerPredictor().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    best_test_loss = float('inf')
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        for wind_history, weather_future, future_wind_power in train_loader:
            wind_history = wind_history.to(device)
            weather_future = weather_future.to(device)
            future_wind_power = future_wind_power.to(device)
            optimizer.zero_grad()
            output = model(wind_history, weather_future)
            # squeeze(-1) removes only the feature axis; a bare squeeze()
            # would also drop the batch axis of a size-1 final batch.
            loss = criterion(output.squeeze(-1), future_wind_power)
            loss.backward()
            optimizer.step()

        # validation
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for wind_history, weather_future, future_wind_power in test_loader:
                wind_history = wind_history.to(device)
                weather_future = weather_future.to(device)
                future_wind_power = future_wind_power.to(device)
                output = model(wind_history, weather_future)
                loss = criterion(output.squeeze(-1), future_wind_power)
                test_loss += loss.item()

        # Mean of the per-batch losses.
        test_loss /= len(test_loader)
        print(f'Epoch {epoch+1}/{epochs}, Test Loss: {test_loss}')

        # Keep an independent copy: state_dict() tensors alias the live weights.
        if test_loss < best_test_loss:
            best_model_state = copy.deepcopy(model.state_dict())
            best_test_loss = test_loss

    # save the best model
    torch.save(best_model_state, 'wind_gru_caiso_sigmoid.pth')
    print(f'Best model saved with test loss: {best_test_loss}')
    return best_model_state

In [None]:
# Train the surrogate GRU on the CAISO zone-1 file, on the GPU when available.
# NOTE(review): if train_model is the GRU variant returning a state_dict,
# `model` holds weights rather than an nn.Module -- confirm before calling it.
csv_file = 'CAISO_zone_1_.csv'
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = train_model(csv_file, epochs=20, learning_rate=0.0001, device=device)

### Transformer

In [None]:
class WindPowerPredictor(nn.Module):
    """Two-branch ``nn.Transformer`` regressor (surrogate model).

    Wind history and future weather are each linearly embedded to ``d_model``
    features and passed through their own encoder-decoder transformer, with
    the same sequence used as source and target. The last time step of both
    outputs is concatenated and mapped through a linear layer and a sigmoid.

    Args:
        d_model: Embedding width used by both branches.
    """

    def __init__(self, d_model=50):
        super().__init__()
        self.d_model = d_model
        self.embedding_wind = nn.Linear(1, d_model)
        self.embedding_weather = nn.Linear(8, d_model)
        # Both branches use the same hyper-parameters but separate weights.
        transformer_kwargs = dict(
            d_model=d_model,
            nhead=2,
            num_encoder_layers=2,
            num_decoder_layers=2,
            dim_feedforward=200,
            dropout=0.1,
        )
        self.transformer_wind = nn.Transformer(**transformer_kwargs)
        self.transformer_weather = nn.Transformer(**transformer_kwargs)
        self.fc = nn.Linear(d_model * 2, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, wind_history, weather_future):
        """Return predictions of shape ``(batch, 1)``.

        ``wind_history`` is ``(batch, steps)``; ``weather_future`` is
        ``(batch, steps, 8)``.
        """
        # Embed, then move to the (steps, batch, d_model) layout the
        # transformers were built for (batch_first is left at its default).
        wind_seq = self.embedding_wind(wind_history.unsqueeze(-1)).transpose(0, 1)
        weather_seq = self.embedding_weather(weather_future).transpose(0, 1)

        # Source and target are the same sequence in each branch.
        wind_out = self.transformer_wind(wind_seq, wind_seq)
        weather_out = self.transformer_weather(weather_seq, weather_seq)

        # Only the last time step of each branch feeds the output head.
        fused = torch.cat([wind_out[-1], weather_out[-1]], dim=1)
        return self.sigmoid(self.fc(fused))

def train_model(csv_file, epochs=10, batch_size=32, learning_rate=0.001, test_split=0.2, device='cpu'):
    """Train a ``WindPowerPredictor`` and save the best checkpoint to disk.

    The dataset is rebuilt from ``csv_file`` (``save_scalers=True``) and split
    chronologically; the training subset is shuffled, the held-out subset is
    not. After each epoch the mean per-batch MSE on the held-out part is
    printed and a deep copy of the weights is kept when it improves.

    The model class is looked up by the module-level name
    ``WindPowerPredictor`` at call time.

    Args:
        csv_file: CSV path passed to ``WindPowerDataset``.
        epochs: Number of passes over the training subset.
        batch_size: Batch size of both data loaders.
        learning_rate: Adam learning rate.
        test_split: Fraction of windows (taken from the end) held out.
        device: Device the model and the batches are moved to.

    Returns:
        The model object as it stands after the final epoch. Its weights can
        differ from the best-epoch checkpoint written to
        ``'wind_caiso_transformer_sigmoid.pth'``.
    """
    dataset = WindPowerDataset(csv_file, save_scalers=True)
    test_size = int(len(dataset) * test_split)
    train_size = len(dataset) - test_size
    train_dataset = Subset(dataset, list(range(train_size)))
    test_dataset = Subset(dataset, list(range(train_size, len(dataset))))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = WindPowerPredictor().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    best_test_loss = float('inf')
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        for wind_history, weather_future, future_wind_power in train_loader:
            wind_history = wind_history.to(device)
            weather_future = weather_future.to(device)
            future_wind_power = future_wind_power.to(device)
            optimizer.zero_grad()
            output = model(wind_history, weather_future)
            # squeeze(-1) removes only the feature axis; a bare squeeze()
            # would also drop the batch axis of a size-1 final batch.
            loss = criterion(output.squeeze(-1), future_wind_power)
            loss.backward()
            optimizer.step()

        # validation
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for wind_history, weather_future, future_wind_power in test_loader:
                wind_history = wind_history.to(device)
                weather_future = weather_future.to(device)
                future_wind_power = future_wind_power.to(device)
                output = model(wind_history, weather_future)
                loss = criterion(output.squeeze(-1), future_wind_power)
                test_loss += loss.item()

        # Mean of the per-batch losses.
        test_loss /= len(test_loader)
        print(f'Epoch {epoch+1}/{epochs}, Test Loss: {test_loss}')

        # update and save the best model
        if test_loss < best_test_loss:
            best_test_loss = test_loss
            # dict.copy() is shallow: its tensors would keep tracking the live
            # weights, so later epochs would overwrite the "best" snapshot.
            best_model_state = copy.deepcopy(model.state_dict())

    # save the best model
    torch.save(best_model_state, 'wind_caiso_transformer_sigmoid.pth')
    print(f'Best model saved with test loss: {best_test_loss}')

    return model


In [None]:
# Train the surrogate transformer; relies on csv_file and device defined in an earlier cell.
model = train_model(csv_file, epochs=20, learning_rate=0.0001, device=device)

### TCN

In [None]:
class TemporalConvNet(nn.Module):
    """Stack of dilated 1-D convolutions, each followed by ReLU and dropout.

    Level ``i`` uses dilation ``2 ** i`` and pads ``(kernel_size - 1) * 2 ** i``
    zeros on BOTH ends of the sequence.

    With the default ``causal=False`` nothing is trimmed, so every level
    lengthens the sequence by its padding and the output has more time steps
    than the input. In that mode the last output step of each level overlaps
    only one real sample (the rest of its window is right-hand padding), so
    ``output[:, :, -1]`` depends on the last input step alone.

    With ``causal=True`` the trailing ``padding`` steps are cut after every
    convolution. The output then has the same length as the input and step
    ``t`` depends only on inputs at steps ``<= t``. Parameter names and shapes
    are the same in both modes, so checkpoints are interchangeable.

    Args:
        num_inputs: Number of input channels.
        num_channels: Output channels per level; its length sets the depth.
        kernel_size: Convolution kernel size.
        dropout: Dropout probability applied after every ReLU.
        causal: Trim the trailing padding after each convolution (see above).
    """

    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2, causal=False):
        super().__init__()
        self.causal = causal
        layers = []
        for level, out_channels in enumerate(num_channels):
            dilation_size = 2 ** level
            in_channels = num_inputs if level == 0 else num_channels[level - 1]
            layers += [
                nn.Conv1d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride=1,
                    padding=(kernel_size - 1) * dilation_size,
                    dilation=dilation_size,
                ),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stack to ``x`` of shape ``(batch, channels, steps)``."""
        if not self.causal:
            return self.network(x)
        for layer in self.network:
            x = layer(x)
            # Drop the steps produced by the right-hand padding so the
            # sequence keeps its length and no step sees later inputs.
            if isinstance(layer, nn.Conv1d) and layer.padding[0] > 0:
                x = x[:, :, :-layer.padding[0]]
        return x

class WindPowerPredictorTCN(nn.Module):
    """Two-branch TCN regressor (surrogate model).

    A three-level ``TemporalConvNet`` with 50 channels per level is applied to
    the wind history (one channel) and another to the future weather (eight
    channels). The last time step of each output is concatenated and mapped
    through a linear layer and a sigmoid.

    NOTE(review): ``TemporalConvNet`` as used here pads both ends and does not
    trim, so the last output step mostly overlaps padding -- confirm that
    reading the last step is intended.
    """

    def __init__(self):
        super().__init__()
        channels = [50] * 3
        self.tcn_wind = TemporalConvNet(num_inputs=1, num_channels=channels, kernel_size=3, dropout=0.2)
        self.tcn_weather = TemporalConvNet(num_inputs=8, num_channels=channels, kernel_size=3, dropout=0.2)
        # The head takes both 50-channel branch outputs side by side.
        self.fc = nn.Linear(50 * 2, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, wind_history, weather_future):
        """Return predictions of shape ``(batch, 1)``.

        ``wind_history`` is ``(batch, steps)``; ``weather_future`` is
        ``(batch, steps, 8)``.
        """
        # Conv1d wants channels first: (batch, 1, steps) and (batch, 8, steps).
        wind_channels = wind_history.unsqueeze(1)
        weather_channels = weather_future.transpose(1, 2)

        # Keep only the last time step of each branch: (batch, 50).
        wind_feat = self.tcn_wind(wind_channels)[:, :, -1]
        weather_feat = self.tcn_weather(weather_channels)[:, :, -1]

        fused = torch.cat([wind_feat, weather_feat], dim=1)
        return self.sigmoid(self.fc(fused))


In [None]:
def get_lr_lambda(total_epochs):
    """Return a linear-decay factor ``epoch -> 1 - epoch / total_epochs``.

    The factor is 1 at epoch 0 and reaches 0 at ``epoch == total_epochs``.
    """
    return lambda epoch: 1 - (epoch / total_epochs)

def train_model_tcn(csv_file, epochs=40, batch_size=32, learning_rate=0.001, test_split=0.2, device='cpu'):
    """Train a ``WindPowerPredictorTCN`` with a linearly decaying learning rate.

    The dataset is rebuilt from ``csv_file`` (``save_scalers=True``) and split
    chronologically without shuffling. The Adam learning rate is scaled by
    ``get_lr_lambda(epochs)`` once per epoch. After each epoch the mean
    per-batch MSE on the held-out part is printed and a deep copy of the
    weights is kept when it improves.

    Args:
        csv_file: CSV path passed to ``WindPowerDataset``.
        epochs: Number of passes over the training subset.
        batch_size: Batch size of both data loaders.
        learning_rate: Initial Adam learning rate.
        test_split: Fraction of windows (taken from the end) held out.
        device: Device the model and the batches are moved to.

    Returns:
        The ``state_dict`` captured at the best epoch (also written to
        ``'wind_tcn_caiso_sigmoid.pth'``). It is ``None`` when no epoch
        produced a loss below ``inf``, e.g. ``epochs=0``.
    """
    dataset = WindPowerDataset(csv_file, save_scalers=True)
    test_size = int(len(dataset) * test_split)
    train_size = len(dataset) - test_size
    train_dataset = Subset(dataset, list(range(train_size)))
    test_dataset = Subset(dataset, list(range(train_size, len(dataset))))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = WindPowerPredictorTCN().to(device)
    criterion = torch.nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    lr_lambda = get_lr_lambda(epochs)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda)

    best_loss = float('inf')
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        for wind_history, weather_future, future_wind_power in train_loader:
            wind_history = wind_history.to(device)
            weather_future = weather_future.to(device)
            future_wind_power = future_wind_power.to(device)
            optimizer.zero_grad()
            output = model(wind_history, weather_future)
            # squeeze(-1) removes only the feature axis; a bare squeeze()
            # would also drop the batch axis of a size-1 final batch.
            loss = criterion(output.squeeze(-1), future_wind_power)
            loss.backward()
            optimizer.step()

        # One scheduler step per epoch.
        scheduler.step()

        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for wind_history, weather_future, future_wind_power in test_loader:
                wind_history = wind_history.to(device)
                weather_future = weather_future.to(device)
                future_wind_power = future_wind_power.to(device)
                output = model(wind_history, weather_future)
                loss = criterion(output.squeeze(-1), future_wind_power)
                test_loss += loss.item()

        # Mean of the per-batch losses.
        test_loss /= len(test_loader)
        print(f'Epoch {epoch+1}/{epochs}, Test Loss: {test_loss}')

        # Update the best model snapshot.
        if test_loss < best_loss:
            best_model_state = copy.deepcopy(model.state_dict())
            best_loss = test_loss

    torch.save(best_model_state, 'wind_tcn_caiso_sigmoid.pth')
    return best_model_state

In [None]:
# Train the surrogate TCN. Training time is longer compared to other models.
model = train_model_tcn(csv_file, epochs=100, learning_rate=0.00001, device=device)

### LSTM

In [None]:
class WindPowerPredictor(nn.Module):
    """Two-branch LSTM regressor (surrogate model).

    One 2-layer LSTM reads the wind-power history (one feature per step), a
    second 2-layer LSTM reads the future weather (eight features per step).
    The final hidden states of the top layers are concatenated and mapped
    through a linear layer and a sigmoid to one value in (0, 1) per sample.
    """

    def __init__(self):
        super().__init__()
        hidden = 50
        self.lstm_wind = nn.LSTM(input_size=1, hidden_size=hidden, num_layers=2, batch_first=True)
        self.lstm_weather = nn.LSTM(input_size=8, hidden_size=hidden, num_layers=2, batch_first=True)
        self.fc = nn.Linear(2 * hidden, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, wind_history, weather_future):
        """Return predictions of shape ``(batch, 1)``.

        ``wind_history`` is ``(batch, steps)``; ``weather_future`` is
        ``(batch, steps, 8)``.
        """
        # The LSTM returns (output, (h_n, c_n)); only h_n is used.
        wind_hidden = self.lstm_wind(wind_history[..., None])[1][0]
        weather_hidden = self.lstm_weather(weather_future)[1][0]
        # Index -1 selects the hidden state of the top LSTM layer.
        fused = torch.cat([wind_hidden[-1], weather_hidden[-1]], dim=1)
        return self.sigmoid(self.fc(fused))

In [None]:
def train_model_lstm(csv_file, epochs=10, batch_size=32, learning_rate=0.001, test_split=0.2, device='cpu'):
    """Train a ``WindPowerPredictor`` and save the best checkpoint to disk.

    The dataset is rebuilt from ``csv_file`` (``save_scalers=True``) and split
    chronologically; the training subset is shuffled, the held-out subset is
    not. After each epoch the mean per-batch MSE on the held-out part is
    printed and a deep copy of the weights is kept when it improves.

    The model class is looked up by the module-level name
    ``WindPowerPredictor`` at call time.

    Args:
        csv_file: CSV path passed to ``WindPowerDataset``.
        epochs: Number of passes over the training subset.
        batch_size: Batch size of both data loaders.
        learning_rate: Adam learning rate.
        test_split: Fraction of windows (taken from the end) held out.
        device: Device the model and the batches are moved to.

    Returns:
        The model object as it stands after the final epoch. Its weights can
        differ from the best-epoch checkpoint written to
        ``'wind_caiso_lstm_sigmoid.pth'``.
    """
    dataset = WindPowerDataset(csv_file, save_scalers=True)
    test_size = int(len(dataset) * test_split)
    train_size = len(dataset) - test_size
    train_dataset = Subset(dataset, list(range(train_size)))
    test_dataset = Subset(dataset, list(range(train_size, len(dataset))))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = WindPowerPredictor().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    best_test_loss = float('inf')
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        for wind_history, weather_future, future_wind_power in train_loader:
            wind_history = wind_history.to(device)
            weather_future = weather_future.to(device)
            future_wind_power = future_wind_power.to(device)
            optimizer.zero_grad()
            output = model(wind_history, weather_future)
            # squeeze(-1) removes only the feature axis; a bare squeeze()
            # would also drop the batch axis of a size-1 final batch.
            loss = criterion(output.squeeze(-1), future_wind_power)
            loss.backward()
            optimizer.step()

        # Validation phase.
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for wind_history, weather_future, future_wind_power in test_loader:
                wind_history = wind_history.to(device)
                weather_future = weather_future.to(device)
                future_wind_power = future_wind_power.to(device)
                output = model(wind_history, weather_future)
                loss = criterion(output.squeeze(-1), future_wind_power)
                test_loss += loss.item()

        # Mean of the per-batch losses.
        test_loss /= len(test_loader)
        print(f'Epoch {epoch+1}/{epochs}, Test Loss: {test_loss}')

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            # dict.copy() is shallow: its tensors would keep tracking the live
            # weights, so later epochs would overwrite the "best" snapshot.
            best_model_state = copy.deepcopy(model.state_dict())

    torch.save(best_model_state, 'wind_caiso_lstm_sigmoid.pth')
    print(f'Best model saved with test loss: {best_test_loss}')

    return model


In [None]:
# Train the surrogate LSTM; relies on csv_file and device defined in an earlier cell.
model = train_model_lstm(csv_file, epochs=20, learning_rate=0.0001, device=device)

## 2. Train target model

### GRU

In [None]:
import torch.optim as optim  
import copy

class WindPowerPredictor(nn.Module):
    """Two-branch GRU regressor (target model).

    One single-layer GRU with 128 hidden units reads the wind-power history
    (one feature per step), another reads the future weather (eight features
    per step). Their final hidden states are concatenated and mapped through
    a linear layer and a sigmoid to one value in (0, 1) per sample.
    """

    def __init__(self):
        super().__init__()
        hidden = 128
        self.gru_wind = nn.GRU(input_size=1, hidden_size=hidden, num_layers=1, batch_first=True)
        self.gru_weather = nn.GRU(input_size=8, hidden_size=hidden, num_layers=1, batch_first=True)
        self.fc = nn.Linear(2 * hidden, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, wind_history, weather_future):
        """Return predictions of shape ``(batch, 1)``.

        ``wind_history`` is ``(batch, steps)``; ``weather_future`` is
        ``(batch, steps, 8)``.
        """
        # Add the trailing feature axis the GRU expects: (batch, steps, 1).
        _, wind_state = self.gru_wind(wind_history[..., None])
        _, weather_state = self.gru_weather(weather_future)
        # Index -1 selects the hidden state of the last (here: only) layer.
        fused = torch.cat([wind_state[-1], weather_state[-1]], dim=1)
        return self.sigmoid(self.fc(fused))
  
def train_model(csv_file, epochs=10, batch_size=32, learning_rate=0.001, test_split=0.2, device='cpu'):
    """Train a ``WindPowerPredictor`` and checkpoint the best epoch.

    The dataset is rebuilt from ``csv_file`` (``save_scalers=True``, so the
    scalers are pickled again) and split chronologically: the last
    ``test_split`` fraction of windows is held out. After each epoch the mean
    per-batch MSE on the held-out part is printed and the weights of the
    epoch with the lowest value are remembered.

    The model class is looked up by the module-level name
    ``WindPowerPredictor`` at call time.

    Args:
        csv_file: CSV path passed to ``WindPowerDataset``.
        epochs: Number of passes over the training subset.
        batch_size: Batch size of both data loaders.
        learning_rate: Adam learning rate.
        test_split: Fraction of windows (taken from the end) held out.
        device: Device the model and the batches are moved to.

    Returns:
        The ``state_dict`` captured at the best epoch (also written to
        ``'wind_gru_caiso_sigmoid_version2.pth'``). It is ``None`` when no
        epoch produced a loss below ``inf``, e.g. ``epochs=0``.
    """
    dataset = WindPowerDataset(csv_file, save_scalers=True)
    test_size = int(len(dataset) * test_split)
    train_size = len(dataset) - test_size
    train_dataset = Subset(dataset, list(range(train_size)))
    test_dataset = Subset(dataset, list(range(train_size, len(dataset))))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = WindPowerPredictor().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    best_test_loss = float('inf')
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        for wind_history, weather_future, future_wind_power in train_loader:
            wind_history = wind_history.to(device)
            weather_future = weather_future.to(device)
            future_wind_power = future_wind_power.to(device)
            optimizer.zero_grad()
            output = model(wind_history, weather_future)
            # squeeze(-1) removes only the feature axis; a bare squeeze()
            # would also drop the batch axis of a size-1 final batch.
            loss = criterion(output.squeeze(-1), future_wind_power)
            loss.backward()
            optimizer.step()

        # Validation phase.
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for wind_history, weather_future, future_wind_power in test_loader:
                wind_history = wind_history.to(device)
                weather_future = weather_future.to(device)
                future_wind_power = future_wind_power.to(device)
                output = model(wind_history, weather_future)
                loss = criterion(output.squeeze(-1), future_wind_power)
                test_loss += loss.item()

        # Mean of the per-batch losses.
        test_loss /= len(test_loader)
        print(f'Epoch {epoch+1}/{epochs}, Test Loss: {test_loss}')

        # Keep an independent copy: state_dict() tensors alias the live weights.
        if test_loss < best_test_loss:
            best_model_state = copy.deepcopy(model.state_dict())
            best_test_loss = test_loss

    torch.save(best_model_state, 'wind_gru_caiso_sigmoid_version2.pth')
    print(f'Best model saved with test loss: {best_test_loss}')
    return best_model_state

# Train the target GRU; `device` comes from an earlier cell.
csv_file = 'CAISO_zone_1_.csv'
model = train_model(csv_file, epochs=20, learning_rate=0.00003, device=device)

### Transformer

In [None]:
class WindPowerPredictorV2(nn.Module):
    """Transformer-encoder regressor (target model).

    Wind history and future weather are linearly embedded to ``d_model``
    features, given learnable positional offsets (initialised to zero), and
    passed through the SAME encoder stack one after the other. The last time
    step of each encoded sequence is concatenated and fed to an MLP head
    followed by a sigmoid.

    Args:
        d_model: Embedding width.
        nhead: Attention heads per encoder layer.
        num_layers: Number of encoder layers.
        dim_feedforward: Hidden width of each encoder layer's feed-forward part.
        dropout: Dropout probability in the encoder and the head.
        max_seq_len: Longest sequence the positional parameters cover.
    """

    def __init__(self, d_model=64, nhead=4, num_layers=3, dim_feedforward=128, dropout=0.1, max_seq_len=300):
        super().__init__()
        self.d_model = d_model

        self.embedding_wind = nn.Linear(1, d_model)
        self.embedding_weather = nn.Linear(8, d_model)

        # One learnable offset per position, broadcast over the batch axis.
        self.pos_encoder_wind = nn.Parameter(torch.zeros(max_seq_len, 1, d_model))
        self.pos_encoder_weather = nn.Parameter(torch.zeros(max_seq_len, 1, d_model))

        layer_template = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=False,
        )
        self.transformer_encoder = nn.TransformerEncoder(layer_template, num_layers)

        half_width = d_model // 2
        head_layers = [
            nn.Linear(d_model * 2, d_model),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model, half_width),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(half_width, 1),
        ]
        self.fc_combine = nn.Sequential(*head_layers)
        self.sigmoid = nn.Sigmoid()

    def forward(self, wind_history, weather_future):
        """Return predictions of shape ``(batch, 1)``.

        ``wind_history`` is ``(batch, T1)`` and ``weather_future`` is
        ``(batch, T2, 8)``. Raises ``ValueError`` when ``T1`` or ``T2``
        exceeds the number of positional parameters.
        """
        _, wind_len = wind_history.shape
        _, weather_len, _ = weather_future.shape

        max_wind_len = self.pos_encoder_wind.size(0)
        if wind_len > max_wind_len:
            raise ValueError(f"Wind sequence length {wind_len} exceeds max supported length {max_wind_len}")
        max_weather_len = self.pos_encoder_weather.size(0)
        if weather_len > max_weather_len:
            raise ValueError(f"Weather sequence length {weather_len} exceeds max supported length {max_weather_len}")

        # Embed and switch to the (T, B, D) layout used by the encoder.
        wind_seq = self.embedding_wind(wind_history.unsqueeze(-1)).transpose(0, 1)
        weather_seq = self.embedding_weather(weather_future).transpose(0, 1)

        # Add the positional offsets for the positions actually present.
        wind_seq = wind_seq + self.pos_encoder_wind[:wind_len]
        weather_seq = weather_seq + self.pos_encoder_weather[:weather_len]

        # Both sequences go through the shared encoder stack.
        wind_encoded = self.transformer_encoder(wind_seq)
        weather_encoded = self.transformer_encoder(weather_seq)

        # Last time step of each sequence, side by side: (B, 2D).
        fused = torch.cat([wind_encoded[-1], weather_encoded[-1]], dim=1)
        return self.sigmoid(self.fc_combine(fused))

def train_model(csv_file, epochs=10, batch_size=32, learning_rate=0.001, test_split=0.2, device='cpu'):
    """Train a ``WindPowerPredictorV2`` and save the best checkpoint to disk.

    The dataset is rebuilt from ``csv_file`` (``save_scalers=True``) and split
    chronologically without shuffling. After each epoch the mean per-batch MSE
    on the held-out part is printed and a deep copy of the weights is kept
    when it improves.

    Args:
        csv_file: CSV path passed to ``WindPowerDataset``.
        epochs: Number of passes over the training subset.
        batch_size: Batch size of both data loaders.
        learning_rate: Adam learning rate.
        test_split: Fraction of windows (taken from the end) held out.
        device: Device the model and the batches are moved to.

    Returns:
        The model object as it stands after the final epoch. Its weights can
        differ from the best-epoch checkpoint written to
        ``'wind_caiso_transformer_sigmoid_version2.pth'``.
    """
    dataset = WindPowerDataset(csv_file, save_scalers=True)
    test_size = int(len(dataset) * test_split)
    train_size = len(dataset) - test_size
    train_dataset = Subset(dataset, list(range(train_size)))
    test_dataset = Subset(dataset, list(range(train_size, len(dataset))))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = WindPowerPredictorV2().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    best_test_loss = float('inf')
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        for wind_history, weather_future, future_wind_power in train_loader:
            wind_history = wind_history.to(device)
            weather_future = weather_future.to(device)
            future_wind_power = future_wind_power.to(device)
            optimizer.zero_grad()
            output = model(wind_history, weather_future)
            # squeeze(-1) removes only the feature axis; a bare squeeze()
            # would also drop the batch axis of a size-1 final batch.
            loss = criterion(output.squeeze(-1), future_wind_power)
            loss.backward()
            optimizer.step()

        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for wind_history, weather_future, future_wind_power in test_loader:
                wind_history = wind_history.to(device)
                weather_future = weather_future.to(device)
                future_wind_power = future_wind_power.to(device)
                output = model(wind_history, weather_future)
                loss = criterion(output.squeeze(-1), future_wind_power)
                test_loss += loss.item()

        # Mean of the per-batch losses.
        test_loss /= len(test_loader)
        print(f'Epoch {epoch+1}/{epochs}, Test Loss: {test_loss}')

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            # dict.copy() is shallow: its tensors would keep tracking the live
            # weights, so later epochs would overwrite the "best" snapshot.
            best_model_state = copy.deepcopy(model.state_dict())

    torch.save(best_model_state, 'wind_caiso_transformer_sigmoid_version2.pth')
    print(f'Best model saved with test loss: {best_test_loss}')

    return model


In [None]:
# Train the target transformer; relies on csv_file and device defined in an earlier cell.
model = train_model(csv_file, epochs=20, learning_rate=0.00003, device=device)

### TCN

In [None]:
class TemporalConvNet_V2(nn.Module):
    """Dilated 1-D convolution stack with residual connections.

    Level ``i`` uses dilation ``2 ** i`` and padding
    ``(kernel_size - 1) * 2 ** i``. After each convolution the output is cut
    back to the input length, keeping the leading steps. A 1x1 convolution
    adapts the residual path whenever the channel count changes.

    Args:
        num_inputs: Number of input channels.
        num_channels: Output channels per level; its length sets the depth.
        kernel_size: Convolution kernel size.
        dropout: Dropout probability applied after the first ReLU of a level.
    """

    def __init__(self, num_inputs, num_channels, kernel_size=3, dropout=0.2):
        super().__init__()
        blocks = []
        in_channels = num_inputs
        for level, out_channels in enumerate(num_channels):
            dilation = 2 ** level
            conv = nn.Conv1d(
                in_channels,
                out_channels,
                kernel_size,
                stride=1,
                padding=(kernel_size - 1) * dilation,
                dilation=dilation,
            )
            # The residual path needs a projection only when widths differ.
            shortcut = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None
            blocks.append(nn.ModuleDict({
                'conv': conv,
                'relu': nn.ReLU(),
                'dropout': nn.Dropout(dropout),
                'res_conv': shortcut,
            }))
            in_channels = out_channels
        self.layers = nn.ModuleList(blocks)

    def forward(self, x):
        """Map ``(batch, in_channels, steps)`` to ``(batch, out_channels, steps)``."""
        for block in self.layers:
            skip = x

            out = block['dropout'](block['relu'](block['conv'](x)))
            # Cut the padded output back to the input length.
            out = out[:, :, :skip.size(2)]

            # Residual connection.
            projection = block['res_conv']
            if projection is not None:
                skip = projection(skip)
            x = block['relu'](out + skip)
        return x


class WindPowerPredictorTCN_V2(nn.Module):
    """Two-branch residual-TCN regressor (target model).

    A four-level ``TemporalConvNet_V2`` is applied to the wind history (one
    channel) and another to the future weather (eight channels). The last time
    step of both outputs is fused, by concatenation or by addition, and fed
    to a small MLP head followed by a sigmoid.

    Args:
        d_model: Base channel width of the TCNs and the head.
        dropout: Dropout probability in the TCNs and the head.
        fusion_mode: ``'concat'`` or ``'add'``.

    Raises:
        ValueError: If ``fusion_mode`` is neither ``'concat'`` nor ``'add'``.
    """

    def __init__(self, d_model=64, dropout=0.2, fusion_mode='concat'):
        super().__init__()
        self.fusion_mode = fusion_mode  # 'concat' or 'add'

        self.tcn_wind = TemporalConvNet_V2(
            num_inputs=1,
            num_channels=[d_model, d_model * 2, d_model, d_model],
            kernel_size=3,
            dropout=dropout,
        )
        self.tcn_weather = TemporalConvNet_V2(
            num_inputs=8,
            num_channels=[d_model, d_model * 2, d_model, d_model],
            kernel_size=3,
            dropout=dropout,
        )

        # Concatenation doubles the width of the head input; addition keeps it.
        if fusion_mode == 'concat':
            fc_input_dim = d_model * 2
        elif fusion_mode == 'add':
            fc_input_dim = d_model
        else:
            raise ValueError("fusion_mode must be 'concat' or 'add'")

        head_layers = [
            nn.Linear(fc_input_dim, d_model // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model // 2, 1),
        ]
        self.fc = nn.Sequential(*head_layers)
        self.sigmoid = nn.Sigmoid()

    def forward(self, wind_history, weather_future):
        """Return predictions of shape ``(batch, 1)``.

        ``wind_history`` is ``(batch, steps)``; ``weather_future`` is
        ``(batch, steps, 8)``.
        """
        # Channels-first wind input (B, 1, T); keep the last step -> (B, D).
        wind_feat = self.tcn_wind(wind_history.unsqueeze(1))[:, :, -1]

        # Channels-first weather input (B, 8, T); keep the last step -> (B, D).
        weather_feat = self.tcn_weather(weather_future.transpose(1, 2))[:, :, -1]

        if self.fusion_mode == 'concat':
            combined = torch.cat([wind_feat, weather_feat], dim=1)  # (B, 2D)
        elif self.fusion_mode == 'add':
            combined = wind_feat + weather_feat  # (B, D)

        return self.sigmoid(self.fc(combined))

In [None]:
def load_model(model_path, device='cpu'):
    """Build a default ``WindPowerPredictorTCN_V2`` and load saved weights into it.

    Args:
        model_path: File holding a ``state_dict`` readable by ``torch.load``.
        device: Device for the model and for mapping the loaded tensors.

    Returns:
        The model with the weights loaded, switched to evaluation mode.
    """
    network = WindPowerPredictorTCN_V2()
    network = network.to(device)
    weights = torch.load(model_path, map_location=device)
    network.load_state_dict(weights)
    # eval() returns the module itself.
    return network.eval()

def get_lr_lambda(total_epochs):
    """Build the multiplier function for a linearly decaying learning rate.

    The returned callable maps ``epoch`` to ``1 - epoch / total_epochs``.
    """
    return lambda epoch: 1 - (epoch / total_epochs)

def train_model_tcn(csv_file, epochs=40, batch_size=32, learning_rate=0.001, test_split=0.2, device='cpu'):
    """Train a ``WindPowerPredictorTCN_V2`` and checkpoint its best epoch.

    The dataset is split by index order without shuffling: the leading
    samples are used for training and the trailing ``test_split`` fraction
    for evaluation. The learning rate is scaled each epoch by the linear
    decay from ``get_lr_lambda``. After every epoch the mean MSE over the
    evaluation batches is printed, and the weights of the epoch with the
    lowest value are written to ``'wind_tcn_caiso_sigmoid_v2.pth'``.

    Args:
        csv_file: Path handed to ``WindPowerDataset`` (which also saves its
            scalers because ``save_scalers=True`` is passed).
        epochs: Number of training epochs.
        batch_size: Batch size for both data loaders.
        learning_rate: Initial Adam learning rate.
        test_split: Fraction of the dataset, taken from the end, used for
            evaluation.
        device: Device on which the model and batches are placed.

    Returns:
        The model object as it stands after the final epoch. The best-epoch
        weights are the ones saved to disk, not necessarily those returned.
    """
    dataset = WindPowerDataset(csv_file, save_scalers=True)
    test_size = int(len(dataset) * test_split)
    train_size = len(dataset) - test_size
    train_dataset = Subset(dataset, list(range(train_size)))
    test_dataset = Subset(dataset, list(range(train_size, len(dataset))))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = WindPowerPredictorTCN_V2(d_model=64, dropout=0.2, fusion_mode='concat').to(device)
    criterion = torch.nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    lr_lambda = get_lr_lambda(epochs)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda)

    best_loss = float('inf')
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        for wind_history, weather_future, future_wind_power in train_loader:
            wind_history = wind_history.to(device)
            weather_future = weather_future.to(device)
            future_wind_power = future_wind_power.to(device)

            optimizer.zero_grad()
            output = model(wind_history, weather_future)
            loss = criterion(output.squeeze(), future_wind_power)
            loss.backward()
            optimizer.step()

        # The schedule is per-epoch, so step it once after all batches.
        scheduler.step()

        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for wind_history, weather_future, future_wind_power in test_loader:
                wind_history = wind_history.to(device)
                weather_future = weather_future.to(device)
                future_wind_power = future_wind_power.to(device)

                output = model(wind_history, weather_future)
                loss = criterion(output.squeeze(), future_wind_power)
                test_loss += loss.item()

        # Mean of the per-batch losses.
        test_loss /= len(test_loader)

        print(f'Epoch {epoch+1}/{epochs}, Test Loss: {test_loss}')

        if test_loss < best_loss:
            # Record the new best score; without this every epoch would
            # compare against infinity and overwrite the snapshot.
            best_loss = test_loss
            # state_dict() returns references to the live tensors, so a deep
            # copy is needed to freeze this epoch's weights.
            best_model_state = copy.deepcopy(model.state_dict())

    if best_model_state is None:
        # No epoch produced a comparable loss (e.g. NaN losses); save the
        # current weights rather than writing None to the checkpoint file.
        best_model_state = copy.deepcopy(model.state_dict())
    torch.save(best_model_state, 'wind_tcn_caiso_sigmoid_v2.pth')

    return model

In [None]:
# Train the TCN predictor for 20 epochs; train_model_tcn writes its checkpoint
# to 'wind_tcn_caiso_sigmoid_v2.pth'. The returned model is discarded here.
# `csv_file` and `device` must already be defined before this cell runs.
train_model_tcn(csv_file, epochs=20, batch_size=32, learning_rate=0.00003, test_split=0.2, device=device)

In [None]:
# Rebuild the TCN predictor with its default constructor arguments and load
# the checkpoint file written by train_model_tcn.
# NOTE(review): train_model_tcn builds the model with d_model=64 and
# fusion_mode='concat'; confirm the constructor defaults match, otherwise
# load_state_dict will raise on mismatched parameter shapes.
# NOTE(review): eval() is not called here, so the module stays in training
# mode (dropout active) until eval() is called elsewhere.
model=WindPowerPredictorTCN_V2().to(device)
model.load_state_dict(torch.load('wind_tcn_caiso_sigmoid_v2.pth', map_location=device))

### LSTM

In [None]:
class WindPowerPredictor(nn.Module):
    """Two-branch LSTM regressor.

    One single-layer LSTM reads the wind history (one feature per step) and a
    second reads the weather sequence (eight features per step). The final
    hidden states of both branches (128 values each) are concatenated and
    mapped to a single value by a linear layer followed by ReLU.
    """

    def __init__(self):
        super().__init__()
        hidden = 128
        # Construction order is kept (wind, weather, fc) so that parameter
        # initialisation and state-dict layout stay the same.
        self.lstm_wind = nn.LSTM(input_size=1, hidden_size=hidden, num_layers=1, batch_first=True)
        self.lstm_weather = nn.LSTM(input_size=8, hidden_size=hidden, num_layers=1, batch_first=True)
        self.fc = nn.Linear(2 * hidden, 1)
        self.relu = nn.ReLU()

    def forward(self, wind_history, weather_future):
        """Return the ReLU-clamped prediction for a batch.

        Args:
            wind_history: Batched wind sequence without a feature axis; a
                trailing axis of size 1 is appended before the LSTM.
            weather_future: Batched weather sequence with 8 features per step.

        Returns:
            Tensor with one non-negative value per batch row.
        """
        _, wind_states = self.lstm_wind(wind_history.unsqueeze(-1))
        _, weather_states = self.lstm_weather(weather_future)

        # Each states tuple is (h_n, c_n); keep h_n of the last layer.
        last_hidden = [states[0][-1] for states in (wind_states, weather_states)]
        fused = torch.cat(last_hidden, dim=1)
        return self.relu(self.fc(fused))

In [None]:
def train_model(csv_file, epochs=10, batch_size=32, learning_rate=0.001, test_split=0.2, device='cpu'):
    """Train a ``WindPowerPredictor`` (LSTM) and checkpoint its best epoch.

    The dataset is split by index order without shuffling: the leading
    samples are used for training and the trailing ``test_split`` fraction
    for evaluation. After every epoch the mean MSE over the evaluation
    batches is printed, and the weights of the epoch with the lowest value
    are written to ``'wind_caiso_lstm_sigmoid_version2.pth'``.

    Args:
        csv_file: Path handed to ``WindPowerDataset`` (which also saves its
            scalers because ``save_scalers=True`` is passed).
        epochs: Number of training epochs.
        batch_size: Batch size for both data loaders.
        learning_rate: Adam learning rate.
        test_split: Fraction of the dataset, taken from the end, used for
            evaluation.
        device: Device on which the model and batches are placed.

    Returns:
        The model object as it stands after the final epoch. The best-epoch
        weights are the ones saved to disk, not necessarily those returned.
    """
    dataset = WindPowerDataset(csv_file, save_scalers=True)
    test_size = int(len(dataset) * test_split)
    train_size = len(dataset) - test_size
    train_dataset = Subset(dataset, list(range(train_size)))
    test_dataset = Subset(dataset, list(range(train_size, len(dataset))))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = WindPowerPredictor().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    best_test_loss = float('inf')
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        for wind_history, weather_future, future_wind_power in train_loader:
            wind_history = wind_history.to(device)
            weather_future = weather_future.to(device)
            future_wind_power = future_wind_power.to(device)

            optimizer.zero_grad()
            output = model(wind_history, weather_future)
            loss = criterion(output.squeeze(), future_wind_power)
            loss.backward()
            optimizer.step()

        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for wind_history, weather_future, future_wind_power in test_loader:
                wind_history = wind_history.to(device)
                weather_future = weather_future.to(device)
                future_wind_power = future_wind_power.to(device)

                output = model(wind_history, weather_future)
                loss = criterion(output.squeeze(), future_wind_power)
                test_loss += loss.item()

        # Mean of the per-batch losses.
        test_loss /= len(test_loader)
        print(f'Epoch {epoch+1}/{epochs}, Test Loss: {test_loss}')

        if test_loss < best_test_loss:
            best_test_loss = test_loss
            # state_dict() returns references to the live parameter tensors;
            # a shallow dict copy would keep tracking later optimizer steps,
            # so deep-copy to freeze this epoch's weights.
            best_model_state = copy.deepcopy(model.state_dict())

    if best_model_state is None:
        # No epoch produced a comparable loss (e.g. epochs == 0 or NaN
        # losses); save the current weights rather than writing None.
        best_model_state = copy.deepcopy(model.state_dict())

    torch.save(best_model_state, 'wind_caiso_lstm_sigmoid_version2.pth')
    print(f'Best model saved with test loss: {best_test_loss}')

    return model

In [None]:
# Train the LSTM predictor for 20 epochs; train_model writes its checkpoint to
# 'wind_caiso_lstm_sigmoid_version2.pth' and returns the trained model.
# This rebinds `model`, replacing the TCN instance assigned in an earlier cell.
model = train_model(csv_file, device=device, epochs=20, learning_rate=0.0001)