## In this version I will integrate a neural netwrok to my reinforcement learning model to enhance its ability to capture complex patterns and relationships in the data thar a linear model might miss. This approach is commonly known as a Deep-Q-Netork (DQN).

In [1]:
import gurobipy as gp
import pandas as pd
from code_map import final_markets, new_meters, utils,  timeframes, rl_utils, met_api, nve_api
import numpy as np
from datetime import datetime, timedelta
from collections import defaultdict, Counter
import random
import sklearn
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as Func

In [2]:
tf = timeframes.one_week
areas = ["NO5"]
norm_method = "min_max"

In [3]:
L, M, H = utils.get_all_sets(timeframe= tf, areas = areas)
F, freq_data, _ = utils.get_frequency_sets(tf= tf, M =M, H= H)
L_u, L_d, Fu_h_l, Fd_h_l, R_h_l, P_h_m, Vp_h_m, Vm_m, R_m = utils.get_parameters(L = L, M = M, H = H)
Ir_hlm, Ia_hlm, Va_hm = utils.get_income_dictionaries(H=H, L = L, M = M, freq_data= freq_data, Fu_h_l= Fu_h_l, Fd_h_l= Fd_h_l, P_h_m= P_h_m, Vp_h_m= Vp_h_m, F = F, markets_dict = {market.name : market for market in M}, timeframe = tf, areas = areas)
compatible_dict = utils.get_compatibility_dict(L = L ,M = M, index = False)
compatible_list = utils.get_compatibility_dict(L = L ,M = M, index = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.sort_values(by = "Time", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.sort_values(by = "Time", inplace = True)


In [4]:
sup_market_names = ["FCR", "aFRR"]
markets = [market for market in M if sup_market_names[0] in market.name  or sup_market_names[1] in market.name]
exp_price_dict, exp_vol_dict = rl_utils.get_expected_prices_and_volumes_dict(bid_timeframe= H, markets= markets)
norm_exp_price_dict = rl_utils.normalize_dict_vals(exp_price_dict, norm_method= norm_method)
norm_exp_vol_dict = rl_utils.normalize_dict_vals(exp_vol_dict, norm_method= norm_method)
weather_dict = met_api.get_normalized_weather_dfs(reference_tf= timeframes.one_month, usage_tf = tf, areas = areas)

spot_path = "../master-data/spot_data/spot_june_23.csv"
norm_da_df = final_markets.preprocess_spot_data(pd.read_csv(spot_path), year = tf.year, start_month = tf.start_month, end_month = tf.end_month, start_day = tf.start_day, end_day = tf.end_day, start_hour = tf.start_hour, end_hour = tf.end_hour, area = "NO5", normalize= True)



In [5]:
air_temp_df = weather_dict[("air_temperature", areas[0])]
wind_speed_df = weather_dict[("wind_speed", areas[0])]
precipitation_df = weather_dict[("sum(precipitation_amount P1D)", areas[0])]
cloud_cover_df = weather_dict[("cloud_area_fraction", areas[0])]

In [6]:
markets = [market for market in M if sup_market_names[0] in market.name  or sup_market_names[1] in market.name]
print([m.name for m in markets])

['FCR_D_D_1_NO5', 'FCR_D_D_2_NO5', 'FCR_N_D_1_NO5', 'FCR_N_D_2_NO5', 'aFRR up_NO5', 'aFRR down_NO5']


In [7]:
def get_features(bid_hour : pd.Timestamp, available_assets : [new_meters.PowerMeter], market : final_markets.ReserveMarket, 
                 norm_exp_price_dict : dict, norm_exp_vol_dict : dict, precipitation_df : pd.DataFrame, air_temp_df : pd.DataFrame, 
                 cloud_cover_df : pd.DataFrame, wind_speed_df : pd.DataFrame, norm_da_df : pd.DataFrame, L : [new_meters.PowerMeter], 
                 markets : [final_markets.ReserveMarket]):
    """ Function to get the features for the given hour and market. It is important to use features that will help the model learn which actions to take 
        and update the weights correctly for the given state.
        The features that possibly can be used here are the following:
        - day of week
        - hour of day
        - number of possible assets
        - Day Ahead (DA) price
        - Weather forecast including precipitation, temperature, wind speed and cloud cover
        - Market historical prices
        - Market historical volumes
        - frequency data (historical)

        Will add more features as I go along. Fyllingsgrad is ready to be added, but I only have weekly values which means that it will be the same for all hours in the week.
        
    Args:
        available_assets (new_meters.PowerMeter]): _description_
        hour (pd.Timestamp): _description_
        market (final_markets.ReserveMarket): _description_
        norm_exp_price_dict (dict) : dictionary of the normalized expected prices for each direction and area in the bid_timeframe 
        norm_exp_vol_dict (dict) : of the normalized expected volumes for each direction and area in the bid_timeframe 
        norm_w_df (pd.DataFrame) : normalized weather data 
        norm_da_df (pd.DataFrame) : normalized day ahead prices 
        L ([new_meters.PowerMeter]) : list of all possible assets

    Returns:
        torch.tensor: normalized features which depends on which market, hour and available assets are given
    """
    day_of_week = bid_hour.weekday()
    hour_of_day = bid_hour.hour
    expected_price = norm_exp_price_dict[(market.direction, market.area, bid_hour)]
    expected_volume = norm_exp_vol_dict[(market.direction, market.area, bid_hour)]
    #precipitation = norm_w_df["precipitation"].loc[norm_w_df["Time (Local)"] == bid_hour]
    #temperature = norm_w_df["air_temp"].loc[norm_w_df["Time (Local)"] == bid_hour]
    precipitation = precipitation_df.loc[bid_hour.replace(hour= 0, minute= 0, second= 0, microsecond= 0)]
    temperature = air_temp_df.loc[bid_hour]
    wind_speed = wind_speed_df.loc[bid_hour]
    cloud_cover = cloud_cover_df.loc[bid_hour.replace(hour= 0, minute= 0, second= 0, microsecond= 0)]
    da_price = norm_da_df["settlement"].loc[norm_da_df["Time(Local)"] == bid_hour]
    market_nr = (markets.index(market)+1)/6
    
    return torch.tensor([day_of_week/7, hour_of_day/24, expected_price, expected_volume, len(available_assets)/len(L), precipitation, temperature, wind_speed, cloud_cover, da_price.values[0], market_nr], dtype=torch.float32)

# info opp til 2 dager tilbake i tid kan være interessant, (en type moving average e.l.), skal jeg da ha det for det markedet som er relevant eller for alle? Skal det være for prisene i markedene, volum, begge? DA priser? Vær?
# uforventede utfall - umms - planlagte utfall ligger tilgjengelig - historiske umms ligger på nordpool. Trenger å vite tilgjegelig kapasitet og produksjon for forskjelige teknologier i hvert prisområde og om endringer er planlagt eller uplanlagt. når man får vite det
# bør adde wind speed og skydekke. Dette sier litt om produksjon fra vind og sol og kan derfor påvirke priser. 
# czc mellom budområdene. Regnes ut ved å se på DA prisen for to dager siden og differanse mellom to budområder. 
# vannstand - hvor data hentes fra NVE. Det er interessant, spesielt å se på om vannstand i noen av kraftverkene er tett opp mot 100% av tilgjengelig kapasitet da de må se seg nødt til å produsere mer for å unngå oversvømmelse.


In [41]:
# Define the neural network architecture
class QNetwork(nn.Module):
    def __init__(self, input_size, output_size, hidden_sizes):
        super(QNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.output = nn.Linear(hidden_sizes[1], output_size)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.output(x)
        return x




In [42]:
class linearQNetwork(nn.Module):
    def __init__(self, input_size, output_size, hidden_size):
        super(linearQNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = Func.relu(self.fc1(x))
        x = Func.relu(self.fc2(x))
        return self.fc3(x)

In [47]:
def update_weights(network, optimizer, actions, new_actions, features, new_features, gamma, batch_size, rewards):
    """
    Update the weights of the neural network based on the provided batch of experiences and return the updated weights.

    Args:
        network (torch.nn.Module): The neural network whose weights will be updated.
        optimizer (torch.optim.Optimizer): The optimizer used for updating the weights.
        actions (list of int): Actions taken at each timestep.
        new_actions (list of int): Actions taken at the next timestep.
        features (torch.tensor): Features for each timestep.
        new_features (torch.tensor): Features for the next timestep.
        gamma (float): Discount factor for future rewards.

    Returns:
        dict: The updated weights of the neural network.
    """
  
    current_q_values = network(new_features).gather(1, actions.unsqueeze(-1)).squeeze(-1)

    # Get the predicted Q-values for the next states
    next_q_values = network(new_features).max(1)[0]

    # Calculate the target Q-values including the immediate reward
    target_q_values = rewards + gamma * next_q_values

    # Compute loss
    loss = F.mse_loss(current_q_values, target_q_values.detach())

    # Zero the gradients, perform a backward pass, and update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Return the updated weights
    return network.state_dict(), loss.item()


In [48]:
def initialize_weights(n_features :int , n_actions : int, zeros : bool = False):
    """ Function to initialize the weights to use in the RL model

    Args:
        n_features (int): number of features
        n_actions (int): number of actions
        zeros (bool, optional): If True, the weights will be initialized to zeros. Defaults to False which means that the weights will be initialized to random values between 0 and 0.1.

    Returns:
        torch.Tensor: The weights in a tensor of shape (n_actions, n_features)
    """
    if zeros:
        return torch.zeros((n_actions, n_features), dtype=torch.float32)
    else:
        # Initialize with random values from a normal distribution
        return torch.randn((n_actions, n_features), dtype=torch.float32) * 0.1

In [49]:
def train_nstep_model(epsilon : float, alpha : float, gamma : float, num_episodes : int, L : [new_meters.PowerMeter], M : [final_markets.ReserveMarket], H : [pd.Timestamp], norm_exp_price_dict : dict, norm_exp_vol_dict : dict, weather_dict : dict, norm_da_df : pd.DataFrame, n_actions : int, n_features : int):
    """ Training function to learn how to bid in to the markets due to the bidding constraints and the asset constraints as well as optimizing the weights. 
    The function will learn how to bid in to the markets by updating the weights due to the features. 
    The change from v_4 is that this function will compare bids that are set to the same hour to get a better estimate of the reward.
    The change from v_5 is that this function will use a neural network instead of a Q-table to update the weights.
    
    Args:
        epsilon (float): float number between 0 and 1, says how much the agent should explore
        alpha (float): float number between 0 and 1, also known as the learning rate
        gamma (float): float number between 0 and 1, also known as the discount factor
        num_episodes (int): number of episodes to be ran
        L ([new_meters.PowerMeter]): list of PowerMeter objects
        M ([final_markets.Reservemarket]): list of ReserveMarket objects
        H ([pd.Timestamp]): list of timestamps
        norm_exp_price_dict (dict): dictionary of the normalized expected prices for each direction and area in the bid_timeframe
        norm_exp_vol_dict (dict): dictionary of the normalized expected volumes for each direction and area in the bid_timeframe
        weather_dict (dict): dictionary of the normalized weather data
        norm_da_df (pd.DataFrame) : normalized day ahead prices in pd.dataframe format
        n_actions (int): number of actions
        n_features (int): number of features

    Returns:
        bids (dict): dictionary that holds control over the final bids for each market and each hour. The keys are tuples of the market name and the hour and the values are the the assets that were bid for the given market and hour. The values are tuples including the list of assets and the aggregated volume.
        revenues (dict): dictionary that holds the revenue for each episode
        available_assets (dict): dictionary that holds the available assets for each hour
        episode_weights (dict): dictionary that holds the weights for each episode
        action_feature_dict: dictionary that holds the features for each action for each episode
        asset_bids (list): list that holds the bids for each episode. The indexes are the episode number and the values are dataframes that holds the bids for each market and each hour for each episode.
    """
    revenues = {}
    bid_timeframe = H[48:] # the hours where bids can be placed in
    place_bid_hours = [hour for hour in H[24:-48] if hour.hour == 7 or hour.hour == 17 or hour.hour == 18] # the hours where bids can be placed from
    bids = {}
    asset_bids = []
    sup_market_names = ["FCR", "aFRR"]
    markets = [market for market in M if sup_market_names[0] in market.name  or sup_market_names[1] in market.name]
    #market_names = [market.name for market in markets]
    # will only use FCR-N, FCR-D and aFRR. The FCR markets are both D-1 and D-2 and the aFRR market is D-1 but it is both up and down
    air_temp_df = weather_dict[("air_temperature", areas[0])]
    wind_speed_df = weather_dict[("wind_speed", areas[0])]
    precipitation_df = weather_dict[("sum(precipitation_amount P1D)", areas[0])]
    cloud_cover_df = weather_dict[("cloud_area_fraction", areas[0])]

    available_assets = {hour: L.copy() for hour in bid_timeframe} 
    #bids = {(market.name, hour): [] for hour in bid_timeframe for market in markets}
    "For each hour, an action should be to either bid in every feasible asset, to bid the minimum volume or to bid nothing"
    (possible_hours, market_name) = rl_utils.get_possible_dates(place_bid_hours[0]) # the market_name is not the full name of the market, but rather a substring of the full name. Since FCR-D and FCR-N has the same deadlines there will be two markets with the same substring
    
    possible_markets = [m for m in markets if market_name in m.name] # will either be one or two markets
    #indexes = [market_names.index(m.name) for m in possible_markets] # the indexes for each market. This is used to slice the Q-table so it is not necessary in this case
    features = get_features(bid_hour = possible_hours[0], available_assets= available_assets[possible_hours[0]], 
                            market = possible_markets[0],norm_da_df=norm_da_df, norm_exp_price_dict= norm_exp_price_dict, 
                            norm_exp_vol_dict= norm_exp_vol_dict, precipitation_df= precipitation_df, cloud_cover_df= cloud_cover_df, 
                            wind_speed_df= wind_speed_df, air_temp_df= air_temp_df, L = L, markets = markets)

    
    action_0 = random.randint(0, n_actions-1)
    action_feature_dict = {}
    weights = initialize_weights(n_actions = n_actions, n_features = n_features, zeros = False)
    hidden_sizes = [64,64]
    device = (
            torch.device('cuda') if torch.cuda.is_available()
            else torch.device('cpu'))
    #network = QNetwork(input_size = n_features, output_size = n_actions, batch_size= 6*24, hidden_sizes = hidden_sizes).to(device = device)
    network = QNetwork(input_size = n_features, output_size = n_actions, hidden_sizes = hidden_sizes).to(device = device)
    optimizer = optim.Adam(network.parameters(), lr=0.01)


    # extract hours from H where hour == 7, 17, 18
    epsilon_decay = epsilon/num_episodes
    alpha_decay = alpha/num_episodes
    episode_weights = {}
    episode_losses = {}

    for episode_n in range(num_episodes):
        ep_loss = 0
        if episode_n > episode_n/10:
            epsilon -= epsilon_decay
            alpha -= alpha_decay
        revenue = 0
        episode_bid_df = pd.DataFrame(columns= ["Market", "Hour", "Asset Count", "Total Flex Volume"])
        episode_bid_dict = {}
        reward_bid_hour_dict = {}
        if episode_n == 0:
            (D_2_hours, D_2_names) = rl_utils.get_possible_dates(H[17]) # returns the possible hours for the market to place bids in
            D_2_markets = [m for m in markets if D_2_names in m.name] # because i am now using only no5 markets, this list should be of length 2.
            print([m.name for m in D_2_markets])
            for D_2_market in D_2_markets: # the markets that are possible to bid in
                for D_2_hour in D_2_hours:
                    chosen_portfolio, reward, flex_vol = rl_utils.make_bid(D_2_market, D_2_hour, action_0, available_assets[D_2_hour], compatible_dict) # get portfolio and reward for a bid - will have to figure out a better way to define reward
                    if D_2_hour not in reward_bid_hour_dict.keys():
                        reward_bid_hour_dict[D_2_hour] = [reward]
                    else:
                        reward_bid_hour_dict[D_2_hour].append(reward)

                    available_assets[D_2_hour] = [asset for asset in available_assets[D_2_hour] if asset not in chosen_portfolio] # remove the assets that were bid from the available assets

                    if len(chosen_portfolio) > 0:
                        episode_bid_df.loc[len(episode_bid_df)] = [D_2_market.name, D_2_hour, len(chosen_portfolio), flex_vol] # add the bid to the episode_bid_df

                    episode_bid_dict[(D_2_market.name, D_2_hour)] = (chosen_portfolio, flex_vol) # add the bid to the bids dictionary
                    
                    new_features = get_features(bid_hour = D_2_hour, available_assets= available_assets[D_2_hour], 
                                                market = D_2_market, norm_da_df=norm_da_df, norm_exp_price_dict= norm_exp_price_dict, 
                                                norm_exp_vol_dict= norm_exp_vol_dict, precipitation_df= precipitation_df, cloud_cover_df= cloud_cover_df, 
                                                wind_speed_df= wind_speed_df, air_temp_df= air_temp_df, L = L, markets = markets) # update the features
                    #print(f"weights : {weights}")
                    possible_actions = torch.tensor([torch.matmul(weights[action], features) for action in range(n_actions)], dtype=torch.float32)
                    #print(f"possible actions : {possible_actions}")
                    
                    new_action = rl_utils.greedy_action(possible_actions, epsilon)
                    action_feature_dict[(episode_n, D_2_market.name, D_2_hour)] = (action_0, new_action, features, new_features)
                    features, action_0 = new_features, new_action
                    revenue += reward
       
        #print(f" weights at the start of episode nr {episode_n}: {weights}")
        #features, action_0, revenue, new_features, episode_bid_dict, available_assets, reward_bid_hour_dict, episode_bid_df, action_feature_dict = one_episode_actions(episode_n = episode_n, epsilon = epsilon, alpha = alpha, gamma = gamma, L = L, M = M, H = H, norm_exp_price_dict = norm_exp_price_dict, norm_exp_vol_dict = norm_exp_vol_dict, weather_dict = weather_dict, norm_da_df = norm_da_df, n_actions = n_actions, n_features = n_features, markets = markets, bid_timeframe = bid_timeframe, place_bid_hours = place_bid_hours, available_assets = available_assets, weights = weights, action_0 = action_0, revenue = revenue, episode_bid_dict = episode_bid_dict, reward_bid_hour_dict = reward_bid_hour_dict, episode_bid_df = episode_bid_df, action_feature_dict = action_feature_dict, precipitation_df = precipitation_df, cloud_cover_df = cloud_cover_df, wind_speed_df = wind_speed_df, air_temp_df = air_temp_df)
        available_assets = {hour: L.copy() for hour in bid_timeframe}
        for place_hour in place_bid_hours: # the hours when the bids are placed from
            (possible_hours, market_name) = rl_utils.get_possible_dates(place_hour) # returns the possible hours for the market to place bids in
           
            possible_markets = [m for m in markets if market_name in m.name] # because i am now using only no5 markets, this list should be of length 2.
            #print([m.name for m in possible_markets])
            
            for current_market in possible_markets: # the markets that are possible to bid in
                for bid_hour in possible_hours:
                    chosen_portfolio, reward, flex_vol = rl_utils.make_bid(current_market, bid_hour, action_0, available_assets[bid_hour], compatible_dict) # get portfolio and reward for a bid - will have to figure out a better way to define reward
                    if bid_hour not in reward_bid_hour_dict.keys():
                        reward_bid_hour_dict[bid_hour] = [reward]
                    else:
                        reward_bid_hour_dict[bid_hour].append(reward)

                    available_assets[bid_hour] = [asset for asset in available_assets[bid_hour] if asset not in chosen_portfolio] # remove the assets that were bid from the available assets

                    if len(chosen_portfolio) > 0:
                        episode_bid_df.loc[len(episode_bid_df)] = [current_market.name, bid_hour, len(chosen_portfolio), flex_vol] # add the bid to the episode_bid_df

                    episode_bid_dict[(current_market.name, bid_hour)] = (chosen_portfolio, flex_vol) # add the bid to the bids dictionary
                    
                    new_features = get_features(bid_hour = bid_hour, available_assets= available_assets[bid_hour], 
                                                market = current_market, norm_da_df=norm_da_df, norm_exp_price_dict= norm_exp_price_dict, 
                                                norm_exp_vol_dict= norm_exp_vol_dict, precipitation_df= precipitation_df, cloud_cover_df= cloud_cover_df, 
                                                wind_speed_df= wind_speed_df, air_temp_df= air_temp_df, L = L, markets = markets) # update the features
                    #print(f"weights : {weights}")
                    possible_actions = torch.tensor([torch.matmul(weights[action], features) for action in range(n_actions)], dtype=torch.float32)
                    #print(f"possible actions : {possible_actions}")
                    
                    new_action = rl_utils.greedy_action(possible_actions, epsilon)
                    action_feature_dict[(episode_n, current_market.name, bid_hour)] = (action_0, new_action, features, new_features)
                    features, action_0 = new_features, new_action
                    revenue += reward
            #print([m.name for m in possible_markets])
            # Instead of updating the weights after each hour, the weights are updated after each market. This is to get a better estimate of the reward since the rewards are given after each of the markets are considererd for an hour.
            if "FCR_N_D_1_NO5" in [m.name for m in possible_markets]: # The FCR D-1 markets has the latest deadline, so after these markets has been considered, all markets has been considered for the given hour.
                # istead of updating the weights after each hour, the weights are updated after each market. This is to get a better estimate of the reward since the rewards are given after each of the markets are considererd for an hour.
                # I should probably use a neural network to update the weights instead of using the Q-table. Must find out how to do it properly
                #print("possible_hours : ", possible_hours)
                print("amount of hours : ", len(possible_hours))
                hourly_rewards = torch.tensor([reward_bid_hour_dict[bid_hour] for bid_hour in possible_hours], dtype=torch.float32) # should get 24 hours
                #print(f"hourly rewards : {hourly_rewards}")
                print(f"hourly rewards shape : {hourly_rewards.shape}")
                actions = torch.tensor([action_feature_dict[(episode_n, market.name, hour)][0] for market in markets for hour in possible_hours], dtype=torch.long) # should get 24 hours * 6 markets
                #print(f"actions : {actions}")
                print(f"actions shape : {actions.shape}")
                
                new_actions = torch.tensor([action_feature_dict[(episode_n, market.name, hour)][1] for market in markets for hour in possible_hours], dtype=torch.long) # should get 24 hours * 6 markets
                #print(f"new_actions : {new_actions}")
                print(f"new_actions shape : {new_actions.shape}")
                
                #make tensors of the features and new_features for each hour for each market where each set of features are 11 features long
                features = torch.stack([action_feature_dict[(episode_n, market.name, hour)][2] for market in markets for hour in possible_hours], dim = 0) # should get 24 hours * 6 markets * 11 features
                #print(f"features : {features}")
                print(f"features shape : {features.shape}")
                new_features = torch.stack([action_feature_dict[(episode_n, market.name, hour)][3] for market in markets for hour in possible_hours], dim = 0)
                #print(f"new_features : {new_features}")
                print(f"new_features shape : {new_features.shape}")
             
                batch_size = len(actions)
                weights, loss = update_weights(network, optimizer, actions,\
                                                new_actions, features, new_features, gamma, batch_size, hourly_rewards)
                print(f"weights after update: {weights}")
                ep_loss += loss


        
        episode_weights[episode_n] = weights.copy()        
        bids[episode_n] = episode_bid_df.sort_values(by = ["Hour"])
        revenues[episode_n] = revenue
        asset_bids.append(episode_bid_dict)
        episode_losses[episode_n] = ep_loss
    return bids, revenues, available_assets, episode_weights, action_feature_dict, asset_bids, episode_losses

In [50]:
bids, revenues, available_assets, weights, action_list, asset_bids, ep_losses = train_nstep_model(epsilon = 0.3, alpha = 0.3, gamma = 0.9, num_episodes = 2, L = L, M = M, H = H, norm_exp_price_dict = norm_exp_price_dict, norm_exp_vol_dict = norm_exp_vol_dict, weather_dict= weather_dict, norm_da_df = norm_da_df, n_features= 11, n_actions= 3)

['FCR_D_D_2_NO5', 'FCR_N_D_2_NO5']
amount of hours :  24
hourly rewards shape : torch.Size([24, 6])
actions shape : torch.Size([144])
new_actions shape : torch.Size([144])
features shape : torch.Size([144, 11])
new_features shape : torch.Size([144, 11])


RuntimeError: The size of tensor a (6) must match the size of tensor b (144) at non-singleton dimension 1

In [None]:
mod, x, y, w, d = utils.run_optimization_model(L = L, M = M, F = F, H = H, Fu_h_l = Fu_h_l, Fd_h_l = Fd_h_l, R_h_l = R_h_l, Vp_h_m = Vp_h_m, Vm_m = Vm_m, R_m = R_m, Ir_hlm = Ir_hlm, Ia_hlm = Ia_hlm, Va_hm = Va_hm, compatible_list= compatible_list, log_filename= "week_mod_no5.log", model_name= "week_mod_no5")