### In this file I will further develop the model, drawing inspiration from the previous bidding files. The goal is to end up with a deep reinforcement learning model that can handle the bidding process and place bids without knowing the prices in advance.

In [1]:
import gurobipy as gp
import pandas as pd
from code_map import final_markets, new_meters, utils, data_handling, timeframes
import numpy as np
from datetime import datetime, timedelta
from collections import defaultdict, Counter
import random

In [2]:
# Timeframe passed to the data-loading helpers in the following cells (one week of data).
tf = timeframes.one_week

In [4]:
# Load the model sets for the chosen timeframe, restricted to price area NO5.
# Further down, L is used as the list of assets (power meters), M as the list of reserve markets and
# H as the list of hourly timestamps. NOTE(review): F is presumably the frequency set - confirm in utils.get_all_sets.
L, M, F, H, freq_data, power_meter_dict, consumption_data = utils.get_all_sets(timeframe= tf, areas = ["NO5"])

In [5]:
# Derive the model parameters from the assets (L), markets (M) and hours (H).
# NOTE(review): the meaning of each returned name is defined in utils.get_parameters and is not visible in this notebook.
L_u, L_d, Fu_h_l, Fd_h_l, R_h_l, P_h_m, Vp_h_m, Vm_m, R_m = utils.get_parameters(L = L, M = M, H = H)

In [6]:
# Build the income dictionaries from the sets and parameters above, for the same timeframe and area (NO5).
# markets_dict maps each market name to its market object.
# NOTE(review): this call triggers the pandas SettingWithCopyWarning shown in the cell output
# (an in-place sort_values on a filtered frame inside the helper).
Ir_hlm, Ia_hlm, Va_hm = utils.get_income_dictionaries(H=H, L = L, M = M, freq_data= freq_data, Fu_h_l= Fu_h_l, Fd_h_l= Fd_h_l, P_h_m= P_h_m, Vp_h_m= Vp_h_m, F = F, markets_dict = {market.name : market for market in M}, timeframe = tf, areas = ["NO5"])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.sort_values(by = "Time", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.sort_values(by = "Time", inplace = True)


In [7]:
# Market -> compatible assets lookup; used further down as `asset in compatible_dict[market]`.
# NOTE(review): index=False presumably makes the helper return objects rather than positions - confirm in utils.
compatible_dict = utils.get_compatibility_dict(L = L ,M = M, index = False)

In [8]:
# Number of assets loaded for the selected timeframe and area.
len(L)

960

In [9]:
def random_arg_max(possible_actions):
    """Return the index of the largest value, breaking ties uniformly at random.

    The sequence is scanned once. Whenever another element equal to the current
    maximum is met, it replaces the remembered index with probability
    1 / (number of maxima seen so far), so every tied index is equally likely
    to be returned.

    Args:
        possible_actions: non-empty indexable sequence of comparable values

    Returns:
        int: index of one of the maximal elements
    """
    best_index = 0
    best_value = possible_actions[best_index]
    tie_count = 1  # how many elements equal to best_value have been seen
    for idx in range(1, len(possible_actions)):
        candidate = possible_actions[idx]
        if candidate > best_value:
            # Strictly better: it becomes the only known maximum.
            best_index, best_value, tie_count = idx, candidate, 1
            continue
        if candidate == best_value:
            tie_count += 1
            # Keep the newcomer with probability 1 / tie_count.
            if tie_count * random.random() < 1.0:
                best_index = idx
    return best_index


In [None]:
def greedy_action(Q : np.array, epsilon : float ):
    """Pick an action index with an epsilon-greedy policy.

    With probability (1 - epsilon) the index of the largest entry of Q is
    returned (ties broken at random by random_arg_max); otherwise an index is
    drawn uniformly from all positions of Q.

    Args:
        Q (np.array): the action values to choose between (already sliced for the current state)
        epsilon (float): exploration probability, a number between 0 and 1, often close to 0

    Returns:
        int: index of the selected action within Q
    """
    exploit = random.random() <= (1 - epsilon)
    if not exploit:
        # Explore: every position is equally likely.
        return random.randint(0, len(Q) - 1)
    return random_arg_max(Q)


#### I will have to determine which type of network to use and how to implement it. I may look into the INF265 tasks for inspiration. A deep feedforward network (multilayer perceptron) is interesting. I may also look into attention mechanisms and how to implement them. Actor-critic architectures are interesting as well. I may end up with a hybrid model.

In [10]:
def get_possible_dates(date : pd.Timestamp):
    """ Function to get the possible dates for placing a bid given the current date

    Bids can only be placed at three hours of the day. Each of them opens a
    window of 24 consecutive hourly timestamps, localised to Europe/Oslo:

    * hour 17 -> "D_2"  (FCR D-2): window starts 1 day and 7 hours after ``date``
    * hour 7  -> "aFRR":           window starts 17 hours after ``date``
    * hour 18 -> "D_1"  (FCR D-1): window starts 6 hours after ``date``

    For a ``date`` that lies exactly on the hour the window therefore begins at
    midnight (barring a daylight-saving shift in between).

    Args:
        date (pd.Timestamp): the current date

    Returns:
        (pd.DatetimeIndex, str): the possible dates for placing a bid and for which market.
        ``([], "No bids")`` is returned for every other hour of the day.
    """
    if date.hour == 17: # FCR D-2
        window_start, market_name = date + timedelta(days=1, hours=7), "D_2"
    elif date.hour == 7: # aFRR
        window_start, market_name = date + timedelta(hours=17), "aFRR"
    elif date.hour == 18: # FCR D-1
        window_start, market_name = date + timedelta(hours=6), "D_1"
    else:
        return ([], "No bids")

    # All three windows cover 24 hourly steps: start .. start + 23 h (inclusive).
    # "h" is the hourly alias; the upper-case "H" spelling is deprecated since pandas 2.2.
    window = pd.date_range(window_start, window_start + timedelta(hours=23), freq="h", tz="Europe/Oslo")
    return (window, market_name)

In [11]:
def check_constraints_for_hour(possible_assets : [[new_meters.PowerMeter]], hour : pd.Timestamp, possible_volume : float, expected_price : float, market : final_markets.ReserveMarket):
    """Evaluate whether a bid of ``possible_volume`` in ``market`` at ``hour`` yields revenue.

    The volume counts only if it is at least ``market.min_volume`` and strictly
    below the market volume for the hour; the price counts only if the market
    price for the hour is at least ``expected_price``. Both market figures are
    read from the second column of the first row whose "Time(Local)" equals
    ``hour`` (an IndexError is raised if no such row exists).

    Args:
        possible_assets: the assets that would take part in the bid
        hour (pd.Timestamp): the delivery hour to evaluate
        possible_volume (float): total volume the assets can offer in that hour
        expected_price (float): lowest market price at which a bid is made
        market (final_markets.ReserveMarket): the market to bid in

    Returns:
        tuple: (revenue, assets) - the revenue is accepted volume times accepted price,
        and the assets are ``possible_assets`` when the revenue is positive, otherwise ``[]``.
    """
    # Volume constraint: within [min_volume, market volume for the hour).
    volume_rows = market.volume_data.loc[market.volume_data["Time(Local)"] == hour]
    market_volume = volume_rows.values[0][1]
    if possible_volume >= market.min_volume and possible_volume < market_volume:
        accepted_volume = possible_volume
    else:
        accepted_volume = 0

    # Price constraint: only bid when the market price reaches the expectation.
    price_rows = market.price_data.loc[market.price_data["Time(Local)"] == hour]
    market_price = price_rows.values[0][1]
    if market_price >= expected_price:
        accepted_price = market_price
    else:
        accepted_price = 0

    revenue = accepted_volume * accepted_price
    if revenue > 0:
        return revenue, possible_assets
    return revenue, []

In [16]:
from itertools import combinations

def get_portfolio_for_market(possible_assets : [new_meters.PowerMeter], market : final_markets.ReserveMarket, hour : pd.Timestamp):
    """This function will return one feasible combination quickly.

    The assets are ranked by their up-flex volume in the given hour and added
    largest-first until the accumulated volume reaches ``market.min_volume``.

    Args:
        possible_assets ([new_meters.PowerMeter]): candidate assets; each must have an
            ``up_flex_volume`` frame with "Time(Local)" and "value" columns containing ``hour``
        market (final_markets.ReserveMarket): the market whose ``min_volume`` has to be reached
        hour (pd.Timestamp): the delivery hour the volumes are read for

    Returns:
        list: the selected assets (largest volume first), or an empty list when even
        all candidates together cannot reach the minimum volume.
    """

    # Fetch volumes for each asset at the given hour
    asset_volumes = [(asset, asset.up_flex_volume["value"].loc[asset.up_flex_volume["Time(Local)"] == hour].values[0])
                     for asset in possible_assets]

    # Sort assets by volume in descending order
    asset_volumes.sort(key=lambda pair: pair[1], reverse=True)

    # Greedily add the largest assets until the minimum volume is met
    feasible_combination = []
    total_volume = 0
    for asset, volume in asset_volumes:
        feasible_combination.append(asset)
        total_volume += volume
        if total_volume >= market.min_volume:
            return feasible_combination

    # The minimum volume was never reached: there is no feasible combination,
    # so do not hand back a portfolio that violates the market constraint.
    return []



In [None]:
import random

def get_n_portfolios_for_market(possible_assets : [new_meters.PowerMeter], market : final_markets.ReserveMarket, hour : pd.Timestamp, top_n=100, iterations=100):
    """Randomly search for several asset combinations that reach the market's minimum volume.

    Only the ``top_n`` assets with the largest up-flex volume in ``hour`` are
    considered. In every iteration a random subset of them is drawn and its
    assets are added in the sampled order until ``market.min_volume`` is
    reached; subsets that never reach it are discarded.

    Args:
        possible_assets ([new_meters.PowerMeter]): candidate assets; each must have an
            ``up_flex_volume`` frame with "Time(Local)" and "value" columns containing ``hour``
        market (final_markets.ReserveMarket): the market whose ``min_volume`` has to be reached
        hour (pd.Timestamp): the delivery hour the volumes are read for
        top_n (int): number of largest assets to sample from
        iterations (int): number of random subsets to try

    Returns:
        list: distinct feasible combinations, each a tuple of assets. The list is empty
        when there are no candidates or no sampled subset reached the minimum volume.
    """
    # Fetch volumes for each asset at the given hour
    asset_volumes = [(asset, asset.up_flex_volume["value"].loc[asset.up_flex_volume["Time(Local)"] == hour].values[0])
                     for asset in possible_assets]

    # Sort assets by volume in descending order
    asset_volumes.sort(key=lambda pair: pair[1], reverse=True)

    # Select top N assets
    top_assets = asset_volumes[:top_n]
    if not top_assets:
        return []

    feasible_combinations = set()

    for _ in range(iterations):
        # Randomly sample a subset of the top assets. The size is bounded by the number of
        # assets actually available: random.sample raises ValueError if asked for more
        # elements than the population holds, which happened whenever fewer than top_n existed.
        sample_size = random.randint(1, len(top_assets))
        sampled_assets = random.sample(top_assets, k=sample_size)

        # Greedy addition to meet minimum volume
        combination, total_volume = [], 0
        for asset, volume in sampled_assets:
            combination.append(asset)
            total_volume += volume
            if total_volume >= market.min_volume:
                feasible_combinations.add(tuple(combination))
                break

    return list(feasible_combinations)


### The get_portfolio_for_market function is useless, even though I only have NO5 assets and markets: it is far too computationally expensive. I will have to make it more efficient or find another way of defining the action space.

### It is important to note that the bidding deadlines are not the same for all markets. This has to be taken into account when training the model. I will have to build some sort of iteration process that lets the model learn the bidding deadline of each market and how it affects the bidding process. I must also take into account that bids are not necessarily accepted, so I need a mechanism where the model receives a signal telling it whether a bid was accepted before it makes the next bid.

### I think it will be a good idea to use RL to choose which markets to bid into, and then use some sort of heuristic to determine which assets to bid into the chosen market(s).

In [None]:
def get_all_feasible_portfolios(M : [final_markets.ReserveMarket], L : [new_meters.PowerMeter], H : [pd.Timestamp], expected_prices : dict = None):
    """Collect candidate asset portfolios for every (market, hour) pair.

    For each market the compatible assets are looked up in the notebook-level
    ``compatible_dict``. For "up" markets whose compatible assets can jointly
    reach ``market.min_volume`` in an hour, a set of feasible combinations is
    generated with ``get_n_portfolios_for_market`` and stored.

    NOTE(review): portfolios are only generated for "up" markets. For "down" and
    two-directional markets the biddable volume and the revenue check are computed
    but not yet turned into portfolios - the portfolio helpers only read up-flex volumes.

    Args:
        M ([final_markets.ReserveMarket]): the markets to build portfolios for
        L ([new_meters.PowerMeter]): all assets
        H ([pd.Timestamp]): the delivery hours to consider
        expected_prices (dict, optional): price thresholds keyed by
            ``(market.direction, market.area, hour)``; a missing key (or no dict at all)
            means a threshold of 0.0 is handed to ``check_constraints_for_hour``.

    Returns:
        dict: ``{(market, hour): [combination, ...]}`` for every "up" market and hour with
        enough total volume.
    """
    price_thresholds = {} if expected_prices is None else expected_prices

    def _volume_at(flex_volume, hour):
        # First "value" entry whose "Time(Local)" equals the hour (IndexError if absent).
        return flex_volume["value"].loc[flex_volume["Time(Local)"] == hour].values[0]

    portfolios = {}
    for market in M:
        # Compatibility depends on the market only. Keep this list untouched inside the
        # hour loop: overwriting it per hour used to empty it for all later hours.
        compatible_assets = [asset for asset in L if asset in compatible_dict[market]]
        for hour in H:
            hour_assets = compatible_assets
            if market.direction == "up":
                possible_volume = sum(_volume_at(asset.up_flex_volume, hour) for asset in compatible_assets)
                if possible_volume < market.min_volume:
                    hour_assets = []
                else:
                    # Sample feasible combinations of assets whose total volume reaches min_volume
                    portfolios[(market, hour)] = get_n_portfolios_for_market(compatible_assets, market, hour)
            elif market.direction == "down":
                possible_volume = sum(_volume_at(asset.down_flex_volume, hour) for asset in compatible_assets)
            else:
                possible_up_volume = sum(_volume_at(asset.up_flex_volume, hour) if asset.direction != "down" else 0 for asset in compatible_assets)
                possible_down_volume = sum(_volume_at(asset.down_flex_volume, hour) if asset.direction != "up" else 0 for asset in compatible_assets)
                # Both directions must reach min_volume; the biddable volume is then the smaller of the two
                if possible_up_volume >= market.min_volume and possible_down_volume >= market.min_volume:
                    possible_volume = min(possible_up_volume, possible_down_volume)
                else:
                    possible_volume = 0

            # Revenue check for this hour. All five arguments are passed by keyword: the previous
            # positional call left out expected_price and therefore raised a TypeError.
            # TODO: the result is hour-local and not yet folded into `portfolios`.
            hour_revenue, hour_assets = check_constraints_for_hour(
                possible_assets=hour_assets,
                hour=hour,
                possible_volume=possible_volume,
                expected_price=price_thresholds.get((market.direction, market.area, hour), 0.0),
                market=market,
            )

    return portfolios

In [None]:
def get_features(day_of_week, hour_of_day, available_assets, possible_markets, hour=None):
    """Bundle the state description into a feature list.

    Args:
        day_of_week: weekday of the delivery hour
        hour_of_day: hour of the delivery hour
        available_assets: the assets still available in that hour
        possible_markets: the markets (or their indexes) that can be bid into
        hour: currently unused; optional so that callers that do not pass it keep working

    Returns:
        list: ``[day_of_week, hour_of_day, available_assets, possible_markets]``
    """
    return [day_of_week, hour_of_day, available_assets, possible_markets]

In [None]:
def train_model(epsilon, alpha, weights, num_episodes, L, M, H):
    """Work-in-progress training loop that learns how to bid into the FCR and aFRR markets.

    In every episode the function walks through the bid-placement hours (07, 17 and 18),
    asks get_possible_dates which 24 delivery hours and which market family are open,
    and then, for every matching market and delivery hour, calls place_hourly_bids,
    stores the ids of the assets that were bid, removes those assets from the pool that
    is still available in that hour, and applies a SARSA-style update
    (Q <- value_0 + alpha * (reward + value_1 - value_0)) where the next action is
    picked with the epsilon-greedy policy of greedy_action.

    NOTE(review): as written the function cannot run to completion:
      * Q and q_0 are never assigned - their initialisation lines are commented out.
      * place_hourly_bids is not defined in the notebook cells visible here.
      * expected_prices is keyed by (direction, area, hour) tuples but is indexed with
        the integer position h inside the loop.
      * get_features is called without an `hour` argument - verify that this matches
        its signature.
      * weights, n_actions and features are never used.

    Args:
        epsilon (float): float number between 0 and 1, often close to 0; reduced by 0.02 after every episode
        alpha (float): float number between 0 and 1, also known as the learning rate; reduced by 0.001 after every episode
        weights: currently unused
        num_episodes (int): number of episodes to be ran
        L ([new_meters.PowerMeter]): list of PowerMeter objects
        M ([final_markets.Reservemarket]): list of ReserveMarket objects
        H ([pd.Timestamp]): list of timestamps

    Returns:
        bids (dict): dictionary that holds control over the final bids for each market and each hour (from the last episode)
        revenues (dict): dictionary that holds the revenue for each episode
        available_assets (dict): dictionary that holds the available assets for each hour (from the last episode)
        Q (np.array): numpy array that holds the Q-values for each state-action pair
    """
    revenues = {}
    bid_timeframe = H[24:] # the hours where bids can be placed in
    place_bid_hours = [hour for hour in H[:-48] if hour.hour == 7 or hour.hour == 17 or hour.hour == 18] # the hours where bids can be placed from

    sup_market_names = ["FCR", "aFRR"]
    markets = [market for market in M if sup_market_names[0] in market.name  or sup_market_names[1] in market.name]
    market_names = [market.name for market in markets]
    # will only use FCR and aFRR
    
    available_assets = {hour: L.copy() for hour in bid_timeframe} 
    bids = {(market.name, hour): [] for hour in bid_timeframe for market in markets}
    n_actions = len(L) * len(market_names)
    # NOTE(review): the Q-table initialisation below is commented out, so every later use of Q raises a NameError.
    #Q = np.zeros((7, 24, len(L)+1, len(market_names)+1 )) # day of week, hour of day, available assets, possible markets, assets already bid
   
    # Initial state/action: taken from the first bid-placement hour.
    (possible_hours, market_name) = get_possible_dates(place_bid_hours[0])
    
    "an action should be to bid in to one of the possible markets returned from get_possible_dates"
    # will have to make a slice of Q where only the indexes for the possible markets are included
    possible_markets = [m for m in markets if market_name in m.name]
    indexes = [market_names.index(m.name) for m in possible_markets]
    # NOTE(review): q_0 is only defined on the commented-out line below but is used two lines further down.
    #q_0 = Q[possible_hours[0].weekday(), possible_hours[0].hour, :, indexes]
    features = get_features(possible_hours[0].weekday(), possible_hours[0].hour, available_assets= available_assets[possible_hours[0]], possible_markets = indexes)
    
    action_0 = greedy_action(q_0, epsilon, )
    value_0 = Q[possible_hours[0].weekday(), possible_hours[0].hour, len(available_assets[possible_hours[0]]), action_0]
    # Expected price per (direction, area, hour): the mean hourly price over the selected markets
    # with that direction and area (np.mean of an empty list gives nan when no market matches).
    expected_prices = {}
    for directions in ["up", "down", "both"]:
        for area in ["NO1", "NO2", "NO3", "NO4", "NO5"]:
            for hour in bid_timeframe:
                expected_prices[(directions, area, hour)] = np.mean([market.price_data.loc[market.price_data["Time(Local)"] == hour].values[0][1] for market in markets if market.area == area and market.direction == directions])
            
    for episode_n in range(num_episodes):
        # Linear decay of exploration and learning rate; there is no lower bound, so both can become negative.
        if episode_n > 0:
            epsilon -= 0.02
            alpha -= 0.001
        revenue = 0
        # Every episode starts with no bids placed and all assets available in every hour.
        bids = {(market.name, hour): [] for hour in bid_timeframe for market in markets}
        available_assets = {hour: L.copy() for hour in bid_timeframe}
        for place_hour in place_bid_hours:
            (possible_hours, market_name) = get_possible_dates(place_hour)
            # Skip placement hours that do not open a full 24-hour delivery window.
            if len(possible_hours) != 24:
                #print(f"No bids for {hour}")
                #print(f"possible_hours: {len(possible_hours)}")
                continue
            possible_markets = [m for m in markets if market_name in m.name]
            indexes = [market_names.index(m.name) for m in possible_markets]
            for market in possible_markets: # This will turn into a problem: it should be possible to place several bids for each hour, but the Q-table is only updated once (for one market) for each hour
                # If I continue like this, the action must change from choosing the market to choosing, for each market and hour, the number of assets to bid.
                # Maybe I will change the actions to be the number of assets to bid. If so, I must make sure that the number of assets to bid still satisfies all of the constraints.
                    # In that case the epsilon-greedy function should choose between the valid actions for each hour and market.
                    # The epsilon-greedy function must find each feasible combination of number of assets and market for each hour. Maybe I should precompute this and store it in a dictionary.
                for h, bid_hour in enumerate(possible_hours):
                    # NOTE(review): expected_prices[h] uses an integer key, but the dictionary is keyed by (direction, area, hour).
                    (possible_revenues, possible_assets) = place_hourly_bids(market=market, available_assets=available_assets[bid_hour], hour=bid_hour, expected_price=expected_prices[h])
                    revenue += np.sum(possible_revenues)
                    # Select assets with positive revenue for bidding
                    # (the condition does not depend on the asset: either all of possible_assets are kept or none)
                    assets_to_bid = [asset for asset in possible_assets if possible_revenues > 0]
                    # Flatten the list if it's a list of lists
                    #assets_to_bid = [item for sublist in assets_to_bid for item in sublist]
                    # Store the bid information
                    bids[(market.name, bid_hour)] = [asset.meter_id for asset in assets_to_bid]
                    # Update available assets
                    available_assets[bid_hour] = [asset for asset in available_assets[bid_hour] if asset not in assets_to_bid]
                    # Next state: same weekday/hour, indexed by the number of assets left after the bid.
                    q_1 = Q[bid_hour.weekday(), bid_hour.hour, len(available_assets[bid_hour]), indexes]
                    # NOTE(review): greedy_action returns a position within q_1, which is then used directly as
                    # the index on Q's last axis - confirm it should not be mapped back through `indexes`.
                    action_1 = greedy_action(q_1, epsilon)
                    value_1 = Q[bid_hour.weekday(), bid_hour.hour, len(available_assets[bid_hour]), action_1]
                    # SARSA-style update with the hour's revenue as reward and no discount factor.
                    # NOTE(review): the entry written is indexed by the state after the bid, while value_0 was read for the previous state - confirm.
                    Q[bid_hour.weekday(), bid_hour.hour, len(available_assets[bid_hour]), action_0] = value_0 + alpha * (possible_revenues + value_1 - value_0)
                    action_0, value_0 = action_1, value_1
        revenues[episode_n] = revenue
    return bids, revenues, available_assets, Q