### In this file I will try to develop the model further, drawing inspiration from the previous bidding files. The goal is to end up with a deep reinforcement learning model that can handle the bidding process and make bids without knowing the prices.

In [1]:
import gurobipy as gp
import pandas as pd
from code_map import final_markets, new_meters, utils, data_handling, timeframes
import numpy as np
from datetime import datetime, timedelta
from collections import defaultdict, Counter
import random

In [2]:
# Timeframe passed to the data-loading helpers below (the project's predefined `one_week` timeframe).
tf = timeframes.one_week

In [4]:
# Load the base sets and data for the chosen timeframe, restricted to area NO5.
# Per the train_model docstring further down: L = PowerMeter assets, M = ReserveMarket objects, H = timestamps.
# NOTE(review): the exact contents of F, freq_data, power_meter_dict and consumption_data are defined in utils.get_all_sets.
L, M, F, H, freq_data, power_meter_dict, consumption_data = utils.get_all_sets(timeframe= tf, areas = ["NO5"])

In [5]:
# Derive the model parameters from the assets (L), markets (M) and hours (H).
# NOTE(review): the meaning of each returned parameter is defined in utils.get_parameters — confirm there.
L_u, L_d, Fu_h_l, Fd_h_l, R_h_l, P_h_m, Vp_h_m, Vm_m, R_m = utils.get_parameters(L = L, M = M, H = H)

In [6]:
# Build the income dictionaries from the sets and parameters loaded above.
# markets_dict maps each market's name to its market object.
# NOTE(review): the key layout of Ir_hlm, Ia_hlm and Va_hm is defined in utils.get_income_dictionaries — confirm there.
Ir_hlm, Ia_hlm, Va_hm = utils.get_income_dictionaries(H=H, L = L, M = M, freq_data= freq_data, Fu_h_l= Fu_h_l, Fd_h_l= Fd_h_l, P_h_m= P_h_m, Vp_h_m= Vp_h_m, F = F, markets_dict = {market.name : market for market in M}, timeframe = tf, areas = ["NO5"])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.sort_values(by = "Time", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.sort_values(by = "Time", inplace = True)


In [7]:
# Compatibility lookup used by the portfolio helpers below: compatible_dict[market] is tested with
# `asset in compatible_dict[market]` to filter out assets that cannot be bid into that market.
compatible_dict = utils.get_compatibility_dict(L = L ,M = M, index = False)

In [8]:
# Number of entries in L loaded for NO5.
len(L)

960

In [9]:
def random_arg_max(possible_actions):
    """Return the index of the largest value, breaking ties uniformly at random.

    The sequence is scanned once while remembering the best value seen so far and
    how many times it has occurred. The k-th occurrence of the current best value
    takes over the stored index with probability 1/k, which makes every tied index
    equally likely to be returned.

    Args:
        possible_actions: non-empty indexable sequence of comparable values

    Returns:
        int: index of one of the maximal elements
    """
    best_index = 0
    best_value = possible_actions[0]
    tie_count = 1  # occurrences of best_value seen so far
    for index in range(1, len(possible_actions)):
        candidate = possible_actions[index]
        if candidate > best_value:
            # strictly better value: restart the tie bookkeeping
            best_index, best_value, tie_count = index, candidate, 1
        elif candidate == best_value:
            tie_count += 1
            # the newcomer replaces the stored index with probability 1 / tie_count
            if tie_count * random.random() < 1.0:
                best_index = index
    return best_index


In [None]:
def greedy_action(Q : np.array, epsilon : float ):
    """Pick an action index with an epsilon-greedy rule.

    With probability (1 - epsilon) the index of the largest value in Q is returned
    (ties broken at random by random_arg_max); otherwise an index is drawn uniformly
    from all positions of Q.

    Args:
        Q (np.array): values of the candidate actions (any sequence supporting len() and indexing)
        epsilon (float): exploration probability between 0 and 1, often close to 0

    Returns:
        int: index of the selected action
    """
    exploit = random.random() <= (1 - epsilon)
    if not exploit:
        # explore: uniform draw over every action index
        return random.randint(0, len(Q) - 1)
    # exploit: best-valued action
    return random_arg_max(Q)


#### I will have to determine which type of network to use and how to implement it. I may look into the INF265 tasks for inspiration. A deep feedforward network (multilayer perceptron) is an interesting candidate. I may also look into attention mechanisms and how to implement them. Actor-critic architectures are interesting as well. I may end up with a hybrid model.

In [10]:
def get_possible_dates(date : pd.Timestamp):
    """ Function to get the possible dates for placing a bid given the current date

    Only three bidding hours open a window; each window is an hourly range in the
    "Europe/Oslo" time zone:
        * hour 17 -> "D_2"  (FCR D-2):  date + 1 day 7 h  ...  date + 2 days 6 h
        * hour 7  -> "aFRR":            date + 17 h       ...  date + 1 day 16 h
        * hour 18 -> "D_1"  (FCR D-1):  date + 6 h        ...  date + 1 day 5 h
    Only `date.hour` is inspected; minutes and seconds of `date` are carried into the range.

    Args:
        date (pd.Timestamp): the current date (presumably naive or already localized
            to Europe/Oslo — TODO confirm against callers)

    Returns:
        (pd.date_range, str): the possible dates for placing a bid and for which market.
            For any other hour the result is ([], "No bids").
    """
    # bidding hour -> (offset to first delivery hour, offset to last delivery hour, market label)
    bid_windows = {
        17: (timedelta(days=1, hours=7), timedelta(days=2, hours=6), "D_2"),  # FCR D-2
        7: (timedelta(hours=17), timedelta(days=1, hours=16), "aFRR"),  # aFRR
        18: (timedelta(hours=6), timedelta(days=1, hours=5), "D_1"),  # FCR D-1
    }
    window = bid_windows.get(date.hour)
    if window is None:
        return ([], "No bids")

    first_offset, last_offset, market_label = window
    # An explicit one-hour timedelta is used as the frequency instead of the string alias 'H',
    # which is deprecated in recent pandas releases; the resulting hourly index is the same.
    delivery_hours = pd.date_range(date + first_offset, date + last_offset, freq=timedelta(hours=1), tz = "Europe/Oslo")
    return (delivery_hours, market_label)

In [16]:
def get_feasible_portfolio_for_market(possible_assets : [new_meters.PowerMeter], market : final_markets.ReserveMarket, hour : pd.Timestamp):
    """This function will return one feasible combination quickly.

    Assets are added greedily, largest up-flex volume first, until their summed volume
    reaches `market.min_volume`. If even all compatible assets together stay below the
    minimum volume, no feasible portfolio exists and an empty list is returned.

    Args:
        possible_assets ([new_meters.PowerMeter]): list of possible assets for the given market and given hour
        market (final_markets.ReserveMarket): the market for which the portfolio is to be found
        hour (pd.Timestamp): the hour for which the portfolio is to be found

    Returns:
        list(new_meters.PowerMeter): the feasible portfolio that will be used for the given market in the given hour,
            or an empty list when the minimum volume cannot be reached

    Raises:
        IndexError: if an asset has no row in `up_flex_volume` whose "Time(Local)" equals `hour`
    """
    # exclude assets that are not compatible with the given market (module-level compatible_dict)
    feasible_assets = [asset for asset in possible_assets if asset in compatible_dict[market]]
    # Fetch volumes for each asset at the given hour (first matching row of up_flex_volume)
    asset_volumes = [(asset, asset.up_flex_volume["value"].loc[asset.up_flex_volume["Time(Local)"] == hour].values[0])
                     for asset in feasible_assets]

    # Sort assets by volume in descending order so the minimum is reached with few assets
    asset_volumes.sort(key=lambda x: x[1], reverse=True)

    # Greedily accumulate assets until the market's minimum volume is met
    feasible_combination = []
    total_volume = 0
    for asset, volume in asset_volumes:
        feasible_combination.append(asset)
        total_volume += volume
        if total_volume >= market.min_volume:
            return feasible_combination

    # The minimum volume was never reached: returning the accumulated assets would
    # present an infeasible portfolio as feasible, so report "no portfolio" instead.
    return []



In [None]:
def get_n_portfolios_for_market(possible_assets : [new_meters.PowerMeter], market : final_markets.ReserveMarket, hour : pd.Timestamp, top_n=100, iterations=100):
    """function to find a diverse and large subset of feasible combinations without the computational overhead of checking all possible combinations

    Each iteration draws a random-sized random sample from the `top_n` largest compatible
    assets and adds assets in sampled order until `market.min_volume` is reached; samples
    that never reach the minimum are discarded.

    Args:
        possible_assets ([new_meters.PowerMeter]): list of possible assets for the given market and given hour
        market (final_markets.ReserveMarket): the market for which the portfolio is to be found
        hour (pd.Timestamp): the hour for which the portfolio is to be found
        top_n (int, optional): controls the number of top assets to consider. Defaults to 100.
        iterations (int, optional):  determines how many different combinations to try and generate. Defaults to 100.

    Returns:
        list(tuple(new_meters.PowerMeter)): the distinct feasible combinations that were found (at most
            `iterations` of them, in no particular order); empty when there are no compatible assets

    Raises:
        IndexError: if an asset has no row in `up_flex_volume` whose "Time(Local)" equals `hour`
    """
    # exclude assets that are not compatible with the given market (module-level compatible_dict)
    feasible_assets = [asset for asset in possible_assets if asset in compatible_dict[market]]
    # Fetch volumes for each asset at the given hour (first matching row of up_flex_volume)
    asset_volumes = [(asset, asset.up_flex_volume["value"].loc[asset.up_flex_volume["Time(Local)"] == hour].values[0])
                     for asset in feasible_assets]

    # Sort assets by volume in descending order
    asset_volumes.sort(key=lambda x: x[1], reverse=True)

    # Select top N assets
    top_assets = asset_volumes[:top_n]
    if not top_assets:
        # nothing to sample from, so no combination can be built
        return []

    # A set removes duplicate combinations; tuples are used because lists are not hashable
    feasible_combinations = set()

    for _ in range(iterations):
        # Randomly sample a smaller subset from top assets. The sample size is bounded by the
        # number of assets actually available: there may be fewer than top_n of them, and
        # random.sample raises ValueError when asked for more items than the population holds.
        sample_size = random.randint(1, len(top_assets))
        sampled_assets = random.sample(top_assets, k=sample_size)

        # Greedy addition to meet minimum volume
        combination, total_volume = [], 0
        for asset, volume in sampled_assets:
            combination.append(asset)
            total_volume += volume
            if total_volume >= market.min_volume:
                feasible_combinations.add(tuple(combination))
                break

    return list(feasible_combinations)


### The get_portfolio_for_market function, where I try to find every combination of assets for a given market in a given hour, is useless even though I only have NO5 assets and markets. The function is way too computationally expensive. I will have to make it more efficient or find another way of defining the action space.

### It is important to note that the bidding deadlines are not the same for all markets. This has to be taken into account when training the model. I will have to make some sort of iteration process that lets the model learn the bidding deadlines for each market and how they affect the bidding process. I must also take into account that bids are not necessarily accepted, so I will have to make a method where the model receives a signal telling it whether the bid was accepted or not before it makes the next bid.

### I think it will be a good idea to use RL for choosing which markets to bid into, and then use some sort of heuristic to determine which of the assets to bid into the chosen market(s).

In [None]:
def get_features(day_of_week, hour_of_day, available_assets, hour=None):
    """Assemble the feature list that describes the state for one bid hour.

    Args:
        day_of_week: weekday of the bid hour (callers pass `Timestamp.weekday()`)
        hour_of_day: hour of the bid hour (callers pass `Timestamp.hour`)
        available_assets: the assets still available in the bid hour; passed through unchanged
        hour (optional): currently unused. It defaults to None so that the existing calls,
            which only supply the first three arguments, no longer fail with a TypeError.

    Returns:
        list: [day_of_week, hour_of_day, available_assets]
    """
    # NOTE(review): available_assets is placed in the feature list as-is; if the features are
    # meant to be dotted with a numeric weight vector, a numeric summary (e.g. a count) is
    # probably what is needed here — confirm before relying on it.
    return [day_of_week, hour_of_day, available_assets]

In [None]:
def train_model(epsilon, alpha, weights, num_episodes, L, M, H):
    """Work-in-progress training loop for learning how to bid into the FCR and aFRR markets.

    For every episode the function resets the bids and the available assets, then walks
    through the bid-placement hours (07, 17 and 18). For each placement hour it looks up
    the delivery hours and the market label with get_possible_dates, and for every delivery
    hour it scores each of the three actions as the dot product of `weights` with the
    feature list extended by the action index, and selects one with greedy_action
    (epsilon-greedy). The selected actions are appended to a local action list.

    NOTE(review): the function is unfinished and cannot run to completion as written:
        * `reward` and `Q` are read but never defined inside this function,
        * get_features is called without an `hour` argument — confirm this matches its signature,
        * no bid is placed and `revenue`, `weights` and `alpha` are never updated, so the
          Sarsa update that this function is intended to perform is not implemented yet.

    Args:
        epsilon (float): float number between 0 and 1, often close to 0; reduced by 0.02 after every episode
        alpha (float): float number between 0 and 1, also known as the learning rate; reduced by 0.001 after every episode
        weights: weight vector that is dotted with the features plus the action index
            (presumably array-like with one entry per feature plus one for the action — TODO confirm)
        num_episodes (int): number of episodes to be ran
        L ([new_meters.PowerMeter]): list of PowerMeter objects
        M ([final_markets.Reservemarket]): list of ReserveMarket objects
        H ([pd.Timestamp]): list of timestamps

    Returns:
        bids (dict): dictionary that holds control over the final bids for each market and each hour
        revenues (dict): dictionary that holds the revenue for each episode
        available_assets (dict): dictionary that holds the available assets for each hour
        Q (np.array): numpy array that holds the Q-values for each state-action pair
            (NOTE(review): `Q` is not defined anywhere in this function)
    """
    revenues = {}
    bid_timeframe = H[24:] # the hours where bids can be placed in
    # Placement hours are taken from everything except the last 48 entries of H, so that the
    # delivery window of a placement hour still lies inside H (presumably hourly data — TODO confirm)
    place_bid_hours = [hour for hour in H[:-48] if hour.hour == 7 or hour.hour == 17 or hour.hour == 18] # the hours where bids can be placed from

    sup_market_names = ["FCR", "aFRR"]
    # keep only the markets whose name contains "FCR" or "aFRR"
    markets = [market for market in M if sup_market_names[0] in market.name  or sup_market_names[1] in market.name]
    market_names = [market.name for market in markets]
    # will only use FCR-N, FCR-D and aFRR. The FCR markets are both D-1 and D-2 and the aFRR market is D-1 but it is both up and down
    
    # every bid hour starts with the full asset list and no bids
    available_assets = {hour: L.copy() for hour in bid_timeframe} 
    bids = {(market.name, hour): [] for hour in bid_timeframe for market in markets}
    n_actions = 3
    "For each hour, an action should be to either bid in every feasible asset, to bid the minimum volume or to bid nothing"
   
    # Initial state/action taken from the first placement hour.
    # NOTE(review): possible_markets, indexes, features and action_0 computed here are not used
    # afterwards; the loop below recomputes what it needs.
    (possible_hours, market_name) = get_possible_dates(place_bid_hours[0])
    
    possible_markets = [m for m in markets if market_name in m.name]
    indexes = [market_names.index(m.name) for m in possible_markets] # the indexes for each market. This is used to slice the Q-table so it is not necessary in this case
    #q_0 = Q[possible_hours[0].weekday(), possible_hours[0].hour, :, indexes]
    features = get_features(possible_hours[0].weekday(), possible_hours[0].hour, available_assets= available_assets[possible_hours[0]])
    
    action_0 = random.randint(0, n_actions-1)
    action_list = [] # list to keep track of the actions taken.
    
    # The string below is a disabled draft for computing expected prices per direction, area and hour;
    # it is a bare string expression and has no effect at runtime.
    """  expected_prices = {}
    for directions in ["up", "down", "both"]:
        for area in ["NO1", "NO2", "NO3", "NO4", "NO5"]:
            for hour in bid_timeframe:
                expected_prices[(directions, area, hour)] = np.mean([market.price_data.loc[market.price_data["Time(Local)"] == hour].values[0][1] for market in markets if market.area == area and market.direction == directions])
            """
    for episode_n in range(num_episodes):
        # linear decay of exploration and learning rate after the first episode
        # NOTE(review): nothing keeps epsilon or alpha from dropping below 0 for many episodes
        if episode_n > 0:
            epsilon -= 0.02
            alpha -= 0.001
        revenue = 0
        # fresh bids and asset availability for every episode
        bids = {(market.name, hour): [] for hour in bid_timeframe for market in markets}
        available_assets = {hour: L.copy() for hour in bid_timeframe}
        for place_hour in place_bid_hours:
            (possible_hours, market_name) = get_possible_dates(place_hour)
            # a placement hour must open exactly 24 delivery hours; otherwise the episode's
            # placement loop is abandoned
            if len(possible_hours) != 24:
                print(f"No bids for {place_hour}")
                print(f"possible_hours: {len(possible_hours)}")
                break
            possible_markets = [m for m in markets if market_name in m.name] # because i am now using only no5 markets, this list should be of length 1
            indexes = [market_names.index(m.name) for m in possible_markets]
            # NOTE(review): indexes holds positions in market_names, but it is used here to index
            # possible_markets; this can pick the wrong market or raise IndexError — verify.
            current_market = possible_markets[indexes[0]]
            for h, bid_hour in enumerate(possible_hours):
                # place bid and get a reward !!!
                new_features = get_features(bid_hour.weekday(), bid_hour.hour, available_assets= available_assets[bid_hour])
                # linear value estimate for each action: weights . (features + [action])
                possible_actions = [np.dot(weights, np.array(new_features + [action])) for action in range(n_actions)]
                new_action = greedy_action(possible_actions, epsilon)
                action_list.append(new_action)
                # NOTE(review): `reward` is not defined; the bid placement / reward computation is still missing
                delta = reward 
                    
                    
        revenues[episode_n] = revenue
    # NOTE(review): `Q` is not defined in this function
    return bids, revenues, available_assets, Q