### In this notebook I further develop the model, drawing on the previous bidding notebooks. The goal is a deep reinforcement learning model that can handle the bidding process and place bids without knowing the prices in advance.

In [1]:
import gurobipy as gp
import pandas as pd
from code_map import final_markets, new_meters, utils, weather, timeframes
import numpy as np
from datetime import datetime, timedelta
from collections import defaultdict, Counter
import random
import sklearn

In [2]:
# Timeframe used throughout the notebook (the predefined one-week window from code_map.timeframes).
tf = timeframes.one_week

In [3]:
# Load the base sets for the chosen timeframe, restricted to area NO5.
# L: power meters (assets), M: reserve markets, H: hourly timestamps (as used by train_model below).
# NOTE(review): F, freq_data, power_meter_dict and consumption_data come straight from utils.get_all_sets — confirm their exact contents there.
L, M, F, H, freq_data, power_meter_dict, consumption_data = utils.get_all_sets(timeframe= tf, areas = ["NO5"])

In [4]:
# Derive the model parameters from the asset, market and hour sets.
# NOTE(review): the meaning of each returned parameter is defined in utils.get_parameters — not visible from this notebook.
L_u, L_d, Fu_h_l, Fd_h_l, R_h_l, P_h_m, Vp_h_m, Vm_m, R_m = utils.get_parameters(L = L, M = M, H = H)

In [5]:
# Build the income dictionaries for the same timeframe and area; markets are passed by name via markets_dict.
# NOTE(review): the call emits pandas SettingWithCopyWarning from inside the helper (in-place sort on a filtered frame) — worth fixing in utils.
Ir_hlm, Ia_hlm, Va_hm = utils.get_income_dictionaries(H=H, L = L, M = M, freq_data= freq_data, Fu_h_l= Fu_h_l, Fd_h_l= Fd_h_l, P_h_m= P_h_m, Vp_h_m= Vp_h_m, F = F, markets_dict = {market.name : market for market in M}, timeframe = tf, areas = ["NO5"])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.sort_values(by = "Time", inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df.sort_values(by = "Time", inplace = True)


In [6]:
# Market -> compatible assets lookup; the portfolio functions below test `asset in compatible_dict[market]`.
compatible_dict = utils.get_compatibility_dict(L = L ,M = M, index = False)

In [7]:
# Number of assets (power meters) loaded for the selected area.
len(L)

960

In [8]:
def random_arg_max(possible_actions):
    """Return the index of the largest value, breaking ties uniformly at random.

    The sequence is scanned once. Each time another element equal to the
    current maximum is met, it takes over the stored index with probability
    1/k, where k is the number of maxima seen so far, so every tied index
    ends up equally likely.

    Args:
        possible_actions: non-empty, indexable sequence of comparable values.

    Returns:
        int: index of a maximal element.
    """
    best_index = 0
    best_value = possible_actions[best_index]
    ties_seen = 1
    for index in range(1, len(possible_actions)):
        candidate = possible_actions[index]
        if candidate == best_value:
            # Another maximum: keep it with probability 1 / ties_seen.
            ties_seen += 1
            if ties_seen * random.random() < 1.0:
                best_index = index
            continue
        if candidate > best_value:
            # Strictly larger value: it becomes the only known maximum.
            best_index, best_value, ties_seen = index, candidate, 1
    return best_index


In [9]:
def greedy_action(possible_actions : list, epsilon : float ):
    """Pick an action index with an epsilon-greedy rule.

    Args:
        possible_actions (list): estimated value of each action, indexed by action
        epsilon (float): exploration rate between 0 and 1, usually close to 0

    Returns:
        int: with probability (1 - epsilon) the index of the best-valued action
            (ties broken at random), otherwise a uniformly random index
    """
    explore = random.random() > (1 - epsilon)
    if explore:
        return random.randint(0, len(possible_actions) - 1)
    return random_arg_max(possible_actions)


In [10]:
def get_expected_prices_and_volumes_dict(bid_timeframe, markets, directions=("up", "down", "both"), areas=("NO1", "NO2", "NO3", "NO4", "NO5")):
    """Average price and volume per (direction, area, hour) across the given markets.

    For every combination of direction, area and hour, the price and volume of
    all markets with that direction and area are looked up for the hour and
    averaged. Combinations without any matching market get NaN, set explicitly
    so that numpy does not emit "Mean of empty slice" warnings.

    Args:
        bid_timeframe: iterable of hours; each must occur in the "Time(Local)"
            column of every matching market's price_data and volume_data.
        markets: iterable of market objects exposing area, direction,
            price_data and volume_data. The value is read from the second
            column (position 1) of the matching row.
        directions: directions to build keys for. Defaults to up/down/both.
        areas: areas to build keys for. Defaults to NO1-NO5.

    Returns:
        (dict, dict): expected prices and expected volumes, both keyed by
            (direction, area, hour).
    """
    expected_prices = {}
    expected_volumes = {}
    for direction in directions:
        for area in areas:
            # The market filter does not depend on the hour, so compute it once.
            matching = [m for m in markets if m.area == area and m.direction == direction]
            for hour in bid_timeframe:
                key = (direction, area, hour)
                if not matching:
                    expected_prices[key] = np.nan
                    expected_volumes[key] = np.nan
                    continue
                expected_prices[key] = np.mean([m.price_data.loc[m.price_data["Time(Local)"] == hour].values[0][1] for m in matching])
                expected_volumes[key] = np.mean([m.volume_data.loc[m.volume_data["Time(Local)"] == hour].values[0][1] for m in matching])
    return expected_prices, expected_volumes
    

In [11]:
# Keep only the supported reserve products (market name contains "FCR" or "aFRR")
# and pre-compute their expected price/volume for every hour in H.
sup_market_names = ["FCR", "aFRR"]
markets = [candidate for candidate in M if any(tag in candidate.name for tag in sup_market_names)]
exp_price_dict, exp_vol_dict = get_expected_prices_and_volumes_dict(markets=markets, bid_timeframe=H)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [12]:
# Spot check: expected "up" price in NO5 for the fifth hour of H.
exp_price_dict[("up", "NO5", H[4])]

3.8666666666666667

In [13]:
# Spot check: expected "up" volume in NO5 for the fifth hour of H.
exp_vol_dict[("up", "NO5", H[4])]

2.0

#### I will have to determine which type of network to use and how to implement it. I may look into the INF265 tasks for inspiration. A deep feedforward network (multilayer perceptron) is interesting. I may also look into attention mechanisms and how to implement them. Actor-critic architectures are interesting as well. I may end up with a hybrid model.

In [14]:
def get_possible_dates(date : pd.Timestamp):
    """ Function to get the possible dates for placing a bid given the current date

    Bids can only be placed at three hours of the day, each tied to one market
    and one 24-hour delivery window:
        - 17:00 -> "D_2"  (FCR D-2): the full day two days ahead
        - 07:00 -> "aFRR": the full next day
        - 18:00 -> "D_1"  (FCR D-1): the full next day

    Args:
        date (pd.Timestamp): the current date

    Returns:
        (pd.DatetimeIndex, str): the 24 hourly timestamps (Europe/Oslo) that can be bid on
            and a tag for the market, or ([], "No bids") when no bid can be placed at this hour
    """
    # bidding hour -> (offset from `date` to the first delivery hour, market tag)
    windows = {
        17: (timedelta(days=1, hours=7), "D_2"),
        7: (timedelta(hours=17), "aFRR"),
        18: (timedelta(hours=6), "D_1"),
    }
    if date.hour not in windows:
        return ([], "No bids")
    offset, market_tag = windows[date.hour]
    first_hour = date + offset
    last_hour = first_hour + timedelta(hours=23)
    # "h" instead of "H": the upper-case alias is deprecated since pandas 2.2,
    # the lower-case one is accepted by both old and new versions.
    return (pd.date_range(first_hour, last_hour, freq="h", tz="Europe/Oslo"), market_tag)

In [15]:
def get_feasible_portfolio_for_market(possible_assets : [new_meters.PowerMeter], market : final_markets.ReserveMarket, hour : pd.Timestamp):
    """This function will return one feasible combination quickly.

    Compatible assets are ranked by the volume they can offer in the given hour
    and added greedily, largest first, until the market's minimum volume is met.
    If even all compatible assets together stay below the minimum volume, no
    feasible bid exists and an empty portfolio with volume 0 is returned.

    Reads the notebook-level `compatible_dict` (market -> compatible assets).

    Args:
        possible_assets ([new_meters.PowerMeter]): list of possible assets for the given market and given hour
        market (final_markets.ReserveMarket): the market for which the portfolio is to be found
        hour (pd.Timestamp): the hour for which the portfolio is to be found

    Returns:
        (list(new_meters.PowerMeter), number): the feasible portfolio for the given market in the
            given hour and its aggregated volume; ([], 0) when the minimum volume cannot be reached
    """

    def volume_at(flex_frame):
        # flex volume registered for `hour` in one of the asset's flex frames
        return flex_frame["value"].loc[flex_frame["Time(Local)"] == hour].values[0]

    def biddable_volume(asset):
        # A symmetric market needs the volume in both directions from a two-way asset.
        if market.direction == "both" and asset.direction == "both":
            return min(volume_at(asset.up_flex_volume), volume_at(asset.down_flex_volume))
        # Otherwise one direction decides: the asset's own in a symmetric market, else the market's.
        direction = asset.direction if market.direction == "both" else market.direction
        return volume_at(asset.up_flex_volume) if direction == "up" else volume_at(asset.down_flex_volume)

    # exclude assets that are not compatible with the given market
    feasible_assets = [asset for asset in possible_assets if asset in compatible_dict[market]]
    asset_volumes = [(asset, biddable_volume(asset)) for asset in feasible_assets]

    # Largest contributors first, so the minimum volume is reached with few assets
    asset_volumes.sort(key=lambda x: x[1], reverse=True)

    feasible_combination = []
    total_volume = 0
    for asset, volume in asset_volumes:
        feasible_combination.append(asset)
        total_volume += volume
        if total_volume >= market.min_volume:
            return feasible_combination, total_volume

    # Minimum volume never reached: returning the partial portfolio would be an invalid bid.
    return [], 0



Idea for a new way to define the flex volume for each asset when the actual consumption for a given hour is unknown: because of the strong patterns in the consumption, I can find the average consumption of each load for every hour of the day and day of the week, as well as the seasonal variations. I can then also estimate some sort of variance and define the flex volume as a range. This will be done for each load. I should ask Trond whether this is a good idea and get some help from him.

To do this I need a bigger consumption dataset. As of now I only have consumption data for June. Maybe I have to do this in Databricks. I should talk to Trond about this.

In [16]:
def get_n_portfolios_for_market(possible_assets : [new_meters.PowerMeter], market : final_markets.ReserveMarket, hour : pd.Timestamp, top_n=100, iterations=100):
    """Sample a varied set of feasible portfolios without enumerating every combination.

    The compatible assets are ranked by the volume they can offer in the given
    hour and only the top_n largest are kept. In each iteration a random subset
    of those is drawn and its assets are added one by one until the market's
    minimum volume is met; a draw that never reaches the minimum is discarded.

    Reads the notebook-level `compatible_dict` (market -> compatible assets).

    Args:
        possible_assets ([new_meters.PowerMeter]): assets still available for the given market and hour
        market (final_markets.ReserveMarket): the market the portfolios are built for
        hour (pd.Timestamp): the hour the portfolios are built for
        top_n (int, optional): number of largest assets to sample from. Defaults to 100.
        iterations (int, optional): number of random draws to attempt. Defaults to 100.

    Returns:
        dict: maps each feasible portfolio (tuple of assets, in draw order) to its aggregated volume
    """

    def flex_at(flex_frame):
        # flex volume registered for `hour` in one of the asset's flex frames
        return flex_frame["value"].loc[flex_frame["Time(Local)"] == hour].values[0]

    ranked = []
    for candidate in possible_assets:
        if candidate not in compatible_dict[market]:
            continue  # not compatible with this market
        if market.direction == "both" and candidate.direction == "both":
            offered = min(flex_at(candidate.up_flex_volume), flex_at(candidate.down_flex_volume))
        elif market.direction == "both":
            offered = flex_at(candidate.up_flex_volume) if candidate.direction == "up" else flex_at(candidate.down_flex_volume)
        else:
            offered = flex_at(candidate.up_flex_volume) if market.direction == "up" else flex_at(candidate.down_flex_volume)
        ranked.append((candidate, offered))

    # Largest volumes first, then keep only the top_n
    shortlist = sorted(ranked, key=lambda pair: pair[1], reverse=True)[:top_n]

    feasible_combinations = {}
    if not shortlist:
        return feasible_combinations

    for _ in range(iterations):
        sample_size = min(len(shortlist), random.randint(1, top_n))
        draw = random.sample(shortlist, k=sample_size)

        members, running_total = [], 0
        for member, member_volume in draw:
            members.append(member)
            running_total += member_volume
            if running_total >= market.min_volume:
                feasible_combinations[tuple(members)] = running_total
                break

    return feasible_combinations


### The get_portfolio_for_market function, where I try to find every single combination of assets for a given market in a given hour, is useless even though I only have NO5 assets and markets: it is far too computationally expensive. I will have to make it more efficient or find another way of defining the action space.

### It is important to note that the bidding deadlines are not the same for all markets. This has to be taken into account when training the model. I will have to design an iteration process that lets the model learn the bidding deadline of each market and how it affects the bidding process. I must also take into account that bids are not necessarily accepted, so I need a mechanism where the model receives a signal telling it whether a bid was accepted before it makes the next bid.

### I think it is a good idea to use RL for choosing which markets to bid into and then use some sort of heuristic to determine which assets to bid into the chosen market(s).

In [22]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [28]:
# Weather observations for NO5 over the selected timeframe (precipitation and air temperature are normalized below).
weather_data = weather.get_weather_data(tf= tf, areas = ["NO5"])

In [33]:
def normalize_weather_data(weather_data, scaler = None, columns = ("precipitation", "air_temp")):
    """Return a copy of the weather data with the given columns rescaled.

    Each column is fitted and transformed on its own with `scaler.fit_transform`,
    so after the call the scaler only remembers the fit of the last column.
    The input frame is left untouched.

    Args:
        weather_data (pd.DataFrame): frame containing at least the columns in `columns`
        scaler: object with a scikit-learn style `fit_transform`. Defaults to None, in
            which case a fresh MinMaxScaler is created for this call (a scaler instance
            as default argument would be created once and shared between all calls).
        columns (iterable of str, optional): columns to rescale.
            Defaults to ("precipitation", "air_temp").

    Returns:
        pd.DataFrame: copy of `weather_data` with the selected columns rescaled
    """
    if scaler is None:
        # Local import so the function does not depend on sklearn.preprocessing
        # having been imported as a submodule elsewhere in the notebook.
        from sklearn.preprocessing import MinMaxScaler
        scaler = MinMaxScaler()
    data = weather_data.copy()
    for column in columns:
        # fit_transform returns an (n, 1) result for a one-column frame; flatten it for the assignment
        data[column] = np.asarray(scaler.fit_transform(data[[column]])).ravel()
    return data


In [34]:
# Normalized copy of the weather data; weather_data itself is left unchanged.
norm_w_df = normalize_weather_data(weather_data= weather_data)

In [35]:
# Inspect the normalized weather frame.
norm_w_df

Unnamed: 0,Time (Local),precipitation,air_temp,area
1,2023-06-19 00:00:00+02:00,0.0,0.645833,NO5
7,2023-06-19 01:00:00+02:00,0.0,0.611111,NO5
12,2023-06-19 02:00:00+02:00,0.0,0.541667,NO5
15,2023-06-19 03:00:00+02:00,0.0,0.520833,NO5
21,2023-06-19 04:00:00+02:00,0.0,0.520833,NO5
...,...,...,...,...
817,2023-06-25 19:00:00+02:00,0.0,0.861111,NO5
820,2023-06-25 20:00:00+02:00,0.0,0.854167,NO5
829,2023-06-25 21:00:00+02:00,0.0,0.736111,NO5
830,2023-06-25 22:00:00+02:00,0.0,0.576389,NO5


In [None]:
# Day-ahead (spot) prices for NO5, cut to the same timeframe as the rest of the data.
spot_path = "../master-data/spot_data/spot_june_23.csv"

day_ahead_data = final_markets.preprocess_spot_data(pd.read_csv(spot_path), year = tf.year, start_month = tf.start_month, end_month = tf.end_month, start_day = tf.start_day, end_day = tf.end_day, start_hour = tf.start_hour, end_hour = tf.end_hour, area = "NO5")
# Min-max scale the settlement price, the same way the weather columns are normalized.
# NOTE(review): assumes preprocess_spot_data returns a numeric "settlement" column — confirm.
scaler = sklearn.preprocessing.MinMaxScaler()
day_ahead_data["settlement"] = scaler.fit_transform(day_ahead_data[["settlement"]])

In [None]:
# Preview the first rows of the day-ahead price frame.
day_ahead_data.head()

In [None]:
def get_features(bid_hour : pd.Timestamp, available_assets : [new_meters.PowerMeter], market : final_markets.ReserveMarket):
    """ Function to get the features for the given hour and market. It is important to use features that will help the model learn which actions to take
        and update the weights correctly for the given state.

        The features currently returned, in order, are:
        - day of week
        - hour of day
        - expected market price for the hour
        - expected market volume for the hour
        - number of available assets
        - precipitation
        - air temperature
        - Day Ahead (DA) price

        Further candidates: market historical prices and volumes, historical frequency data.

        Reads the notebook-level exp_price_dict, exp_vol_dict, weather_data and day_ahead_data.

    Args:
        bid_hour (pd.Timestamp): the hour that is bid on
        available_assets ([new_meters.PowerMeter]): the assets that are still available in that hour
        market (final_markets.ReserveMarket): the market that is bid to

    Returns:
        list: the eight feature values as scalars, in the order listed above
    """
    day_of_week = bid_hour.weekday()
    hour_of_day = bid_hour.hour
    expected_price = exp_price_dict[(market.direction, market.area, bid_hour)]
    expected_volume = exp_vol_dict[(market.direction, market.area, bid_hour)]

    # The weather frame names its columns "Time (Local)" (with a space) and "air_temp".
    weather_now = weather_data["Time (Local)"] == bid_hour
    # .values[0] extracts the scalar; a Series in the feature list breaks np.dot with the weights
    precipitation = weather_data["precipitation"].loc[weather_now].values[0]
    temperature = weather_data["air_temp"].loc[weather_now].values[0]
    # NOTE(review): assumes day_ahead_data uses "Time(Local)" like the market frames — confirm.
    da_price = day_ahead_data["settlement"].loc[day_ahead_data["Time(Local)"] == bid_hour].values[0]

    return [day_of_week, hour_of_day, expected_price, expected_volume, len(available_assets), precipitation, temperature, da_price]

In [None]:
def get_income_for_portfolio(volume : float, market : final_markets.ReserveMarket, hour : pd.Timestamp):
    """Income from selling a given volume to a market in a given hour.

    Args:
        volume (float): the aggregated volume that is bid
        market (final_markets.ReserveMarket): the market the volume is sold to
        hour (pd.Timestamp): the hour the volume is sold in

    Returns:
        float: volume multiplied by the market price of that hour, where the price
            is taken from the second column of the matching row in market.price_data
    """
    price_table = market.price_data
    rows_for_hour = price_table.loc[price_table["Time(Local)"] == hour]
    hourly_price = rows_for_hour.values[0][1]
    return volume * hourly_price


In [None]:
def make_bid(market : final_markets.ReserveMarket, hour : pd.Timestamp, action : int, possible_assets : [new_meters.PowerMeter]):
    """Place a bid in a market for one hour according to the chosen action.

    Actions:
        0   -> no bid
        1   -> the first portfolio produced by the random portfolio sampler
        any other value -> the greedy portfolio that reaches the minimum volume

    Args:
        market (final_markets.ReserveMarket): the market to bid to
        hour (pd.Timestamp): the hour to bid to
        action (int): which bidding strategy to use (see above)
        possible_assets ([new_meters.PowerMeter]): list of possible assets for the given market and given hour

    Returns:
        portfolio: the assets that were bid (a tuple for action 1, otherwise a list; empty when nothing was bid)
        income: the income for the portfolio in the given market at the given hour
    """
    if action == 0:
        return [], 0

    if action != 1:
        portfolio, volume = get_feasible_portfolio_for_market(possible_assets = possible_assets, market = market, hour = hour)
        return portfolio, get_income_for_portfolio(volume, market, hour)

    sampled = get_n_portfolios_for_market(possible_assets = possible_assets, market = market, hour = hour, top_n=100, iterations=100)
    if len(sampled) > 0:
        # dicts keep insertion order, so this is the first portfolio that was found
        portfolio = next(iter(sampled))
        volume = sampled[portfolio]
    else:
        portfolio, volume = [], 0
    return portfolio, get_income_for_portfolio(volume, market, hour)

In [None]:
def train_model(epsilon, alpha, weights, num_episodes, L, M, H):
    """ Training function to learn how to bid into the markets given the bidding constraints and the asset constraints.

    Uses a linear value estimate per action (dot product of that action's weights and the
    state features) and updates the weights of the action just taken with the temporal
    difference between its estimate and the reward plus the estimate of the next action.
    From the second episode on, epsilon decays by 0.02 and alpha by 0.001 per episode,
    both floored at 0.

    Actions (see make_bid): 0 = no bid, 1 = randomly sampled feasible portfolio,
    2 = greedy portfolio that reaches the minimum volume.

    Args:
        epsilon (float): float number between 0 and 1, often close to 0
        alpha (float): float number between 0 and 1, also known as the learning rate
        weights (list of np.ndarray): one weight vector per action, each as long as the
            feature list returned by get_features; updated in place
        num_episodes (int): number of episodes to be ran
        L ([new_meters.PowerMeter]): list of PowerMeter objects
        M ([final_markets.Reservemarket]): list of ReserveMarket objects
        H ([pd.Timestamp]): list of timestamps

    Returns:
        bids (dict): the last portfolio bid for each (market name, hour)
        revenues (dict): the total revenue of each episode
        available_assets (dict): the assets still available for each hour after the last episode
        weights (list of np.ndarray): the learned weights
        action_list (list of int): every action chosen during training, in order
    """
    n_actions = 3
    revenues = {}
    bids = {}
    action_list = []  # keeps track of the actions taken

    bid_timeframe = H[24:]  # the hours where bids can be placed in
    # the hours where bids can be placed from
    place_bid_hours = [hour for hour in H[:-48] if hour.hour in (7, 17, 18)]

    # Only FCR-N, FCR-D (both D-1 and D-2) and aFRR (D-1, up and down) are used.
    sup_market_names = ["FCR", "aFRR"]
    markets = [market for market in M if any(name in market.name for name in sup_market_names)]

    available_assets = {hour: L.copy() for hour in bid_timeframe}

    # Initial state and action. market_name is only a substring of the full market name;
    # FCR-D and FCR-N share deadlines, so it can match two markets.
    (possible_hours, market_name) = get_possible_dates(place_bid_hours[0])
    possible_markets = [m for m in markets if market_name in m.name]
    features = np.asarray(get_features(bid_hour = possible_hours[0], available_assets = available_assets[possible_hours[0]], market = possible_markets[0]), dtype = float)
    action_0 = random.randint(0, n_actions - 1)

    for episode_n in range(num_episodes):
        if episode_n > 0:
            # Decay exploration and learning rate, but never below zero:
            # a negative learning rate would push the weights away from the target.
            epsilon = max(epsilon - 0.02, 0.0)
            alpha = max(alpha - 0.001, 0.0)
        revenue = 0
        available_assets = {hour: L.copy() for hour in bid_timeframe}
        for place_hour in place_bid_hours:
            (possible_hours, market_name) = get_possible_dates(place_hour)
            if len(possible_hours) != 24:
                break
            possible_markets = [m for m in markets if market_name in m.name]  # one or two markets when only NO5 is used
            for current_market in possible_markets:
                for bid_hour in possible_hours:
                    # place the bid and collect the reward
                    chosen_portfolio, reward = make_bid(current_market, bid_hour, action_0, available_assets[bid_hour])
                    # assets that were bid are no longer available in that hour
                    available_assets[bid_hour] = [asset for asset in available_assets[bid_hour] if asset not in chosen_portfolio]
                    bids[(current_market.name, bid_hour)] = chosen_portfolio

                    new_features = np.asarray(get_features(bid_hour = bid_hour, available_assets = available_assets[bid_hour], market = current_market), dtype = float)
                    action_values = [np.dot(weights[action], new_features) for action in range(n_actions)]
                    new_action = greedy_action(action_values, epsilon)
                    action_list.append(new_action)

                    delta = reward + action_values[new_action] - np.dot(weights[action_0], features)
                    # For a linear estimate the gradient w.r.t. the weights is the feature vector,
                    # so the update scales the whole vector, not a single feature picked by action index.
                    weights[action_0] = weights[action_0] + alpha * delta * features

                    features, action_0 = new_features, new_action
                    revenue += reward

        revenues[episode_n] = revenue
    return bids, revenues, available_assets, weights, action_list

In [None]:
def initialize_weights(n_features :int , n_actions : int):
    """Create one all-zero weight vector of length n_features for each action.

    Returns:
        list of np.ndarray: n_actions independent arrays of shape (n_features,)
    """
    weights = []
    for _ in range(n_actions):
        weights.append(np.zeros(n_features))
    return weights

In [None]:
# Quick check of the helper: three zero vectors of length three.
initialize_weights(3, 3)

In [None]:
# get_features returns 8 values per state, so every action needs an 8-element weight vector
# (with 5 the dot product between weights and features fails on a shape mismatch).
bids, revenues, available_assets, weights, action_list = train_model(epsilon = 0.2, alpha = 0.1, weights = initialize_weights(8, 3), num_episodes = 1, L = L, M = M, H = H)

In [None]:
# Total revenue per episode.
revenues

In [None]:
# The (market name, hour) pairs for which a bid was recorded.
bids.keys()