#### This file builds on what I made in v_2 and turns it into more of an RL problem. I will have to define an episode, the state space, and the individual states. The actions and the reward function have to be defined as well.

In [3]:
import gurobipy as gp
import pandas as pd
from code_map import final_markets, new_meters, utils, data_handling, timeframes
import numpy as np
from datetime import datetime, timedelta
from collections import defaultdict, Counter
import random

In [4]:
# Unpack the 21 collections returned by data_handling.load_collections for
# the half-month data set; the order of the names must match the order in
# which load_collections returns them.
(
    L, M, F, H,
    freq_data, power_meter_dict, consumption_data,
    L_u, L_d, Fu_h_l, Fd_h_l, R_h_l,
    P_h_m, Vp_h_m, Vm_m, R_m,
    dominant_directions, Ir_hlm, Ia_hlm, Va_hm, compatible_list,
) = data_handling.load_collections("./half_month_collections.pkl")

In [6]:
# Build the asset/market compatibility lookup from the loaded assets (L) and
# markets (M); index=False is forwarded unchanged to the helper.
compatible_dict = utils.get_compatibility_dict(L=L, M=M, index=False)

One episode should cover one week. That means that I should use the hours from two days before the week starts until the Saturday.

In [None]:
def random_arg_max(possible_actions):
    """Return the index of the largest entry, breaking ties at random.

    The sequence is scanned once. Each time another entry equal to the
    running maximum is found, it takes over as the stored index with
    probability 1/k, where k is the number of entries equal to that maximum
    seen so far.

    Args:
        possible_actions: non-empty, indexable sequence of comparable values

    Returns:
        int: index of one of the maximal entries
    """
    best_index = 0
    best_value = possible_actions[best_index]
    tie_count = 1  # how many entries equal to best_value have been seen

    for index, candidate in enumerate(possible_actions):
        if index == 0:
            # The first entry is already the running maximum.
            continue

        if candidate > best_value:
            # Strictly better: it becomes the only known maximum.
            best_index, best_value, tie_count = index, candidate, 1
            continue

        if candidate == best_value:
            tie_count += 1
            # Keep the new index with probability 1 / tie_count.
            if tie_count * random.random() < 1.0:
                best_index = index

    return best_index


In [None]:
def greedy_action(Q : np.array, epsilon : float ):
    """Choose an action index epsilon-greedily from a vector of action values.

    With probability ``epsilon`` an index is drawn uniformly from
    ``range(len(Q))``; otherwise the index of the largest entry of ``Q`` is
    returned, with ties broken at random by ``random_arg_max``.

    Args:
        Q (np.array): one-dimensional sequence of action values for the current state
        epsilon (float): probability of picking a random action instead of the greedy one

    Returns:
        int: index of the chosen action

    Raises:
        ValueError: if ``Q`` contains no action values
    """
    if len(Q) == 0:
        raise ValueError("Q must contain at least one action value")

    if random.random() < epsilon:  # explore
        # randrange excludes its upper bound, so the result is a valid index.
        return random.randrange(len(Q))
    return random_arg_max(Q)  # exploit


In [7]:
def get_possible_dates(date : pd.Timestamp):
    """ Function to get the possible dates for placing a bid given the current date

    Bids can only be placed when the hour of ``date`` is 7, 17 or 18. Each of
    these hours maps to one market label and to an hourly range that starts a
    fixed offset after ``date`` and ends 23 hours after its first timestamp
    (normally 24 timestamps).

    Args:
        date (pd.Timestamp): the current date

    Returns:
        (pd.DatetimeIndex, str): the possible dates (Europe/Oslo) for placing a bid and for which market;
        ``([], "No bids")`` when no bid can be placed at this hour
    """
    # hour of day -> (offset from `date` to the first bid hour, market label)
    bid_windows = {
        17: (timedelta(days=1, hours=7), "D_2"),  # FCR D-2
        7: (timedelta(hours=17), "aFRR"),  # aFRR
        18: (timedelta(hours=6), "D_1"),  # FCR D-1
    }
    if date.hour not in bid_windows:
        return ([], "No bids")

    offset, market_name = bid_windows[date.hour]
    first_hour = date + offset
    last_hour = first_hour + timedelta(hours=23)
    # Lowercase 'h' is the hourly alias; uppercase 'H' is deprecated in recent pandas.
    return (pd.date_range(first_hour, last_hour, freq='h', tz="Europe/Oslo"), market_name)

In [8]:
def check_constraints_for_hour(possible_assets : [[new_meters.PowerMeter]], hour : pd.Timestamp, possible_volume : float, expected_price : float, market : final_markets.ReserveMarket):
    """ Function to compute the revenue of bidding a volume in to a market for one hour

    The volume only counts when it is at least ``market.min_volume`` and
    strictly below the market volume at ``hour``; the price only counts when
    the market price at ``hour`` is at least ``expected_price``. The revenue
    is the product of the two, so it is 0 when either check fails.

    Args:
        possible_assets: the assets behind the bid
        hour (pd.Timestamp): the hour the bid is for
        possible_volume (float): the volume that could be bid
        expected_price (float): the lowest price at which a bid is made
        market (final_markets.ReserveMarket): the market to be bid in to

    Returns:
        tuple: the possible revenue and ``possible_assets`` if that revenue is positive, otherwise an empty list
    """
    # The value sits in the second column of the first row matching the hour.
    volume_rows = market.volume_data.loc[market.volume_data["Time(Local)"] == hour]
    max_vol = volume_rows.values[0][1]
    volume_in_range = market.min_volume <= possible_volume < max_vol
    accepted_volume = possible_volume if volume_in_range else 0

    price_rows = market.price_data.loc[market.price_data["Time(Local)"] == hour]
    market_price = price_rows.values[0][1]
    accepted_price = market_price if market_price >= expected_price else 0

    revenue = accepted_volume * accepted_price
    if revenue > 0:
        return revenue, possible_assets
    return revenue, []

In [9]:
def _flex_volume_at(flex_volume, hour):
    """Return the "value" entry of a flex-volume frame for the first row matching ``hour``."""
    return flex_volume["value"].loc[flex_volume["Time(Local)"] == hour].values[0]


def place_hourly_bids(hour : pd.Timestamp, expected_price : float, available_assets : dict, market : final_markets.ReserveMarket):
    """ Function to place a bid for a given market and hour. The bid is made with the available assets that are compatible with the given market.

    For an "up" or "down" market the bid volume is the summed up/down flex
    volume of the compatible assets at ``hour``. For any other direction the
    up and down volumes are summed separately (skipping assets whose own
    direction is the opposite one) and the smaller of the two is used,
    provided both reach ``market.min_volume``; otherwise the volume is 0.

    Args:
        hour (pd.Timestamp): The time stamp for which the bid is placed
        expected_price (float): the lowest price at which a bid is made
        available_assets (dict | list): either the assets available at ``hour``, or a
            dictionary with the available assets for each hour (``hour`` is then looked up in it)
        market (final_markets.ReserveMarket): the market to be bid in to

    Returns:
        tuple: The possible revenue of the placed bid and the assets which are bid,
        as returned by ``check_constraints_for_hour``
    """
    # Accept both the per-hour list and the hour -> assets dictionary.
    candidates = available_assets[hour] if isinstance(available_assets, dict) else available_assets

    # NOTE(review): compatible_list is indexed by the market object here; a
    # compatible_dict is also built at module level - confirm which lookup is intended.
    compatible_assets = compatible_list[market]
    possible_assets = [asset for asset in candidates if asset in compatible_assets]

    if market.direction == "up":
        possible_volume = sum(_flex_volume_at(asset.up_flex_volume, hour) for asset in possible_assets)
    elif market.direction == "down":
        possible_volume = sum(_flex_volume_at(asset.down_flex_volume, hour) for asset in possible_assets)
    else:
        possible_up_volume = sum(
            _flex_volume_at(asset.up_flex_volume, hour)
            for asset in possible_assets
            if asset.direction != "down"
        )
        possible_down_volume = sum(
            _flex_volume_at(asset.down_flex_volume, hour)
            for asset in possible_assets
            if asset.direction != "up"
        )
        # Both directions must reach the minimum volume; the smaller one then
        # decides the volume that can actually be bid.
        both_sufficient = possible_up_volume >= market.min_volume and possible_down_volume >= market.min_volume
        possible_volume = min(possible_up_volume, possible_down_volume) if both_sufficient else 0

    possible_revenue, possible_assets = check_constraints_for_hour(possible_assets, hour, possible_volume, expected_price, market)
    return (possible_revenue, possible_assets)


In [None]:
def Sarsa(epsilon, n_episodes, alpha, L, M, H):
    """ Function to run one pass of bidding over the hours in H and to set up the Q-table for SARSA

    For every hour in ``H`` at which ``get_possible_dates`` yields a 24-hour
    bid window, each market whose name contains the returned market label is
    bid in to hour by hour with ``place_hourly_bids``. Assets that are bid in
    an hour are removed from the available assets of that hour.

    NOTE: the SARSA value update is not implemented yet. The Q-table and the
    initial action are created but not used for the bidding, and
    ``n_episodes`` and ``alpha`` are currently unused.

    Args:
        epsilon (float): exploration probability passed to ``greedy_action``
        n_episodes (int): currently unused
        alpha (float): currently unused
        L (list): the assets; each asset needs a ``meter_id``
        M (list): the markets; each market needs ``name``, ``area``, ``direction`` and ``price_data``
        H (pd.DatetimeIndex): the hours at which bids may be placed

    Returns:
        tuple (dict, float, dict): the bid meter ids per (market name, hour), the total revenue
        and the remaining available assets per hour
    """
    revenue = 0
    n_assets = len(L)
    bid_timeframe = pd.date_range(H[-1] + timedelta(hours=1), periods=48, freq='h', tz="Europe/Oslo")
    available_assets = {hour: L.copy() for hour in bid_timeframe}
    bids = {(market.name, hour): [] for hour in bid_timeframe for market in M}
    markets_name_dict = {market.name: market for market in M}
    sup_market_names = ["FCR", "aFRR"]
    # A market is selected when its name contains one of the labels above.
    market_names = [market.name for market in M if any(sup_name in market.name for sup_name in sup_market_names)]

    # state space consists of day of week, hour of day, available assets, possible markets, assets already bid
    # The two asset axes are indexed by a count from 0 up to and including len(L), hence the + 1.
    # NOTE(review): the table grows with len(L) ** 2 and may not fit in memory for many assets.
    Q = np.zeros((7, 24, n_assets + 1, len(market_names), n_assets + 1))

    (possible_hours, market_name) = get_possible_dates(H[0])
    if len(possible_hours) > 0 and market_names:
        first_hour = possible_hours[0]
        # Hours without an entry yet still have every asset available.
        n_available = len(available_assets.get(first_hour, L))
        n_already_bid = n_assets - n_available
        q_0 = Q[first_hour.weekday(), first_hour.hour, n_available, :, n_already_bid]
        # an action should be to bid in to one of the possible markets
        # TODO: action_0 and value_0 are the starting point of the SARSA update that is still to be written
        action_0 = greedy_action(q_0, epsilon)
        value_0 = Q[first_hour.weekday(), first_hour.hour, n_available, action_0, n_already_bid]

    # A single pass over H makes up the episode.
    for hour in H:
        (possible_hours, market_name) = get_possible_dates(hour)
        if len(possible_hours) != 24:
            continue

        possible_markets = [m for m in M if market_name in m.name]
        for market in possible_markets:
            markets_to_check = [m for m in markets_name_dict.values() if m.area == market.area and m.direction == market.direction]
            # Expected price per bid hour: mean price over the markets with the same area and direction.
            expected_prices = np.array([
                np.mean([
                    other.price_data.loc[other.price_data["Time(Local)"] == price_hour].values[0][1]
                    for other in markets_to_check
                ])
                for price_hour in possible_hours
            ])
            for bid_hour, expected_price in zip(possible_hours, expected_prices):
                # Bid hours outside bid_timeframe start with every asset available.
                if bid_hour not in available_assets:
                    available_assets[bid_hour] = L.copy()
                assets_for_hour = available_assets[bid_hour]

                (possible_revenue, possible_assets) = place_hourly_bids(market=market, available_assets=assets_for_hour, hour=bid_hour, expected_price=expected_price)
                revenue += possible_revenue

                # place_hourly_bids only returns assets when the revenue is positive
                assets_to_bid = possible_assets
                # Store the bid information
                bids[(market.name, bid_hour)] = [asset.meter_id for asset in assets_to_bid]
                # Update available assets
                available_assets[bid_hour] = [asset for asset in assets_for_hour if asset not in assets_to_bid]

    return bids, revenue, available_assets
    
    
    