In [1]:
import pandas as pd
from datetime import timedelta
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import math
import time
# Use a 12pt base font for every matplotlib figure drawn in this notebook.
plt.rcParams['font.size'] = 12

# Silence warnings for the rest of the session. No category is passed, so this
# hides every warning class, not only deprecation notices.
import warnings
warnings.simplefilter("ignore")

In [2]:
# Load the raw Victorian market data. The path is relative, so it is resolved
# against the notebook's working directory. create_df later reads the columns
# named by TIME and PRICE from this frame.
vic_data = pd.read_csv('../../../data/victoria.csv')

In [3]:
# Column names of the input CSV. TIME and PRICE are the two columns create_df keeps.
TIME = 'Time (UTC+10)'
PRICE = 'Regions VIC Trading Price ($/MWh)'
# GENERATION and DEMAND are not referenced by the code visible in this part of the notebook.
GENERATION = 'Regions VIC Trading Total Intermittent Generation (MW)'
DEMAND = 'Regions VIC Operational Demand (MW)'

# Battery parameters.
# POWER caps the raw power of one interval in get_power; presumably MW -- TODO confirm units.
POWER = 300
# CAPACITY is the upper bound of the stored energy in get_closecap / get_power;
# presumably MWh -- TODO confirm units.
CAPACITY = 580
# Efficiencies in percent; the code divides them by 100 where they are applied.
CHARGE_EFF = 90
DISCHARGE_EFF = 90
# Fractional efficiency used by get_power when sizing a charge; currently equal to CHARGE_EFF / 100.
EFFICIENCY = 0.9
# Factor applied to the revenue in get_revenue; presumably the marginal loss factor -- TODO confirm.
MLF = 0.991
# FIXED_OP and VAR_OP are not referenced by the code visible here;
# presumably operating costs -- TODO confirm.
FIXED_OP = 8.1
VAR_OP = 0

# Values stored in the 'decision' column: algorithm2 writes them, get_power reads them.
CHARGE = -1
DISCHARGE = 1

In [4]:
def create_df(df):
    """Build the working frame used by the rest of the pipeline.

    Keeps only the timestamp and price columns, numbers every row with its
    half-hourly period (1-48) and adds zero-filled result columns.

    Input:
        df : raw market DataFrame containing the TIME and PRICE columns. Its
             first row must carry index label 0 and be stamped 00:00:00.
    Return:
        df : working DataFrame without the first row
        first_row : the removed first row (a Series), so the caller can add it back
    """
    # Take an explicit copy: the columns added below must not be written into
    # (or warn about) a slice of the caller's frame.
    df = df[[TIME, PRICE]].copy()

    # The first row is stamped 00:00:00, which is period 48; the following rows
    # then cycle 1, 2, ..., 48. Deriving the period from the row position works
    # for any number of rows, not only for 1 + a multiple of 48.
    period = [(pos - 1) % 48 + 1 for pos in range(len(df))]
    df.insert(1, 'Period', period)

    # Result columns, filled in later by algorithm2 and calculate.
    for column in ('raw_power', 'dispatch', 'revenue', 'opening', 'closing', 'decision'):
        df[column] = 0

    # The first row (00:00:00) is the last period of the previous year, so it is
    # set aside and handed back to the caller separately.
    first_row = df.loc[0]
    df = df.drop([0], axis=0)

    return df, first_row

In [5]:
def first_cycle(spot_price):
    """Return the row positions of the first charge and discharge windows.

    Slides a window of 6 consecutive rows over spot_price and picks the window
    with the lowest price sum (charge) and the window with the highest price
    sum (discharge). On ties the earliest window wins.

    Input:
        spot_price : DataFrame with one row per period; the price is read from
                     its first column
    Return:
        min_ : list of the 6 row positions of the cheapest window
        max_ : list of the 6 row positions of the most expensive window
    Raises:
        ValueError : if spot_price has fewer than 6 rows
    """
    window = 6

    # The last full window starts at len - 6, so there are len - 6 + 1 windows.
    n_windows = len(spot_price) - window + 1
    if n_windows < 1:
        raise ValueError("spot_price needs at least 6 rows to form a window")

    # Infinite sentinels: the first window always initialises both extremes,
    # even when every price sum is negative.
    min_price = float('inf')
    max_price = float('-inf')

    for i in range(n_windows):
        # Price sum of rows i .. i+5
        curr = spot_price.iloc[i:i + window, 0].sum()

        if curr < min_price:
            min_price = curr
            min_i = i  # first position of the cheapest window so far

        if curr > max_price:
            max_price = curr
            max_i = i  # first position of the most expensive window so far

    min_, max_ = store_index(min_i, max_i)

    return min_, max_

In [6]:
def sec_cycle(spot_price, min_index, max_index):
    """Add a second charge window and a second discharge window.

    Searches the row positions that the first cycle does not use, up to the
    last position in max_index, for the 6-row window with the lowest price sum
    and the one with the highest price sum. All positions are 0-based row
    positions into spot_price, the same convention first_cycle uses.

    Input:
        spot_price : DataFrame with one row per period; the price is read from
                     its first column
        min_index : row positions of the first charge window
        max_index : row positions of the first discharge window
    Return:
        min_index : the input list, extended in place with the second charge window
        max_index : the input list, extended in place with the second discharge window
        Both lists are returned unchanged when no window of 6 consecutive free
        positions exists.
    """
    window = 6
    first_cycle_positions = min_index + max_index
    taken = set(first_cycle_positions)

    # Drop every position after the last entry of max_index, and every position
    # the first cycle already occupies. Working on positions (not on the Period
    # labels of the index) keeps position 0 available as a candidate.
    search_end = min(first_cycle_positions[-1] + 1, len(spot_price))
    remaining = [pos for pos in range(search_end) if pos not in taken]

    sec_min_price = float('inf')
    sec_max_price = float('-inf')
    sec_min_i = sec_max_i = None

    # + 1 so that the last possible window in `remaining` is also examined
    for i in range(len(remaining) - window + 1):
        start = remaining[i]

        # Skip windows that straddle a removed position: the six candidates
        # must be consecutive.
        if remaining[i + window - 1] - start != window - 1:
            continue

        curr_sum = spot_price.iloc[start:start + window, 0].sum()

        if curr_sum < sec_min_price:
            sec_min_price = curr_sum
            sec_min_i = start  # first position of the second charge window

        if curr_sum > sec_max_price:
            sec_max_price = curr_sum
            sec_max_i = start  # first position of the second discharge window

    # No room for a second cycle: keep the first cycle as it is.
    if sec_min_i is None:
        return min_index, max_index

    min_, max_ = store_index(sec_min_i, sec_max_i)

    min_index += min_
    max_index += max_

    return min_index, max_index

In [7]:
def store_index(index1, index2, length=6):
    """Expand two window start positions into their full runs of positions.

    Input:
        index1 : first position of the first window
        index2 : first position of the second window
        length : number of consecutive positions per window (default 6)
    Return:
        list1 : [index1, index1 + 1, ..., index1 + length - 1]
        list2 : the same run starting at index2
    """
    list1 = [index1 + offset for offset in range(length)]
    list2 = [index2 + offset for offset in range(length)]

    return list1, list2

In [8]:
def algorithm2(ori_df, second_cycle=True):
    """Mark fixed charge and discharge periods chosen from the mean price.

    The price is averaged per period over the whole frame; first_cycle (and
    optionally sec_cycle) pick the charge and discharge windows from that
    profile, and every row whose period falls in one of them gets its
    'decision' set.

    Input:
        ori_df : DataFrame with 'Period', PRICE and 'decision' columns
        second_cycle : when True (default) a second charge/discharge cycle is
                       added; pass False for a single cycle per day
    Return:
        ori_df : the same DataFrame, with 'decision' set to CHARGE or DISCHARGE
                 on the selected periods (other rows are left untouched)
    """
    spot_price = ori_df.groupby(['Period'])[[PRICE]].mean()

    # First cycle
    min_index, max_index = first_cycle(spot_price)

    # Second cycle
    if second_cycle:
        min_index, max_index = sec_cycle(spot_price, min_index, max_index)

    # The windows are row positions into spot_price; translate them into the
    # period numbers stored in its index.
    charge_period = list(spot_price.index[min_index])
    discharge_period = list(spot_price.index[max_index])

    period = ori_df['Period']
    # Discharge is written first so that CHARGE wins if a period were ever in
    # both lists.
    ori_df.loc[period.isin(discharge_period), 'decision'] = DISCHARGE
    ori_df.loc[period.isin(charge_period), 'decision'] = CHARGE

    return ori_df

In [12]:
def get_opencap(i, df):
    """Get the opening battery capacity for one 30-minute interval.

    The opening capacity of a row is the closing capacity of the row labelled
    i - 1. When no such row exists (row i is the first one of the frame), the
    'opening' value already stored for row i is used.

    Input:
        i : integer index label of the current row in df
        df : DataFrame with 'opening' and 'closing' columns; updated in place
    Return:
        opening_cap : opening battery capacity of row i
    """
    previous = i - 1

    # Carry the previous closing value over, whatever label the frame starts at
    if previous in df.index:
        df.at[i, "opening"] = df.at[previous, "closing"]

    return df.at[i, "opening"]
    
def get_dispatch(rawPower):
    """Convert the raw power of one 30-minute interval into the dispatched amount.

    Input:
        rawPower : raw power of the interval; negative values are passed
                   through without an efficiency loss
    Return:
        dispatch : half of rawPower, additionally scaled by DISCHARGE_EFF
                   percent when rawPower is zero or positive
    """
    # Only non-negative power is reduced by the discharge efficiency.
    eff = 1 if rawPower < 0 else DISCHARGE_EFF / 100

    return (rawPower / 2) * eff

def get_closecap(opening_cap, dispatch):
    """Compute the battery closing capacity of one 30-minute interval.

    Input:
        opening_cap : battery capacity at the start of the interval
        dispatch : dispatched amount of the interval (negative values raise
                   the stored capacity, positive values lower it)
    Return:
        closecap : opening capacity minus the efficiency-adjusted dispatch,
                   clamped to the range 0..CAPACITY and rounded to 0 decimals
    """
    if dispatch < 0:
        # Only CHARGE_EFF percent of the dispatched amount is stored.
        eff = CHARGE_EFF / 100
    else:
        # Undo the discharge efficiency to get the amount drawn from storage.
        eff = 100 / DISCHARGE_EFF

    unclamped = opening_cap - (dispatch * eff)
    capped = min(unclamped, CAPACITY)
    return round(max(0, capped), 0)
    
def get_revenue(price, dispatch):
    """Compute the revenue of one 30-minute interval.

    Input:
        price : market spot price of the interval
        dispatch : dispatched amount of the interval
    Return:
        revenue : price * dispatch, divided by MLF for negative dispatch and
                  multiplied by MLF otherwise, rounded to the nearest integer
    """
    factor = 1/MLF if dispatch < 0 else MLF

    return round(price * dispatch * factor)

def get_power(i, df, opening_cap):
    """Compute the raw power of one 30-minute interval from its decision.

    Input:
        i : index label of the current row in df
        df : DataFrame with a 'decision' column
        opening_cap : battery capacity at the start of the interval
    Return:
        raw_power : negative when the decision is CHARGE, positive when it is
                    DISCHARGE, 0 for any other decision
    """
    decision = df.at[i, 'decision']

    if decision == CHARGE:
        # Power that would fill the remaining capacity within this interval,
        # limited to POWER.
        fill_power = (CAPACITY-opening_cap)/EFFICIENCY*2
        return -min(POWER, fill_power)

    if decision == DISCHARGE:
        # Power that would empty the battery within this interval, limited to POWER.
        return min(POWER, opening_cap*2)

    return 0

def calculate(df):
    """Simulate the battery row by row and fill in the result columns.

    For every row, in index order, the opening capacity is taken over from the
    previous row, then raw power, dispatch, closing capacity and revenue are
    computed and written back.

    Input:
        df : DataFrame with the PRICE, 'decision', 'opening', 'closing',
             'raw_power', 'dispatch' and 'revenue' columns; updated in place
    Return:
        df : the same DataFrame with the result columns filled in
    """
    # Raw power and dispatch are generally fractional. Writing such floats into
    # integer columns relies on silent upcasting, which newer pandas versions
    # deprecate, so the working columns are made float up front.
    for column in ('raw_power', 'dispatch', 'opening', 'closing'):
        if column in df.columns:
            df[column] = df[column].astype(float)

    # Go through each 30-minute interval of df
    for i in list(df.index):
        # Current spot price
        price = df.at[i, PRICE]

        # Opening capacity (also stored in the 'opening' column)
        opening_cap = get_opencap(i, df)

        # Raw power
        rawPower = get_power(i, df, opening_cap)
        df.at[i, 'raw_power'] = rawPower

        # Market dispatch
        dispatch = get_dispatch(rawPower)
        df.at[i, "dispatch"] = dispatch

        # Closing capacity; the next row reads it as its opening capacity
        df.at[i, "closing"] = get_closecap(opening_cap, dispatch)

        # Revenue
        df.at[i, "revenue"] = get_revenue(price, dispatch)

    return df

In [20]:
def show_result(df):
    """Print the revenue summary of df.

    Prints the total revenue, the number of days (rows / 48) and the revenue
    per day.

    Input:
        df : DataFrame with a 'revenue' column.
    """
    total_revenue = df["revenue"].sum()
    n_days = len(df)/48

    print("Total revenue in the dataset:", total_revenue)
    print("Total days in the dataset:", n_days)
    print("Revenue per day:", total_revenue / n_days)

    return None

def run_all(ori_df):
    """Run the whole pipeline on ori_df and print a summary.

    Steps: build the working frame (create_df), mark the charge and discharge
    periods (algorithm2), simulate the battery and its revenue (calculate),
    put the row removed by create_df back, then print the revenue summary and
    the elapsed wall-clock time.

    Input:
        ori_df : DataFrame which contains the spot price for every 30-minute interval.
    Return:
        df : DataFrame with the price, 'opening', 'closing', 'raw_power',
             'dispatch' and 'revenue' columns.
    """
    started_at = time.time()

    # Data processing
    working_df, first_row = create_df(ori_df)

    # Charge/discharge decisions, then the revenue simulation
    df = calculate(algorithm2(working_df))

    # Put the row that create_df set aside back at the top
    df.loc[0] = first_row
    df = df.sort_index()

    show_result(df)

    elapsed = time.time() - started_at
    print("Time Complexity for running the entire Algorithm 3: {time_taken}s".format(time_taken = elapsed))

    return df

In [21]:
# Run the full pipeline on the Victorian data. The charge and discharge periods
# are chosen from the mean price of each period (see algorithm2).
vic = run_all(vic_data)

Total revenue in the dataset: 57930542
Total days in the dataset: 1322.0208333333333
Revenue per day: 43819.689175347085
Time Complexity for running the entire Algorithm 3: 4.4228293895721436s


In [17]:
# Display the frame returned by run_all, one row per 30-minute interval.
vic

Unnamed: 0,Time (UTC+10),Period,Regions VIC Trading Price ($/MWh),raw_power,dispatch,revenue,opening,closing,decision
0,2018-01-01 00:00:00,48,90.43,0,0,0,0,0,0
1,2018-01-01 00:30:00,1,92.46,0,0,0,0,0,0
2,2018-01-01 01:00:00,2,87.62,0,0,0,0,0,0
3,2018-01-01 01:30:00,3,73.08,0,0,0,0,0,0
4,2018-01-01 02:00:00,4,70.18,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
63452,2021-08-14 22:00:00,44,49.93,0,0,0,0,0,0
63453,2021-08-14 22:30:00,45,62.86,0,0,0,0,0,0
63454,2021-08-14 23:00:00,46,32.26,0,0,0,0,0,0
63455,2021-08-14 23:30:00,47,25.10,0,0,0,0,0,0
