In [1]:
import pandas as pd
import numpy as np
import math
import time

from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the Victoria dataset; create_df() later selects the TIME and PRICE columns from this frame.
vic_data = pd.read_csv('../../data/victoria.csv')

In [3]:
# Constant Variables

# Column names of the input dataset
TIME = 'Time (UTC+10)'
PRICE = 'Regions VIC Trading Price ($/MWh)'
GENERATION = 'Regions VIC Trading Total Intermittent Generation (MW)'
DEMAND = 'Regions VIC Operational Demand (MW)'

# Battery parameters (units are not stated here -- presumably MW / MWh; confirm)
POWER = 300            # cap on the raw power chosen in run_algo3
CAPACITY = 580         # upper bound on the closing capacity in get_closecap
EFFICIENCY = 0.9       # divisor applied to the available headroom / stored energy in run_algo3
CHARGE_EFF = 90        # percentage; used as CHARGE_EFF / 100 in get_closecap
DISCHARGE_EFF = 90     # percentage; used in get_dispatch and get_closecap
MLF = 0.991            # revenue factor in get_revenue (presumably the marginal loss factor -- confirm)
FIXED_OP = 8.1         # not referenced by the functions in this notebook section
VAR_OP = 0             # not referenced by the functions in this notebook section

# Values stored in the 'decision1' / 'decision2' columns (0 means idle)
CHARGE = -1
DISCHARGE = 1

In [4]:
# percentile.exc from excel != np.percentile (np.percentile == percentile.inc from excel)
# code taken from https://stackoverflow.com/questions/38596100/python-equivalent-of-excels-percentile-exc

def quantile_exc(ser, q):
    """Exclusive quantile of a Series, mirroring Excel's PERCENTILE.EXC.

    Args:
        ser: pandas Series of numeric values (it does not need to be sorted).
        q: quantile expressed as a fraction, e.g. 0.32 for the 32nd percentile.

    Returns:
        The value at the zero-based fractional rank ``q * (len(ser) + 1) - 1``
        of the sorted data, linearly interpolated between its two neighbours.

    Raises:
        AssertionError: if the rank falls before the first or after the last
            sorted value, i.e. q lies outside [1/(n+1), n/(n+1)].
    """
    ser_sorted = ser.sort_values()
    n = len(ser_sorted)
    rank = q * (n + 1) - 1
    assert rank >= 0, 'quantile is too small'
    assert rank <= n - 1, 'quantile is too large'

    rank_l = int(rank)
    frac = rank - rank_l
    lower = ser_sorted.iat[rank_l]
    if frac == 0:
        # The rank lands exactly on a data point. Return it directly: at the
        # last position there is no upper neighbour to interpolate towards.
        return lower
    return lower + (ser_sorted.iat[rank_l + 1] - lower) * frac

In [5]:
def create_df(ori_df):
    """Build the working frame used by the rest of the pipeline.

    Input:
        ori_df : DataFrame containing at least the TIME and PRICE columns and
            a row labelled 0.
    Return:
        df : independent copy holding TIME and PRICE plus the zero-initialised
            columns 'raw_power', 'dispatch', 'revenue', 'opening', 'closing'
            and 'decision1', with the row labelled 0 removed.
    """
    # Copy explicitly so the new columns are added to an independent frame and
    # not to a selection of ori_df (which triggers pandas' chained-assignment
    # warning and leaves it ambiguous whether ori_df is affected).
    df = ori_df[[TIME, PRICE]].copy()

    for column in ('raw_power', 'dispatch', 'revenue', 'opening', 'closing', 'decision1'):
        df[column] = 0

    # I removed the first row because first row of vic is 00:00:00,
    # which is the last period from the previous year
    df = df.drop([0], axis=0)

    return df

In [6]:
def algorithm3(df):
    """Mark each row of df as CHARGE, DISCHARGE or idle in 'decision1'.

    For every row the current price is compared against two quantiles of a
    window of prices: at or below the CHARGING_PERCENTILE quantile marks the
    row CHARGE, at or above the DISCHARGING_PERCENTILE quantile marks it
    DISCHARGE (DISCHARGE wins when both hold). While a full LOOKAHEAD-sized
    window fits, the quantiles come from quantile_exc; otherwise the window is
    the rest of the frame and pandas' own Series.quantile is used.

    NOTE(review): the window is sliced by position (iloc) using the row label
    as the start. With labels starting at 1, as create_df appears to produce,
    the window begins at the row after the current one -- confirm this offset
    is intended.

    Input:
        df : DataFrame with the PRICE column and a 'decision1' column.
    Return:
        df : the same frame, with 'decision1' updated in place.
    """

    def _cutoff(window, q, exclusive):
        # Exclusive (Excel-style) quantile for full windows, pandas quantile otherwise
        return quantile_exc(window, q) if exclusive else window.quantile(q)

    total_rows = len(df)

    for label in df.index.tolist():
        price_now = df.at[label, PRICE]
        full_window = (label + LOOKAHEAD) < total_rows

        if full_window:
            window = df[PRICE].iloc[label:label + LOOKAHEAD]
        else:
            window = df[PRICE].iloc[label:]

        if price_now <= _cutoff(window, CHARGING_PERCENTILE, full_window):
            df.at[label, 'decision1'] = CHARGE

        # Checked second so that DISCHARGE overrides CHARGE when both apply
        if price_now >= _cutoff(window, DISCHARGING_PERCENTILE, full_window):
            df.at[label, 'decision1'] = DISCHARGE

    return df

In [7]:
def improve(df):
    """Fill selected idle runs with DISCHARGE and store the result in 'decision2'.

    'decision2' starts as a copy of 'decision1'. An idle run (consecutive rows
    whose 'decision1' is 0) is rewritten to DISCHARGE when
      * the decision just before the run equals the first non-idle decision
        after it, and
      * the mean price over the run exceeds the price at that next non-idle
        row by more than THRESHOLD_PRICE.

    Idle runs with no preceding row, or that reach the end of the data without
    a following non-idle decision, are left unchanged (they cannot be compared).

    Input:
        df : DataFrame with integer row labels, the PRICE column and 'decision1'.
    Return:
        df : the same frame with the added/overwritten 'decision2' column.
    """

    df['decision2'] = df['decision1']
    if df.empty:
        return df

    last_label = df.index[-1]

    for i in range(1, len(df)-1):

        # only idle rows are candidates
        if df.at[i, 'decision1'] != 0:
            continue

        # an idle run starting on the very first row has no previous decision
        if (i - 1) not in df.index:
            continue

        prev = df.at[i-1, 'decision1']
        if prev == 0:
            # Not the first row of its idle run: the next non-idle decision can
            # never equal an idle `prev`, so skip without rescanning the run.
            continue

        # find the next non-idle decision, without running past the last row
        j = i+1
        while j <= last_label and df.at[j, 'decision1'] == 0:
            j += 1
        if j > last_label:
            # The idle run extends to the end of the data; every remaining row
            # belongs to this run, so there is nothing more to do.
            break
        next_ = df.at[j, 'decision1']

        # if the previous decision and the next non-idle decision is the same
        if next_ == prev:
            prices = list(df.loc[i:j-1, PRICE])
            mean_price = sum(prices)/len(prices)

            # discharge when the mean price of the idle decisions is greater
            # by THRESHOLD_PRICE than the next non-idle decision
            if mean_price - df.at[j, PRICE] > THRESHOLD_PRICE:
                df.loc[i:j-1, 'decision2'] = DISCHARGE

    return df

### improve2: compares the price with a percentile (when the price list has fewer than 3 entries, the price is compared with the mean price instead)

In [68]:
def improve2(df):
    """Fill selected idle runs with CHARGE or DISCHARGE and store the result in 'decision2'.

    'decision2' starts as a copy of 'decision1'. An idle run (consecutive rows
    whose 'decision1' is 0) is considered only when the decision just before
    it equals the first non-idle decision after it. Let P be the price at that
    next non-idle row and `prices` the prices over the idle run:

      * surrounding decision is DISCHARGE: the run becomes CHARGE when
        P <= quantile_exc(prices, CHARGING_PERCENTILE) (more than 2 prices), or
        mean(prices) - P < THRESHOLD_PRICE (1 or 2 prices);
      * otherwise: the run becomes DISCHARGE when
        P >= quantile_exc(prices, DISCHARGING_PERCENTILE) (more than 2 prices), or
        mean(prices) - P > THRESHOLD_PRICE (1 or 2 prices).

    Idle runs with no preceding row, or that reach the end of the data without
    a following non-idle decision, are left unchanged (they cannot be compared).

    Input:
        df : DataFrame with integer row labels, the PRICE column and 'decision1'.
    Return:
        df : the same frame with the added/overwritten 'decision2' column.
    """

    df['decision2'] = df['decision1']
    if df.empty:
        return df

    last_label = df.index[-1]

    for i in range(1, len(df)-1):

        # only idle rows are candidates
        if df.at[i, 'decision1'] != 0:
            continue

        # an idle run starting on the very first row has no previous decision
        if (i - 1) not in df.index:
            continue

        prev = df.at[i-1, 'decision1']
        if prev == 0:
            # Not the first row of its idle run: the next non-idle decision can
            # never equal an idle `prev`, so skip without rescanning the run.
            continue

        # find the next non-idle decision, without running past the last row
        j = i+1
        while j <= last_label and df.at[j, 'decision1'] == 0:
            j += 1
        if j > last_label:
            # The idle run extends to the end of the data; every remaining row
            # belongs to this run, so there is nothing more to do.
            break
        next_ = df.at[j, 'decision1']

        # if the previous decision and the next non-idle decision is the same
        if next_ != prev:
            continue

        prices = list(df.loc[i:j-1, PRICE])
        mean_price = sum(prices)/len(prices)
        ser = pd.Series(prices)
        next_price = df.at[j, PRICE]
        # quantile_exc needs enough points to interpolate; short runs fall back to the mean
        enough_points = len(prices) > 2

        if next_ == DISCHARGE:
            if enough_points:
                fill = next_price <= quantile_exc(ser, CHARGING_PERCENTILE)
            else:
                fill = mean_price - next_price < THRESHOLD_PRICE
            if fill:
                df.loc[i:j-1, 'decision2'] = CHARGE
        else:
            if enough_points:
                fill = next_price >= quantile_exc(ser, DISCHARGING_PERCENTILE)
            else:
                fill = mean_price - next_price > THRESHOLD_PRICE
            if fill:
                df.loc[i:j-1, 'decision2'] = DISCHARGE

    return df

In [8]:
def get_opencap(i, df):
    """Return the opening battery capacity for row i, carrying it over from row i-1.

    For every row except the one labelled 1, the previous row's 'closing'
    value is first written into this row's 'opening' cell. The row labelled 1
    keeps whatever 'opening' value it already holds.

    Input:
        i : Current row label in df
        df : DataFrame with 'opening' and 'closing' column (modified in place)
    Return
        The 'opening' value of row i, rounded up to an integer.
    """
    is_first_row = (i == 1)
    if not is_first_row:
        carried_over = df.at[i - 1, "closing"]
        df.at[i, "opening"] = carried_over

    return math.ceil(df.at[i, "opening"])
    
def get_dispatch(rawPower):
    """ Convert the raw power of one 30-minute interval into the dispatched amount
    Input:
        rawPower : Raw power for the interval; negative values mean charging
    Return:
        Half of rawPower, scaled by DISCHARGE_EFF percent when rawPower is
        not negative (no scaling when charging), rounded to 0 decimals.
    """
    # Only non-negative (discharging) power is derated; charging passes through unscaled
    scale = 1 if rawPower < 0 else DISCHARGE_EFF / 100
    scaled_half = rawPower / 2 * scale
    return round(scaled_half, 0)

def get_closecap(opening_cap, dispatch):
    """Compute the battery closing capacity for one 30-minute interval
    Input:
        opening_cap : Opening battery capacity
        dispatch : Dispatched amount for the interval; negative means charging
    Return:
        Opening capacity minus the efficiency-adjusted dispatch, clamped to
        the range [0, CAPACITY] and rounded up to an integer.
    """
    # Charging stores CHARGE_EFF percent of the dispatch; discharging drains
    # 100 / DISCHARGE_EFF times the dispatch from the battery.
    adjustment = CHARGE_EFF / 100 if dispatch < 0 else 100 / DISCHARGE_EFF

    unclamped = opening_cap - (dispatch * adjustment)
    clamped = max(0, min(unclamped, CAPACITY))
    return math.ceil(clamped)
    
def get_revenue(price, dispatch):
    """ Compute the revenue for one 30-minute interval
    Input:
        price : Market spot price for electricity
        dispatch : Dispatched amount for the interval; negative means charging
    Return:
        price * dispatch scaled by MLF (by 1/MLF when charging), rounded up
        to an integer. Negative when charging at a positive price.
    """
    loss_factor = 1 / MLF if dispatch < 0 else MLF
    return math.ceil(price * dispatch * loss_factor)

def run_algo3(i, df, opening_cap):
    """ Choose the raw power for row i from its 'decision2' value
    Input:
        i : Current row label in df
        df : DataFrame with a 'decision2' column
        opening_cap : Opening battery capacity for the interval
    Return:
        Negative power (at most POWER in magnitude, limited by the remaining
        headroom) for CHARGE, positive power (at most POWER, limited by the
        stored capacity) for DISCHARGE, and 0 for any other decision.
    """
    decision = df.at[i, 'decision2']

    if decision == CHARGE:
        headroom_limit = (CAPACITY - opening_cap) / EFFICIENCY * 2
        return -min(POWER, headroom_limit)

    if decision == DISCHARGE:
        stored_limit = opening_cap / EFFICIENCY * 2
        return min(POWER, stored_limit)

    return 0

def calculate(df):
    """ Simulate the battery row by row and fill in the result columns.

    For each row, in index order: take the opening capacity (carried over
    from the previous row's closing capacity), derive the raw power from the
    row's decision, then the dispatch, the closing capacity and the revenue.

    Input:
        df : DataFrame with the PRICE, 'decision2', 'opening', 'closing',
            'raw_power', 'dispatch' and 'revenue' columns
    Return :
        df : the same frame with 'opening', 'raw_power', 'dispatch',
            'closing' and 'revenue' filled in place
    """
    for row in df.index.tolist():
        spot_price = df.at[row, PRICE]

        # Opening capacity must be resolved first: it depends on the previous row's closing value
        start_cap = get_opencap(row, df)

        power = run_algo3(row, df, start_cap)
        dispatched = get_dispatch(power)
        end_cap = get_closecap(start_cap, dispatched)
        earned = get_revenue(spot_price, dispatched)

        results = (
            ('raw_power', power),
            ('dispatch', dispatched),
            ('closing', end_cap),
            ('revenue', earned),
        )
        for column, value in results:
            df.at[row, column] = value

    return df

In [9]:
def show_result(df):
    """ Print total revenue, number of days and revenue per day for df.

    A day is counted as 48 rows (30-minute intervals).

    Input:
        df : DataFrame with 'revenue' column.
    """
    total_revenue = df["revenue"].sum()
    day_count = len(df) / 48

    print("Total revenue in the dataset:", total_revenue)
    print("Total days in the dataset:", day_count)
    print("Revenue per day:", total_revenue / day_count)

def run_all(ori_df):
    """ Run the whole pipeline with the `improve` refinement and report the revenue.

    Steps: build the working frame (create_df), pick charge/discharge rows
    (algorithm3), refine idle runs (improve), simulate the battery
    (calculate), then print the revenue summary and the elapsed wall time.

    Input:
        ori_df : DataFrame which contains spot_price for every 30-minute interval.
    Return:
        df : DataFrame with 'price, opening', 'closing', 'raw_power', 'dispatch'
            and 'revenue' column.
    """
    started_at = time.time()

    # Initialise, decide, then refine the decisions
    decided = improve(algorithm3(create_df(ori_df)))

    # Simulate the battery and report
    df = calculate(decided)
    show_result(df)

    elapsed = time.time() - started_at
    print("Time Complexity for running the entire Algorithm 3: {time_taken}s".format(time_taken = elapsed))

    return df

### `run_all2`: the same pipeline as `run_all`, but it uses the `improve2` function

In [64]:
def run_all2(ori_df):
    """ Run the whole pipeline with the `improve2` refinement and report the revenue.

    Steps: build the working frame (create_df), pick charge/discharge rows
    (algorithm3), refine idle runs (improve2), simulate the battery
    (calculate), then print the revenue summary and the elapsed wall time.

    Input:
        ori_df : DataFrame which contains spot_price for every 30-minute interval.
    Return:
        df : the simulated frame returned by calculate.
    """
    started_at = time.time()

    # Initialise, decide, then refine the decisions
    decided = improve2(algorithm3(create_df(ori_df)))

    # Simulate the battery and report
    df = calculate(decided)
    show_result(df)

    elapsed = time.time() - started_at
    print("Time Complexity for running the entire Algorithm 3: {time_taken}s".format(time_taken = elapsed))

    return df

In [None]:
# Kennedy's algorithm: sweep THRESHOLD_PRICE over 10, 15, ..., 45

# Settings shared by every run of the sweep
LOOKAHEAD = 10
CHARGING_PERCENTILE = 0.32
DISCHARGING_PERCENTILE = 0.74

for i in np.arange(10, 50, 5):
    THRESHOLD_PRICE = i
    print("Threshold price is:", THRESHOLD_PRICE)
    test_run = run_all(vic_data)
    print()

In [66]:
# Kennedy's algorithm: sweep THRESHOLD_PRICE over 50, 55, ..., 75
# maximum revenue: 92882.54/day <- $70 threshold price

# Settings shared by every run of the sweep
LOOKAHEAD = 10
CHARGING_PERCENTILE = 0.32
DISCHARGING_PERCENTILE = 0.74

for i in np.arange(50, 80, 5):
    THRESHOLD_PRICE = i
    print("Threshold price is:", THRESHOLD_PRICE)
    test_run = run_all(vic_data)
    print()

Threshold price is: 50
Total revenue in the dataset: 122674911
Total days in the dataset: 1322.0
Revenue per day: 92794.94024205749
Time Complexity for running the entire Algorithm 3: 26.139611959457397s

Threshold price is: 55
Total revenue in the dataset: 122628817
Total days in the dataset: 1322.0
Revenue per day: 92760.07337367625
Time Complexity for running the entire Algorithm 3: 24.783838987350464s

Threshold price is: 60
Total revenue in the dataset: 122606596
Total days in the dataset: 1322.0
Revenue per day: 92743.26475037822
Time Complexity for running the entire Algorithm 3: 28.108087062835693s

Threshold price is: 65
Total revenue in the dataset: 122605929
Total days in the dataset: 1322.0
Revenue per day: 92742.7602118003
Time Complexity for running the entire Algorithm 3: 24.630378007888794s

Threshold price is: 70
Total revenue in the dataset: 122790718
Total days in the dataset: 1322.0
Revenue per day: 92882.54009077155
Time Complexity for running the entire Algorithm 

## Changing lookahead value! (threshold is set to 70)

### Kennedy's -- Changing Lookahead value
- highest revenue when lookahead == 10

In [12]:
# Sweep LOOKAHEAD over 3..20 with the other settings held fixed
THRESHOLD_PRICE = 70
CHARGING_PERCENTILE = 0.32
DISCHARGING_PERCENTILE = 0.74

for i in np.arange(3, 21, 1):
    LOOKAHEAD = i
    print("Lookahead is:", LOOKAHEAD)
    test_run = run_all(vic_data)
    print()

Lookahead is: 3
Total revenue in the dataset: 98886753
Total days in the dataset: 1322.0
Revenue per day: 74800.8721633888
Time Complexity for running the entire Algorithm 3: 25.489946126937866s

Lookahead is: 4
Total revenue in the dataset: 108732598
Total days in the dataset: 1322.0
Revenue per day: 82248.56127080182
Time Complexity for running the entire Algorithm 3: 26.718438148498535s

Lookahead is: 5
Total revenue in the dataset: 113067016
Total days in the dataset: 1322.0
Revenue per day: 85527.24357034796
Time Complexity for running the entire Algorithm 3: 26.13019609451294s

Lookahead is: 6
Total revenue in the dataset: 116974390
Total days in the dataset: 1322.0
Revenue per day: 88482.89712556732
Time Complexity for running the entire Algorithm 3: 23.268133878707886s

Lookahead is: 7
Total revenue in the dataset: 119065507
Total days in the dataset: 1322.0
Revenue per day: 90064.68003025718
Time Complexity for running the entire Algorithm 3: 23.926902055740356s

Lookahead is:

In [69]:
# Improved -- Chieh-Yu: sweep THRESHOLD_PRICE over 10, 15, ..., 45 using run_all2
# maximum revenue: 90664.25/day -> $25 threshold price

# Settings shared by every run of the sweep
LOOKAHEAD = 10
CHARGING_PERCENTILE = 0.32
DISCHARGING_PERCENTILE = 0.74

for i in np.arange(10, 50, 5):
    THRESHOLD_PRICE = i
    print("Threshold price is:", THRESHOLD_PRICE)
    test_run = run_all2(vic_data)
    print()

Threshold price is: 10
Total revenue in the dataset: 119508903
Total days in the dataset: 1322.0
Revenue per day: 90400.07791225416
Time Complexity for running the entire Algorithm 3: 27.145619869232178s

Threshold price is: 15
Total revenue in the dataset: 119723339
Total days in the dataset: 1322.0
Revenue per day: 90562.28366111951
Time Complexity for running the entire Algorithm 3: 26.966329097747803s

Threshold price is: 20
Total revenue in the dataset: 119850487
Total days in the dataset: 1322.0
Revenue per day: 90658.4621785174
Time Complexity for running the entire Algorithm 3: 25.240215063095093s

Threshold price is: 25
Total revenue in the dataset: 119858132
Total days in the dataset: 1322.0
Revenue per day: 90664.24508320726
Time Complexity for running the entire Algorithm 3: 25.941317319869995s

Threshold price is: 30
Total revenue in the dataset: 119843043
Total days in the dataset: 1322.0
Revenue per day: 90652.8313161876
Time Complexity for running the entire Algorithm 3

In [70]:
# Improved -- Chieh-Yu: sweep THRESHOLD_PRICE over 50, 55, ..., 75 using run_all2

# Settings shared by every run of the sweep
LOOKAHEAD = 10
CHARGING_PERCENTILE = 0.32
DISCHARGING_PERCENTILE = 0.74

for i in np.arange(50, 80, 5):
    THRESHOLD_PRICE = i
    print("Threshold price is:", THRESHOLD_PRICE)
    test_run = run_all2(vic_data)
    print()

Threshold price is: 50
Total revenue in the dataset: 119701002
Total days in the dataset: 1322.0
Revenue per day: 90545.38729198184
Time Complexity for running the entire Algorithm 3: 26.04196286201477s

Threshold price is: 55
Total revenue in the dataset: 119648337
Total days in the dataset: 1322.0
Revenue per day: 90505.54992435704
Time Complexity for running the entire Algorithm 3: 28.305874824523926s

Threshold price is: 60
Total revenue in the dataset: 119626116
Total days in the dataset: 1322.0
Revenue per day: 90488.741301059
Time Complexity for running the entire Algorithm 3: 32.70004105567932s

Threshold price is: 65
Total revenue in the dataset: 119625449
Total days in the dataset: 1322.0
Revenue per day: 90488.23676248109
Time Complexity for running the entire Algorithm 3: 27.55764389038086s

Threshold price is: 70
Total revenue in the dataset: 119623962
Total days in the dataset: 1322.0
Revenue per day: 90487.11195158851
Time Complexity for running the entire Algorithm 3: 2

In [95]:
# Run Entire Data Pipeline (Original)
# NOTE(review): THRESHOLD_PRICE is not assigned in this cell, yet run_all() -> improve()
# reads it. The result therefore depends on whichever earlier cell last set it
# (execution order) -- confirm the intended threshold and set it here explicitly.
LOOKAHEAD = 10
CHARGING_PERCENTILE = 0.32
DISCHARGING_PERCENTILE = 0.74
vic_price = run_all(vic_data)
#92550.54/day

Total revenue in the dataset: 122351814
Total days in the dataset: 1322.0
Revenue per day: 92550.54009077155
Time Complexity for running the entire Algorithm 3: 21.02471923828125s


In [106]:
# Inspect the first 48 rows of the simulated frame (show_result counts 48 rows as one day).
vic_price.head(48)

Unnamed: 0,Time (UTC+10),Regions VIC Trading Price ($/MWh),raw_power,dispatch,revenue,opening,closing,decision1
1,2018-01-01 00:30:00,92.46,0,0,0,0,0,1
2,2018-01-01 01:00:00,87.62,0,0,0,0,0,1
3,2018-01-01 01:30:00,73.08,0,0,0,0,0,1
4,2018-01-01 02:00:00,70.18,0,0,0,0,0,1
5,2018-01-01 02:30:00,67.43,0,0,0,0,0,1
6,2018-01-01 03:00:00,66.31,0,0,0,0,0,0
7,2018-01-01 03:30:00,67.72,0,0,0,0,0,1
8,2018-01-01 04:00:00,65.5,0,0,0,0,0,0
9,2018-01-01 04:30:00,64.5,-300,-150,-9762,0,135,-1
10,2018-01-01 05:00:00,65.41,-300,-150,-9900,135,270,-1


In [104]:
# Sum of the prices for row labels 48 through 96.
# NOTE(review): .loc label slices include both endpoints, so with contiguous labels this
# covers 49 rows, not 48 -- confirm whether exactly one day was intended.
sum(vic_price.loc[48:96, PRICE])

3367.85