# 负荷预测bias测算

### 日最大负荷bias

In [51]:
import os
import pandas as pd

def bias(actual, predict):
    """Return the signed forecast error ``predict - actual``.

    The result is positive when ``predict`` exceeds ``actual`` and negative
    when it falls short.
    """
    return predict - actual

def get_max_loads(file_path, date_col, load_col):
    """Return the per-calendar-day maximum of ``load_col`` from a CSV file.

    Args:
        file_path: Path of the CSV file to read.
        date_col: Name of the column holding timestamps; it is parsed with
            ``pd.to_datetime``.
        load_col: Name of the column whose daily maximum is wanted.

    Returns:
        A Series of maxima indexed by ``datetime.date``.
    """
    frame = pd.read_csv(file_path)
    frame[date_col] = pd.to_datetime(frame[date_col])

    # Group rows by the calendar date of their timestamp.
    calendar_day = frame[date_col].dt.date
    grouped = frame.groupby(calendar_day)
    return grouped[load_col].max()

def _collect_daily_max(folder, start_date, end_date, date_col, load_col):
    """Merge the per-day maxima of every selected CSV in ``folder`` into one dict.

    A file is selected when its name ends in ``.csv`` and its first eight
    characters compare (as strings) between ``start_date`` and ``end_date``
    inclusive. Files are read in sorted name order, so when the same day
    appears in several files the value from the last file wins.
    """
    selected = sorted(
        name for name in os.listdir(folder)
        if name.endswith('.csv') and start_date <= name[:8] <= end_date
    )
    daily_max = {}
    for name in selected:
        per_day = get_max_loads(os.path.join(folder, name), date_col, load_col)
        daily_max.update(per_day.to_dict())
    return daily_max


def process_files(actual_folder, predicted_folder, start_date, end_date, actual_date_col, actual_load_col, predicted_date_col, predicted_load_col):
    """Compute the daily-maximum-load bias between actual and predicted data.

    Args:
        actual_folder: Folder containing the actual-load CSV files.
        predicted_folder: Folder containing the predicted-load CSV files.
        start_date: Lower bound (inclusive) compared as a string against the
            first eight characters of each file name.
        end_date: Upper bound (inclusive), compared the same way.
        actual_date_col: Timestamp column name in the actual files.
        actual_load_col: Load column name in the actual files.
        predicted_date_col: Timestamp column name in the predicted files.
        predicted_load_col: Load column name in the predicted files.

    Returns:
        A DataFrame with columns ``Date`` and ``Bias`` holding one row for
        every day that has a maximum in both the actual and predicted data.
        ``Bias`` is ``predicted - actual``. The frame is empty (but still has
        both columns) when no day is common to both sides.
    """
    # Each folder is read on its own. Pairing the two file lists with zip()
    # would stop at the shorter list and silently drop the remaining files of
    # the longer one; the values are matched by date below, so no positional
    # pairing of files is needed.
    actual_max_loads = _collect_daily_max(
        actual_folder, start_date, end_date, actual_date_col, actual_load_col
    )
    predicted_max_loads = _collect_daily_max(
        predicted_folder, start_date, end_date, predicted_date_col, predicted_load_col
    )

    biases = [
        {'Date': day, 'Bias': bias(actual, predicted_max_loads[day])}
        for day, actual in actual_max_loads.items()
        if day in predicted_max_loads
    ]

    # Explicit columns keep 'Date' and 'Bias' present even when nothing matched.
    return pd.DataFrame(biases, columns=['Date', 'Bias'])

# Folders holding the actual-load and predicted-load CSV files.
actual_folder = '/Users/maxz/Desktop/EQUOTA/load forecast/data_preprocessed/load/2024'
predicted_folder = '/Users/maxz/Desktop/EQUOTA/load forecast/results/2024/start_lead_12'

# File-name window: compared against the first eight characters of each name.
start_date, end_date = '20240101', '20240711'

# Timestamp / load column names in the actual and predicted files.
actual_date_col, actual_load_col = 'Date', 'Load'
predicted_date_col, predicted_load_col = 'forecast_time', 'mix'

biases_df = process_files(
    actual_folder=actual_folder,
    predicted_folder=predicted_folder,
    start_date=start_date,
    end_date=end_date,
    actual_date_col=actual_date_col,
    actual_load_col=actual_load_col,
    predicted_date_col=predicted_date_col,
    predicted_load_col=predicted_load_col,
)

biases_df

Unnamed: 0,Date,Bias
0,2024-01-02,-1245.840841
1,2024-01-03,-1755.861874
2,2024-01-04,-1176.850899
3,2024-01-05,-643.323674
4,2024-01-06,610.531130
...,...,...
185,2024-07-06,-2237.688386
186,2024-07-07,31.261829
187,2024-07-08,-1219.436678
188,2024-07-09,1192.011743


### 日保供关键时间段负荷bias

In [52]:
def critbias(actual1, actual2, actual3, actual4, i1, i2, i3, i4):
    """Return the mean signed error over four (actual, predicted) pairs.

    ``i1`` .. ``i4`` are the predicted values matching ``actual1`` ..
    ``actual4``; each error is ``predicted - actual`` and the four errors are
    averaged.
    """
    total = i1 - actual1
    total = total + (i2 - actual2)
    total = total + (i3 - actual3)
    total = total + (i4 - actual4)
    return 0.25 * total

# Mean bias over the 20:30-21:15 window, one value per day.
actual_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/data_preprocessed/load/2024"
predicted_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/results/2024/start_lead_12"

date_range = pd.date_range(start="2024-01-01", end="2024-07-12")

# "15min" replaces the "15T" alias, which newer pandas releases deprecate and
# warn about. The variable is kept for compatibility; nothing below reads it.
time_range = pd.date_range("20:30", "21:15", freq="15min").time

results = []

for date in date_range:
    # The actual file is named after the day itself; the forecast file that is
    # compared against it is named after the previous day.
    actual_file = os.path.join(actual_dir, date.strftime("%Y%m%d") + ".csv")
    predicted_file = os.path.join(predicted_dir, (date - pd.Timedelta(days=1)).strftime("%Y%m%d") + ".csv")

    # Days for which either file is missing are skipped silently.
    if not os.path.exists(actual_file) or not os.path.exists(predicted_file):
        continue

    actual_data = pd.read_csv(actual_file)
    actual_data['Date'] = pd.to_datetime(actual_data['Date'])
    actual_data.set_index('Date', inplace=True)

    predicted_data = pd.read_csv(predicted_file)
    predicted_data['forecast_time'] = pd.to_datetime(predicted_data['forecast_time'])
    predicted_data.set_index('forecast_time', inplace=True)

    actual_loads = actual_data.between_time("20:30", "21:15")['Load'].tolist()
    predicted_loads = predicted_data.between_time("20:30", "21:15")['mix'].tolist()

    # critbias takes exactly four actual and four predicted values, so days
    # without four points on both sides are left out.
    if len(actual_loads) == 4 and len(predicted_loads) == 4:
        # Deliberately not named `bias`: assigning to that name here would
        # rebind the notebook-level bias() function to a number and break any
        # later call that relies on it.
        crit_bias_value = critbias(*actual_loads, *predicted_loads)
        results.append({"Date": date, "Bias": crit_bias_value})

# Explicit columns keep 'Date' and 'Bias' present even when no day qualified.
crit_results_df = pd.DataFrame(results, columns=["Date", "Bias"])

crit_results_df


  time_range = pd.date_range("20:30", "21:15", freq="15T").time


Unnamed: 0,Date,Bias
0,2024-01-02,-1253.573855
1,2024-01-03,-1666.652411
2,2024-01-04,-1066.024966
3,2024-01-05,442.026319
4,2024-01-06,707.254850
...,...,...
185,2024-07-06,-2566.950844
186,2024-07-07,-2161.085960
187,2024-07-08,-1614.196368
188,2024-07-09,-1295.122692


### 午间低谷最小负荷bias

In [53]:
# Bias at the time of the lowest actual load within the 10:15-16:00 window.
actual_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/data_preprocessed/load/2024"
predicted_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/results/2024/start_lead_12"

date_range = pd.date_range(start="2024-01-02", end="2024-07-11")
results = []


def calculate_bias(actual, predict):
    """Return the signed error ``predict - actual``."""
    return predict - actual


def _read_indexed_csv(path, time_col):
    """Read a CSV and return it indexed by ``time_col`` parsed as datetimes."""
    frame = pd.read_csv(path)
    frame[time_col] = pd.to_datetime(frame[time_col])
    return frame.set_index(time_col)


for date in date_range:
    # The actual file is named after the day itself, the forecast file after
    # the previous day.
    day_stamp = date.strftime("%Y%m%d")
    previous_day_stamp = (date - pd.Timedelta(days=1)).strftime("%Y%m%d")
    actual_file = os.path.join(actual_dir, day_stamp + ".csv")
    predicted_file = os.path.join(predicted_dir, previous_day_stamp + ".csv")

    try:
        actual_data = _read_indexed_csv(actual_file, 'Date')
        predicted_data = _read_indexed_csv(predicted_file, 'forecast_time')
    except FileNotFoundError:
        print(f"File not found for date: {date.strftime('%Y-%m-%d')}. Excluded from the results.")
        continue

    actual_loads = actual_data.between_time("10:15", "16:00")['Load']
    predicted_loads = predicted_data.between_time("10:15", "16:00")['mix']

    if actual_loads.empty or predicted_loads.empty:
        print(f"No data available for date: {date.strftime('%Y-%m-%d')}")
        continue

    # Timestamp and value of the minimum actual load in the window.
    lowest_actual_time = actual_loads.idxmin()
    lowest_actual_load = actual_loads.min()

    # The forecast is sampled at the timestamp of the actual minimum, not at
    # the forecast's own minimum.
    if lowest_actual_time not in predicted_loads.index:
        print(f"No corresponding predicted load for time: {lowest_actual_time}")
        continue

    corresponding_predicted_load = predicted_loads.loc[lowest_actual_time]
    bias_result = calculate_bias(lowest_actual_load, corresponding_predicted_load)
    results.append({"Date": date, "Bias": bias_result})

noonresults_df = pd.DataFrame(results)
noonresults_df


File not found for date: 2024-04-20. Excluded from the results.


Unnamed: 0,Date,Bias
0,2024-01-02,-563.607320
1,2024-01-03,-1608.432306
2,2024-01-04,-695.045188
3,2024-01-05,-351.561915
4,2024-01-06,99.615993
...,...,...
186,2024-07-07,-753.370216
187,2024-07-08,-842.129777
188,2024-07-09,702.904255
189,2024-07-10,265.640349


### 夜间低谷负荷bias

In [54]:
# Bias at the time of the lowest actual load within the 00:15-06:00 window.
actual_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/data_preprocessed/load/2024"
predicted_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/results/2024/start_lead_12"

date_range = pd.date_range(start="2024-01-02", end="2024-07-11")
results = []


def calculate_bias(actual, predict):
    """Return the signed error ``predict - actual``."""
    return predict - actual


def _read_indexed_csv(path, time_col):
    """Read a CSV and return it indexed by ``time_col`` parsed as datetimes."""
    frame = pd.read_csv(path)
    frame[time_col] = pd.to_datetime(frame[time_col])
    return frame.set_index(time_col)


for date in date_range:
    # The actual file is named after the day itself, the forecast file after
    # the previous day.
    day_stamp = date.strftime("%Y%m%d")
    previous_day_stamp = (date - pd.Timedelta(days=1)).strftime("%Y%m%d")
    actual_file = os.path.join(actual_dir, day_stamp + ".csv")
    predicted_file = os.path.join(predicted_dir, previous_day_stamp + ".csv")

    try:
        actual_data = _read_indexed_csv(actual_file, 'Date')
        predicted_data = _read_indexed_csv(predicted_file, 'forecast_time')
    except FileNotFoundError:
        print(f"File not found for date: {date.strftime('%Y-%m-%d')}. Excluded from the results.")
        continue

    actual_loads = actual_data.between_time("00:15", "06:00")['Load']
    predicted_loads = predicted_data.between_time("00:15", "06:00")['mix']

    if actual_loads.empty or predicted_loads.empty:
        print(f"No data available for date: {date.strftime('%Y-%m-%d')}")
        continue

    # Timestamp and value of the minimum actual load in the window.
    lowest_actual_time = actual_loads.idxmin()
    lowest_actual_load = actual_loads.min()

    # The forecast is sampled at the timestamp of the actual minimum, not at
    # the forecast's own minimum.
    if lowest_actual_time not in predicted_loads.index:
        print(f"No corresponding predicted load for time: {lowest_actual_time}")
        continue

    corresponding_predicted_load = predicted_loads.loc[lowest_actual_time]
    bias_result = calculate_bias(lowest_actual_load, corresponding_predicted_load)
    results.append({"Date": date, "Bias": bias_result})

nightresults_df = pd.DataFrame(results)
nightresults_df


File not found for date: 2024-04-20. Excluded from the results.


Unnamed: 0,Date,Bias
0,2024-01-02,730.877615
1,2024-01-03,-218.694733
2,2024-01-04,-393.679109
3,2024-01-05,214.290036
4,2024-01-06,-164.159522
...,...,...
186,2024-07-07,-776.720222
187,2024-07-08,-655.598843
188,2024-07-09,-242.638802
189,2024-07-10,-1511.971774


### 日负荷预测综合bias

In [55]:
def overall(peak, crit, noon, night):
    """Return the weighted combination of the four bias components.

    The weights are 0.3 for ``peak``, 0.3 for ``crit``, 0.2 for ``noon`` and
    0.2 for ``night``.
    """
    peak_term = peak * 0.3
    crit_term = crit * 0.3
    noon_term = noon * 0.2
    night_term = night * 0.2
    return peak_term + crit_term + noon_term + night_term

# Make every per-metric frame indexed by a datetime 'Date' (in place, so the
# frames keep that index after this cell has run).
# The guard makes the cell safe to re-run: once 'Date' has been moved into the
# index it is no longer a column, and accessing frame['Date'] unconditionally
# would raise KeyError on a second execution.
for frame in (biases_df, crit_results_df, noonresults_df, nightresults_df):
    if 'Date' in frame.columns:
        frame['Date'] = pd.to_datetime(frame['Date'])
        frame.set_index('Date', inplace=True)

results = []

date_range = pd.date_range(start="2024-01-01", end="2024-07-11")

for date in date_range:
    try:
        # A date missing from any one of the four frames raises KeyError and
        # is reported and skipped as a whole.
        peak_bias = biases_df.loc[date, 'Bias']
        crit_bias = crit_results_df.loc[date, 'Bias']
        noon_bias= noonresults_df.loc[date, 'Bias']
        night_bias = nightresults_df.loc[date, 'Bias']

        overall_bias = overall(peak_bias,crit_bias, noon_bias, night_bias)

        results.append({
            'Date': date,
            'Peak Bias': peak_bias,
            'Critical Bias': crit_bias,
            'Noon Trough Bias': noon_bias,
            'Evening Trough Bias': night_bias,
            'Overall Bias': overall_bias
        })
    except KeyError as e:
        print(f"Data for {date} is missing: {e}")
        continue

overallresults_df = pd.DataFrame(results)

overallresults_df


Data for 2024-01-01 00:00:00 is missing: Timestamp('2024-01-01 00:00:00')
Data for 2024-04-20 00:00:00 is missing: Timestamp('2024-04-20 00:00:00')
Data for 2024-07-11 00:00:00 is missing: Timestamp('2024-07-11 00:00:00')


Unnamed: 0,Date,Peak Bias,Critical Bias,Noon Trough Bias,Evening Trough Bias,Overall Bias
0,2024-01-02,-1245.840841,-1253.573855,-563.607320,730.877615,-716.370349
1,2024-01-03,-1755.861874,-1666.652411,-1608.432306,-218.694733,-1392.179693
2,2024-01-04,-1176.850899,-1066.024966,-695.045188,-393.679109,-890.607619
3,2024-01-05,-643.323674,442.026319,-351.561915,214.290036,-87.843582
4,2024-01-06,610.531130,707.254850,99.615993,-164.159522,382.427088
...,...,...,...,...,...,...
185,2024-07-06,-2237.688386,-2566.950844,-1146.071262,-1516.773635,-1973.960748
186,2024-07-07,31.261829,-2161.085960,-753.370216,-776.720222,-944.965327
187,2024-07-08,-1219.436678,-1614.196368,-842.129777,-655.598843,-1149.635638
188,2024-07-09,1192.011743,-1295.122692,702.904255,-242.638802,61.119806


### 关键时段负荷bias

In [56]:
import pandas as pd
import os
import numpy as np

def calculate_bias(date_range, actual_dir, predicted_dir, time_periods):
    """Compute the mean signed bias per day and per time period.

    Args:
        date_range: Iterable of timestamps, one per day to evaluate.
        actual_dir: Folder with actual-load files named ``YYYYMMDD.csv``
            (columns ``Date`` and ``Load`` are read).
        predicted_dir: Folder with forecast files named ``YYYYMMDD.csv``
            (columns ``forecast_time`` and ``mix`` are read); the file used
            for a day is the one named after the previous day.
        time_periods: Mapping of ``"HH:MM-HH:MM"`` period strings to the
            number of points expected inside that period.

    Returns:
        A DataFrame with one row (``Date``, ``Period``, ``Bias``) for every
        day/period where both sides contain exactly the expected number of
        points. Days with a missing file and periods with a different point
        count are skipped.
    """
    def criticalbias(actuals, predictions):
        # Mean of (predicted - actual) over the paired points.
        errors = [predict - actual for actual, predict in zip(actuals, predictions)]
        return (1 / len(actuals)) * sum(errors)

    def read_indexed(path, time_col):
        # Load a CSV and index it by its parsed timestamp column.
        frame = pd.read_csv(path)
        frame[time_col] = pd.to_datetime(frame[time_col])
        return frame.set_index(time_col)

    rows = []
    one_day = pd.Timedelta(days=1)

    for date in date_range:
        actual_file = os.path.join(actual_dir, date.strftime("%Y%m%d") + ".csv")
        predicted_file = os.path.join(predicted_dir, (date - one_day).strftime("%Y%m%d") + ".csv")

        # Both files must exist for the day to be evaluated.
        if not (os.path.exists(actual_file) and os.path.exists(predicted_file)):
            continue

        actual_data = read_indexed(actual_file, 'Date')
        predicted_data = read_indexed(predicted_file, 'forecast_time')

        for period, num_intervals in time_periods.items():
            start_time, end_time = period.split('-')
            actual_loads = actual_data.between_time(start_time, end_time)['Load'].tolist()
            predicted_loads = predicted_data.between_time(start_time, end_time)['mix'].tolist()

            # Skip periods where either side lacks the expected point count.
            if len(actual_loads) != num_intervals or len(predicted_loads) != num_intervals:
                continue

            rows.append({
                "Date": date,
                "Period": period,
                "Bias": criticalbias(actual_loads, predicted_loads),
            })

    return pd.DataFrame(rows)

# Inputs for the per-period bias run.
actual_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/data_preprocessed/load/2024"
predicted_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/results/2024/start_lead_12"
date_range = pd.date_range(start="2024-01-01", end="2024-07-11")

# Time periods ("start-end") mapped to the number of points expected in each.
time_periods = dict([
    ("00:15-06:45", 27),
    ("08:00-12:00", 17),
    ("11:00-15:00", 17),
    ("12:15-17:00", 20),
    ("18:15-21:45", 15),
    ("22:00-23:45", 8),
])

results_df = calculate_bias(
    date_range,
    actual_dir,
    predicted_dir,
    time_periods,
)
# Persist the table (including its index) in the current working directory.
results_df.to_csv("critical_timeperiods_bias.csv")
results_df



Unnamed: 0,Date,Period,Bias
0,2024-01-02,00:15-06:45,609.840472
1,2024-01-02,08:00-12:00,-1612.113443
2,2024-01-02,11:00-15:00,-931.948473
3,2024-01-02,12:15-17:00,-753.936851
4,2024-01-02,18:15-21:45,-1158.419809
...,...,...,...
1137,2024-07-10,12:15-17:00,281.212963
1138,2024-07-10,18:15-21:45,213.190851
1139,2024-07-10,22:00-23:45,2172.793910
1140,2024-07-11,00:15-06:45,712.711470
