# 6.24-7.8 负荷预测准确率测算

### 日最大负荷准确率

In [4]:
import os
import pandas as pd

def accuracy(actual, predict):
    """Return the forecast accuracy in percent.

    Computed as ``(1 - |predict - actual| / |actual|) * 100``: 100 means the
    forecast equals the actual value, and the result goes negative once the
    absolute error exceeds the actual value.

    Args:
        actual: Observed value (used as the divisor, so it must be non-zero).
        predict: Forecast value.

    Returns:
        Accuracy as a percentage.
    """
    relative_error = abs((predict - actual) / actual)
    return (1 - relative_error) * 100

def get_max_loads(file_path, date_col, load_col):
    """Read a CSV and return the maximum load per calendar date.

    Args:
        file_path: Path of the CSV file to read.
        date_col: Name of the timestamp column (parsed with ``pd.to_datetime``).
        load_col: Name of the column whose daily maximum is taken.

    Returns:
        A Series of ``load_col`` maxima indexed by ``datetime.date``.
    """
    frame = pd.read_csv(file_path)
    frame[date_col] = pd.to_datetime(frame[date_col])

    # Group on the calendar-date part of each timestamp.
    day_keys = frame[date_col].dt.date
    return frame.groupby(day_keys)[load_col].max()

def process_files(actual_folder, predicted_folder, start_date, end_date, actual_date_col, actual_load_col, predicted_date_col, predicted_load_col):
    """Compute the daily-peak forecast accuracy for every date found in both folders.

    Each folder is scanned for ``.csv`` files whose first eight characters
    (compared as strings) fall within ``[start_date, end_date]``. The daily
    maximum load is extracted from every file with ``get_max_loads`` and
    collected per date; when several files contain the same date, the file
    that sorts last wins.

    The two folders are processed independently. Pairing the file lists
    positionally with ``zip`` would stop at the shorter list and silently
    drop files whenever one folder has a gap.

    Args:
        actual_folder: Directory holding the actual-load CSV files.
        predicted_folder: Directory holding the forecast CSV files.
        start_date: Inclusive lower bound for the file-name prefix (e.g. '20240623').
        end_date: Inclusive upper bound for the file-name prefix.
        actual_date_col: Timestamp column in the actual-load files.
        actual_load_col: Load column in the actual-load files.
        predicted_date_col: Timestamp column in the forecast files.
        predicted_load_col: Load column in the forecast files.

    Returns:
        DataFrame with one row per date present in both sources and the
        columns 'Date' and 'Accuracy'. It is empty (no columns) when the
        sources share no date.
    """
    def _csv_files_in_range(folder):
        # Sorted so that later-named files override earlier ones on shared dates.
        return sorted(
            os.path.join(folder, name)
            for name in os.listdir(folder)
            if name.endswith('.csv') and start_date <= name[:8] <= end_date
        )

    def _daily_max_by_date(folder, date_col, load_col):
        # Merge the per-file daily maxima into a single {date: max_load} mapping.
        daily_max = {}
        for path in _csv_files_in_range(folder):
            daily_max.update(get_max_loads(path, date_col, load_col).to_dict())
        return daily_max

    actual_max_loads = _daily_max_by_date(actual_folder, actual_date_col, actual_load_col)
    predicted_max_loads = _daily_max_by_date(predicted_folder, predicted_date_col, predicted_load_col)

    # Only dates available on both sides can be scored.
    accuracies = [
        {'Date': date, 'Accuracy': accuracy(actual_max_loads[date], predicted_max_loads[date])}
        for date in actual_max_loads
        if date in predicted_max_loads
    ]

    return pd.DataFrame(accuracies)

# Input folders for the daily-peak accuracy run.
actual_folder = '/Users/maxz/Desktop/EQUOTA/load forecast/data_preprocessed/load/2024'
predicted_folder = '/Users/maxz/Desktop/EQUOTA/load forecast/results/2024/start_lead_12'

# File-name window: YYYYMMDD prefixes, inclusive on both ends.
start_date, end_date = '20240623', '20240708'

# Timestamp and load column names in the actual and forecast CSV files.
actual_date_col, actual_load_col = 'Date', 'Load'
predicted_date_col, predicted_load_col = 'forecast_time', 'mix'

accuracies_df = process_files(
    actual_folder=actual_folder,
    predicted_folder=predicted_folder,
    start_date=start_date,
    end_date=end_date,
    actual_date_col=actual_date_col,
    actual_load_col=actual_load_col,
    predicted_date_col=predicted_date_col,
    predicted_load_col=predicted_load_col,
)

accuracies_df

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Unnamed: 0,Date,Accuracy
0,2024-06-24,99.020763
1,2024-06-25,98.023081
2,2024-06-26,92.575792
3,2024-06-27,95.898724
4,2024-06-28,93.019916
5,2024-06-29,97.232863
6,2024-06-30,94.876547
7,2024-07-01,88.437013
8,2024-07-02,95.166632
9,2024-07-03,95.249397


### 日保供关键时间段负荷预测准确率

In [5]:
def critaccuracy(actual1, actual2, actual3, actual4, i1, i2, i3, i4):
    """Return the mean percentage accuracy over four (actual, forecast) pairs.

    Each pair contributes ``(1 - |actual - forecast| / |actual|) * 100``;
    the four contributions are averaged.

    Args:
        actual1, actual2, actual3, actual4: Observed values (non-zero divisors).
        i1, i2, i3, i4: Forecast values, matched positionally to the actuals.

    Returns:
        The average of the four per-point accuracies, in percent.
    """
    pairs = ((actual1, i1), (actual2, i2), (actual3, i3), (actual4, i4))
    scores = [
        (1 - abs((observed - forecast) / observed)) * 100
        for observed, forecast in pairs
    ]
    first, second, third, fourth = scores
    return 1/4 * (first + second + third + fourth)

actual_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/data_preprocessed/load/2024"
predicted_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/results/2024/start_lead_12"

date_range = pd.date_range(start="2024-06-24", end="2024-07-08")

# The four quarter-hour marks of the window (20:30, 20:45, 21:00, 21:15).
# "15min" is used instead of "15T": the "T" alias raises a FutureWarning in recent pandas.
time_range = pd.date_range("20:30", "21:15", freq="15min").time

results = []

for date in date_range:
    day_label = date.strftime("%Y-%m-%d")
    actual_file = os.path.join(actual_dir, date.strftime("%Y%m%d") + ".csv")
    # The forecast for a day is read from the file named after the previous day.
    predicted_file = os.path.join(predicted_dir, (date - pd.Timedelta(days=1)).strftime("%Y%m%d") + ".csv")

    if not os.path.exists(actual_file) or not os.path.exists(predicted_file):
        print(f"Skipping {day_label}: actual or forecast file not found")
        continue

    actual_data = pd.read_csv(actual_file)
    actual_data['Date'] = pd.to_datetime(actual_data['Date'])
    actual_data = actual_data.set_index('Date')

    predicted_data = pd.read_csv(predicted_file)
    predicted_data['forecast_time'] = pd.to_datetime(predicted_data['forecast_time'])
    predicted_data = predicted_data.set_index('forecast_time')

    actual_loads = actual_data.between_time("20:30", "21:15")['Load'].tolist()
    predicted_loads = predicted_data.between_time("20:30", "21:15")['mix'].tolist()

    # critaccuracy takes exactly four (actual, forecast) pairs.
    if len(actual_loads) == 4 and len(predicted_loads) == 4:
        # Kept under its own name so the module-level `accuracy` function is not overwritten.
        window_accuracy = critaccuracy(*actual_loads, *predicted_loads)
        results.append({"Date": date, "Accuracy": window_accuracy})
    else:
        print(
            f"Skipping {day_label}: expected 4 points in 20:30-21:15, "
            f"got {len(actual_loads)} actual / {len(predicted_loads)} predicted"
        )

crit_results_df = pd.DataFrame(results)

crit_results_df


  time_range = pd.date_range("20:30", "21:15", freq="15T").time


Unnamed: 0,Date,Accuracy
0,2024-06-24,98.562686
1,2024-06-25,96.757361
2,2024-06-26,92.32846
3,2024-06-27,96.264081
4,2024-06-28,96.832225
5,2024-06-29,97.237217
6,2024-06-30,96.639014
7,2024-07-01,91.262907
8,2024-07-02,98.612267
9,2024-07-03,96.994417


### 午间低谷最小负荷预测准确率

In [6]:

def accuracy(actual, predict):
    """Return the percentage accuracy of ``predict`` against ``actual``.

    Equals ``(1 - |predict - actual| / |actual|) * 100``; ``actual`` is the
    divisor and must be non-zero.
    """
    deviation = (predict - actual) / actual
    score = 1 - abs(deviation)
    return score * 100

actual_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/data_preprocessed/load/2024"
predicted_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/results/2024/start_lead_12"

date_range = pd.date_range(start="2024-06-24", end="2024-07-08")

results = []

for date in date_range:
    actual_file = os.path.join(actual_dir, date.strftime("%Y%m%d") + ".csv")
    # The forecast for a day is read from the file named after the previous day.
    predicted_file = os.path.join(predicted_dir, (date - pd.Timedelta(days=1)).strftime("%Y%m%d") + ".csv")

    # Skip days without both files instead of aborting the whole loop on FileNotFoundError.
    missing_files = [path for path in (actual_file, predicted_file) if not os.path.exists(path)]
    if missing_files:
        print(f"Missing file(s) for date {date.strftime('%Y-%m-%d')}: {missing_files}")
        continue

    actual_data = pd.read_csv(actual_file)
    actual_data['Date'] = pd.to_datetime(actual_data['Date'])
    actual_data = actual_data.set_index('Date')

    predicted_data = pd.read_csv(predicted_file)
    predicted_data['forecast_time'] = pd.to_datetime(predicted_data['forecast_time'])
    predicted_data = predicted_data.set_index('forecast_time')

    # Restrict both series to the 10:15-16:00 window.
    actual_loads = actual_data.between_time("10:15", "16:00")['Load']
    predicted_loads = predicted_data.between_time("10:15", "16:00")['mix']

    if actual_loads.empty or predicted_loads.empty:
        print(f"No data available for date: {date.strftime('%Y-%m-%d')}")
        continue

    # Locate the actual minimum, then score the forecast at that same timestamp.
    lowest_actual_time = actual_loads.idxmin()
    lowest_actual_load = actual_loads.min()

    if lowest_actual_time in predicted_loads.index:
        corresponding_predicted_load = predicted_loads.loc[lowest_actual_time]
        acc = accuracy(lowest_actual_load, corresponding_predicted_load)
        results.append({"Date": date, "Accuracy": acc})
    else:
        print(f"No corresponding predicted load for time: {lowest_actual_time}")

noonresults_df = pd.DataFrame(results)

noonresults_df


Unnamed: 0,Date,Accuracy
0,2024-06-24,98.42598
1,2024-06-25,95.582273
2,2024-06-26,91.308396
3,2024-06-27,96.219714
4,2024-06-28,95.985353
5,2024-06-29,99.126911
6,2024-06-30,91.876954
7,2024-07-01,89.177142
8,2024-07-02,93.157267
9,2024-07-03,98.224352


### 夜间低谷最小负荷预测准确率

In [7]:
actual_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/data_preprocessed/load/2024"
predicted_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/results/2024/start_lead_12"

date_range = pd.date_range(start="2024-06-24", end="2024-07-08")

results = []

for date in date_range:
    actual_file = os.path.join(actual_dir, date.strftime("%Y%m%d") + ".csv")
    # The forecast for a day is read from the file named after the previous day.
    predicted_file = os.path.join(predicted_dir, (date - pd.Timedelta(days=1)).strftime("%Y%m%d") + ".csv")

    # Skip days without both files instead of aborting the whole loop on FileNotFoundError.
    missing_files = [path for path in (actual_file, predicted_file) if not os.path.exists(path)]
    if missing_files:
        print(f"Missing file(s) for date {date.strftime('%Y-%m-%d')}: {missing_files}")
        continue

    actual_data = pd.read_csv(actual_file)
    actual_data['Date'] = pd.to_datetime(actual_data['Date'])
    actual_data = actual_data.set_index('Date')

    predicted_data = pd.read_csv(predicted_file)
    predicted_data['forecast_time'] = pd.to_datetime(predicted_data['forecast_time'])
    predicted_data = predicted_data.set_index('forecast_time')

    # Restrict both series to the 00:15-06:00 window.
    actual_loads = actual_data.between_time("00:15", "06:00")['Load']
    predicted_loads = predicted_data.between_time("00:15", "06:00")['mix']

    if actual_loads.empty or predicted_loads.empty:
        print(f"No data available for date: {date.strftime('%Y-%m-%d')}")
        continue

    # Locate the actual minimum, then score the forecast at that same timestamp.
    lowest_actual_time = actual_loads.idxmin()
    lowest_actual_load = actual_loads.min()

    if lowest_actual_time in predicted_loads.index:
        corresponding_predicted_load = predicted_loads.loc[lowest_actual_time]
        # `accuracy` is the percentage-accuracy function defined in an earlier cell.
        acc = accuracy(lowest_actual_load, corresponding_predicted_load)
        results.append({"Date": date, "Accuracy": acc})
    else:
        print(f"No corresponding predicted load for time: {lowest_actual_time}")

nightresults_df = pd.DataFrame(results)

nightresults_df


Unnamed: 0,Date,Accuracy
0,2024-06-24,95.986034
1,2024-06-25,96.451119
2,2024-06-26,96.291488
3,2024-06-27,94.913413
4,2024-06-28,99.020382
5,2024-06-29,97.905833
6,2024-06-30,99.35388
7,2024-07-01,99.039951
8,2024-07-02,96.529381
9,2024-07-03,98.351907


### 日负荷预测综合准确率

In [8]:
def overall(peak, crit, noon, night):
    """Combine the four daily accuracies into one weighted score.

    Weights: 0.3 for ``peak``, 0.3 for ``crit``, 0.2 for ``noon`` and
    0.2 for ``night``.

    Returns:
        The weighted sum of the four inputs.
    """
    weighted_terms = (peak * 0.3, crit * 0.3, noon * 0.2, night * 0.2)

    # Accumulate strictly left to right so floating-point results stay unchanged.
    total = weighted_terms[0]
    for term in weighted_terms[1:]:
        total = total + term
    return total

def _indexed_by_date(frame):
    """Return ``frame`` indexed by its 'Date' column converted to datetime.

    A frame without a 'Date' column is returned unchanged, so re-running this
    cell after the frames have already been indexed does not raise KeyError.
    """
    if 'Date' not in frame.columns:
        return frame
    return frame.assign(Date=pd.to_datetime(frame['Date'])).set_index('Date')


# Rebind each per-metric table to a Date-indexed frame (safe to execute repeatedly).
accuracies_df = _indexed_by_date(accuracies_df)
crit_results_df = _indexed_by_date(crit_results_df)
noonresults_df = _indexed_by_date(noonresults_df)
nightresults_df = _indexed_by_date(nightresults_df)

results = []

date_range = pd.date_range(start="2024-06-24", end="2024-07-08")

for date in date_range:
    # A date contributes only when all four metrics are available for it.
    try:
        peak_accuracy = accuracies_df.loc[date, 'Accuracy']
        crit_accuracy = crit_results_df.loc[date, 'Accuracy']
        noon_accuracy = noonresults_df.loc[date, 'Accuracy']
        night_accuracy = nightresults_df.loc[date, 'Accuracy']
    except KeyError as e:
        print(f"Data for {date} is missing: {e}")
        continue

    overall_accuracy = overall(peak_accuracy, crit_accuracy, noon_accuracy, night_accuracy)

    results.append({
        'Date': date,
        'Peak Accuracy': peak_accuracy,
        'Critical Accuracy': crit_accuracy,
        'Noon Trough Accuracy': noon_accuracy,
        'Evening Trough Accuracy': night_accuracy,
        'Overall Accuracy': overall_accuracy
    })

results_df = pd.DataFrame(results)

results_df


Unnamed: 0,Date,Peak Accuracy,Critical Accuracy,Noon Trough Accuracy,Evening Trough Accuracy,Overall Accuracy
0,2024-06-24,99.020763,98.562686,98.42598,95.986034,98.157437
1,2024-06-25,98.023081,96.757361,95.582273,96.451119,96.840811
2,2024-06-26,92.575792,92.32846,91.308396,96.291488,92.991252
3,2024-06-27,95.898724,96.264081,96.219714,94.913413,95.875467
4,2024-06-28,93.019916,96.832225,95.985353,99.020382,95.956789
5,2024-06-29,97.232863,97.237217,99.126911,97.905833,97.747573
6,2024-06-30,94.876547,96.639014,91.876954,99.35388,95.700835
7,2024-07-01,88.437013,91.262907,89.177142,99.039951,91.553395
8,2024-07-02,95.166632,98.612267,93.157267,96.529381,96.070999
9,2024-07-03,95.249397,96.994417,98.224352,98.351907,96.988396


### 关键时段预测准确率

In [16]:
def critaccuracy(actuals, predictions):
    """Return the mean percentage accuracy over paired actual/forecast values.

    Each pair contributes ``(1 - |actual - predict| / |actual|) * 100`` and the
    contributions are averaged over ``len(actuals)``.

    NOTE: this replaces the earlier eight-argument ``critaccuracy`` defined in
    a previous cell; the two signatures are not interchangeable.

    Args:
        actuals: Sequence of observed values (each is a divisor, so non-zero).
        predictions: Iterable of forecast values, one per actual.

    Returns:
        The average per-point accuracy, in percent.

    Raises:
        ValueError: If the two inputs differ in length. Without this check
            ``zip`` would drop the unmatched tail while the divisor stayed
            ``len(actuals)``, giving a silently wrong mean.
        ZeroDivisionError: If ``actuals`` is empty.
    """
    predictions = list(predictions)
    n = len(actuals)
    if len(predictions) != n:
        raise ValueError(
            f"actuals and predictions must have the same length, got {n} and {len(predictions)}"
        )

    coefficient = 1 / n
    return coefficient * sum(
        (1 - abs((actual - predict) / actual)) * 100
        for actual, predict in zip(actuals, predictions)
    )

actual_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/data_preprocessed/load/2024"  # folder of actual-load CSV files
predicted_dir = "/Users/maxz/Desktop/EQUOTA/load forecast/results/2024/start_lead_12"  # folder of forecast CSV files

# Time window -> number of 15-minute points expected in it (both endpoints included).
time_periods = {
    "08:00-12:00": 17,
    "12:15-17:00": 20,
    "18:15-21:45": 15,
    "11:00-15:00": 17,
    "00:15-06:45": 27,
    "22:00-23:45": 8
}
date_range = pd.date_range(start="2024-06-24", end="2024-07-08")

results = []

for date in date_range:
    day_label = date.strftime("%Y-%m-%d")
    actual_file = os.path.join(actual_dir, date.strftime("%Y%m%d") + ".csv")
    # The forecast for a day is read from the file named after the previous day.
    predicted_file = os.path.join(predicted_dir, (date - pd.Timedelta(days=1)).strftime("%Y%m%d") + ".csv")

    if not os.path.exists(actual_file) or not os.path.exists(predicted_file):
        print(f"Skipping {day_label}: actual or forecast file not found")
        continue

    actual_data = pd.read_csv(actual_file)
    actual_data['Date'] = pd.to_datetime(actual_data['Date'])
    actual_data = actual_data.set_index('Date')

    predicted_data = pd.read_csv(predicted_file)
    predicted_data['forecast_time'] = pd.to_datetime(predicted_data['forecast_time'])
    predicted_data = predicted_data.set_index('forecast_time')

    for period, num_intervals in time_periods.items():
        start_time, end_time = period.split('-')
        actual_loads = actual_data.between_time(start_time, end_time)['Load'].tolist()
        predicted_loads = predicted_data.between_time(start_time, end_time)['mix'].tolist()

        # Score a period only when both series contain every expected point.
        if len(actual_loads) == num_intervals and len(predicted_loads) == num_intervals:
            # Kept under its own name so the module-level `accuracy` function is not overwritten.
            period_accuracy = critaccuracy(actual_loads, predicted_loads)
            results.append({"Date": date, "Period": period, "Accuracy": period_accuracy})
        else:
            print(
                f"Skipping {day_label} {period}: expected {num_intervals} points, "
                f"got {len(actual_loads)} actual / {len(predicted_loads)} predicted"
            )

# NOTE: this rebinds `results_df`, replacing the composite-accuracy table built in the earlier cell.
results_df = pd.DataFrame(results)
file_path = '/Users/maxz/Desktop/EQUOTA/period_accuracy_results_updated.csv'
results_df.to_csv(file_path, index=False)

results_df


Unnamed: 0,Date,Period,Accuracy
0,2024-06-24,08:00-12:00,97.643021
1,2024-06-24,12:15-17:00,97.750091
2,2024-06-24,18:15-21:45,98.434754
3,2024-06-24,11:00-15:00,98.421211
4,2024-06-24,00:15-06:45,96.818795
...,...,...,...
85,2024-07-08,12:15-17:00,96.779608
86,2024-07-08,18:15-21:45,96.396752
87,2024-07-08,11:00-15:00,95.832187
88,2024-07-08,00:15-06:45,96.970433
