In [66]:
import pandas as pd
import numpy as np
import os
import datetime as dt
import calendar
from matplotlib import pyplot as plt
import scipy.stats as stats
import glob

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

from mpl_toolkits.mplot3d import Axes3D  #3Dplot
import seaborn as sns
import japanize_matplotlib


from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Mean Absolute Error(MAE)用 
from sklearn.metrics import mean_absolute_error 
# Root Mean Squared Error(RMSE)用 
from sklearn.metrics import mean_squared_error


In [2]:
# Directory that holds the versioned king_history CSV snapshots.
king_dir = '../../../卒論関連書類/データ/king_history/king_history1'

# The snapshot to load is numbered (entries matched by the glob) - 1.
files = glob.glob(f'{king_dir}/*')
version = len(files) - 1

# Parse the DateTime column while reading so the .dt accessors work later.
df_king = pd.read_csv(
    f'{king_dir}/king{version}.csv',
    converters={'DateTime': pd.to_datetime},
)

In [70]:
# Report the renewable forecast error per area, for solar and wind separately:
#   * the mean over days of the daily WAPE, MAE and RMSE, and
#   * WAPE, MAE and RMSE computed once over every usable row of the area.
# WAPE is sum(absolute error) / sum(actual) * 100.
col_names = ['DateTime','北電太陽光想定(kWh)', '北電風力想定(kWh)', '北電太陽光実績(kWh)',
       '北電風力実績(kWh)', '九電太陽光想定(kWh)', '九電太陽光実績(kWh)', '九電風力想定(kWh)',
       '九電風力実績(kWh)', '東電太陽光想定(kWh)', '東電太陽光実績(kWh)', '東電風力想定(kWh)',
       '東電風力実績(kWh)', '関電太陽光想定(kWh)', '関電太陽光実績(kWh)', '関電風力想定(kWh)',
       '関電風力実績(kWh)', '中国太陽光想定(kWh)', '中国風力想定(kWh)', '中国太陽光実績(kWh)',
       '中国風力実績(kWh)','絶対値北電太陽光error(実績-想定)(kWh)',
       '絶対値北電風力error(実績-想定)(kWh)', '絶対値九電太陽光error(実績-想定)(kWh)',
       '絶対値九電風力error(実績-想定)(kWh)', '絶対値東電太陽光error(実績-想定)(kWh)',
       '絶対値東電風力error(実績-想定)(kWh)', '絶対値関電太陽光error(実績-想定)(kWh)',
       '絶対値関電風力error(実績-想定)(kWh)', '絶対値中国太陽光error(実績-想定)(kWh)',
       '絶対値中国風力error(実績-想定)(kWh)']
df_select = df_king[col_names]

area_name_list =['東電','関電','北電','九電','中国']


def _wape_percent(abs_error, actual):
    """Return sum(abs_error) / sum(actual) * 100, or NaN when sum(actual) is zero.

    Args:
        abs_error: 1-D array of absolute forecast errors.
        actual: 1-D array of actual generation, same length as ``abs_error``.
    """
    actual_total = np.sum(actual)
    if actual_total == 0:
        # WAPE is undefined without any actual generation. NaN lets the caller skip
        # the value instead of letting inf/NaN leak into an average.
        return np.nan
    return np.sum(abs_error) / actual_total * 100


def _mae(abs_error):
    """Return the mean of the absolute errors in the 1-D array ``abs_error``."""
    return np.sum(abs_error) / len(abs_error)


def _rmse(abs_error):
    """Return the square root of the mean squared error of ``abs_error``."""
    return np.sqrt(np.sum(abs_error ** 2) / len(abs_error))


for area_name in area_name_list:
    solar_actual_col = f'{area_name}太陽光実績(kWh)'
    wind_actual_col = f'{area_name}風力実績(kWh)'
    solar_abs_col = f'絶対値{area_name}太陽光error(実績-想定)(kWh)'
    wind_abs_col = f'絶対値{area_name}風力error(実績-想定)(kWh)'

    select1_name = ['DateTime', solar_actual_col, wind_actual_col, solar_abs_col, wind_abs_col]
    # Keep only rows where every column of this area has a value.
    df_1 = df_select[select1_name].dropna()
    if df_1.empty:
        # Nothing to summarise; indexing date_list[0] below would raise IndexError.
        print(f'{area_name}: 有効なデータがありません')
        print('')
        continue

    day_of_row = df_1['DateTime'].dt.date
    date_list = day_of_row.drop_duplicates().values
    start_date = date_list[0]
    end_date = date_list[-1]

    solarerror_list = []
    winderror_list = []
    oneday_solar_mae_list = []
    oneday_wind_mae_list = []
    solar_rmse_list_oneday = []
    wind_rmse_list_oneday = []

    # A single groupby pass replaces re-filtering the whole frame once per day.
    # sort=False keeps the days in order of first appearance, i.e. the order of date_list.
    for date, df in df_1.groupby(day_of_row, sort=False):
        solar_abs = df[solar_abs_col].values
        wind_abs = df[wind_abs_col].values

        # Daily WAPE (percent)
        solar_error_oneday = _wape_percent(solar_abs, df[solar_actual_col].values)
        wind_error_oneday = _wape_percent(wind_abs, df[wind_actual_col].values)
        solarerror_list.append(solar_error_oneday)
        winderror_list.append(wind_error_oneday)

        # Daily MAE
        solar_mae_oneday = _mae(solar_abs)
        wind_mae_oneday = _mae(wind_abs)
        oneday_solar_mae_list.append(solar_mae_oneday)
        oneday_wind_mae_list.append(wind_mae_oneday)

        # Daily RMSE
        solar_rmse_oneday = _rmse(solar_abs)
        wind_rmse_oneday = _rmse(wind_abs)
        solar_rmse_list_oneday.append(solar_rmse_oneday)
        wind_rmse_list_oneday.append(wind_rmse_oneday)

    # nanmean skips the days whose WAPE is undefined (zero actual generation).
    mean_oneday_solarerror = round(np.nanmean(np.array(solarerror_list)),1)
    mean_oneday_winderror = round(np.nanmean(np.array(winderror_list)),1)
    mean_solar_mae_oneday = round(np.mean(np.array(oneday_solar_mae_list)),1)
    mean_wind_mae_oneday = round(np.mean(np.array(oneday_wind_mae_list)),1)
    mean_solar_rmse_oneday = round(np.mean(np.array(solar_rmse_list_oneday)),1)
    mean_wind_rmse_oneday = round(np.mean(np.array(wind_rmse_list_oneday)),1)

    # Whole-period metrics over every usable row of the area.
    all_solar_abs = df_1[solar_abs_col].values
    all_wind_abs = df_1[wind_abs_col].values
    all_solar_wape = round(_wape_percent(all_solar_abs, df_1[solar_actual_col].values),1)
    all_wind_wape = round(_wape_percent(all_wind_abs, df_1[wind_actual_col].values),1)
    all_solar_mae = round(_mae(all_solar_abs),1)
    all_wind_mae = round(_mae(all_wind_abs),1)
    all_solar_rmse = round(_rmse(all_solar_abs),1)
    all_wind_rmse = round(_rmse(all_wind_abs),1)

    print(f'1日ごとの{area_name}の太陽光誤差は平均{mean_oneday_solarerror}%(1日ごとのWAPEの平均値)、風力誤差は平均{mean_oneday_winderror}%(1日ごとのWAPEの平均値)、　日は{start_date}~{end_date}')
    print(f'1日ごとの{area_name}の太陽光MAEは平均{mean_solar_mae_oneday}、風力MAEは平均{mean_wind_mae_oneday}')
    print(f'1日ごとの{area_name}の太陽光RMSEは平均{mean_solar_rmse_oneday}、風力RMSEは平均{mean_wind_rmse_oneday}')
    print(f'{area_name}の全体の、WAPEは太陽光{all_solar_wape}%、風力{all_wind_wape}%、MAEは太陽光{all_solar_mae}、風力{all_wind_mae}、RMSEは太陽光{all_solar_rmse}、風力{all_wind_rmse}')
    print('')
    



1日ごとの東電の太陽光誤差は平均26.4%(1日ごとのWAPEの平均値)、風力誤差は平均28.8%(1日ごとのWAPEの平均値)、　日は2018-04-01~2022-09-30
1日ごとの東電の太陽光MAEは平均170945.1、風力MAEは平均10278.5
1日ごとの東電の太陽光RMSEは平均299830.7、風力RMSEは平均12354.3
東電の全体の、WAPEは太陽光19.1%、風力24.5%、MAEは太陽光170758.0、風力10313.0、RMSEは太陽光364550.9、風力14484.8

1日ごとの関電の太陽光誤差は平均25.4%(1日ごとのWAPEの平均値)、風力誤差は平均53.7%(1日ごとのWAPEの平均値)、　日は2018-04-01~2022-09-30
1日ごとの関電の太陽光MAEは平均65445.1、風力MAEは平均9008.4
1日ごとの関電の太陽光RMSEは平均113822.7、風力RMSEは平均10472.3
関電の全体の、WAPEは太陽光19.5%、風力46.4%、MAEは太陽光66278.3、風力9047.1、RMSEは太陽光141062.2、風力13008.1

1日ごとの北電の太陽光誤差は平均25.0%(1日ごとのWAPEの平均値)、風力誤差は平均37.3%(1日ごとのWAPEの平均値)、　日は2021-10-01~2022-09-30
1日ごとの北電の太陽光MAEは平均18899.0、風力MAEは平均9462.7
1日ごとの北電の太陽光RMSEは平均32158.8、風力RMSEは平均10954.3
北電の全体の、WAPEは太陽光18.4%、風力27.6%、MAEは太陽光18899.0、風力9462.7、RMSEは太陽光37049.4、風力12434.2

1日ごとの九電の太陽光誤差は平均26.2%(1日ごとのWAPEの平均値)、風力誤差は平均87.4%(1日ごとのWAPEの平均値)、　日は2018-04-01~2022-09-30
1日ごとの九電の太陽光MAEは平均110525.5、風力MAEは平均18583.3
1日ごとの九電の太陽光RMSEは平均186248.2、風力RMSEは平均22377.3
九電の全体の、WAPEは太陽光18.3%、風力51.5%、MAEは太陽光110962.0、風力18675.4、RM

In [None]:
# Four-company aggregate (with smoothing effect) -- judged not worth reporting.
# NOTE(review): this cell looks like an unfinished draft (it carries no execution count).
# col_names below holds only the four aggregate columns, so df_select contains neither
# 'DateTime' nor any per-area column, and `df_select[select1_name]` inside the loop
# raises KeyError on the first iteration. The assignment also rebinds the shared name
# df_select, which other cells index with per-area columns. Whether the four aggregate
# columns exist in df_king cannot be confirmed from here -- TODO verify or delete the cell.
col_names = ['太陽光誤差合算(実績-想定)(kWh)(ならし)','風力誤差合算(実績-想定)(kWh)(ならし)', '太陽光絶対値誤差合算(実績-想定)(kWh)','風力絶対値誤差合算(実績-想定)(kWh)']
df_select = df_king[col_names]

area_name_list =['東電','関電','北電','九電','中国']

# area_name = '東電'
for area_name in area_name_list:
    # Per-area columns: timestamp, actual solar/wind, and absolute solar/wind error.
    select1_name =['DateTime',f'{area_name}太陽光実績(kWh)',f'{area_name}風力実績(kWh)',f'絶対値{area_name}太陽光error(実績-想定)(kWh)',f'絶対値{area_name}風力error(実績-想定)(kWh)'
    # ,f'{area_name}太陽光想定(kWh)'
    ]
    # Drop rows with a missing value in any selected column.
    df_1 = df_select[select1_name].dropna()
    # Distinct calendar dates, in order of first appearance.
    date_list = df_1['DateTime'].dt.date.drop_duplicates().values

    solarerror_list =[]
    winderror_list =[]
    start_date =date_list[0]
    end_date = date_list[-1]
    oneday_solar_mae_list =[]
    oneday_wind_mae_list =[]
    solar_rmse_list_oneday =[]
    wind_rmse_list_oneday =[]
    for date in date_list:
        # Rows that belong to this calendar date.
        df= df_1[df_1['DateTime'].dt.date ==date]
        # Daily WAPE in percent: sum(absolute error) / sum(actual) * 100.
        solar_error_oneday = np.sum(df[f'絶対値{area_name}太陽光error(実績-想定)(kWh)'].values) / np.sum(df[f'{area_name}太陽光実績(kWh)'].values)*100
        wind_error_oneday = np.sum(df[f'絶対値{area_name}風力error(実績-想定)(kWh)'].values) / np.sum(df[f'{area_name}風力実績(kWh)'].values)*100
        solarerror_list.append(solar_error_oneday)
        winderror_list.append(wind_error_oneday)

        # MAE oneday
        solar_mae_oneday = np.sum(df[f'絶対値{area_name}太陽光error(実績-想定)(kWh)'].values)/len(df)
        wind_mae_oneday = np.sum(df[f'絶対値{area_name}風力error(実績-想定)(kWh)'].values)/len(df)
        oneday_solar_mae_list.append(solar_mae_oneday)
        oneday_wind_mae_list.append(wind_mae_oneday)

        # rmse oneday
        solar_rmse_oneday = np.sqrt(np.sum(df[f'絶対値{area_name}太陽光error(実績-想定)(kWh)'].values**2)/len(df))
        wind_rmse_oneday = np.sqrt(np.sum(df[f'絶対値{area_name}風力error(実績-想定)(kWh)'].values**2)/len(df))
        solar_rmse_list_oneday.append(solar_rmse_oneday)
        wind_rmse_list_oneday.append(wind_rmse_oneday)






In [3]:
# Display the column labels of df_king.
df_king.columns

Index(['DateTime', 'intra_price(円/kWh)', 'spot_price(円/kWh)',
       'gap_price[intra-spot](円/kWh)', 'intra_volume（MWh/h）',
       'spot_volume(kWh)', '北電太陽光想定(kWh)', '北電風力想定(kWh)', '北電太陽光実績(kWh)',
       '北電風力実績(kWh)', '九電太陽光想定(kWh)', '九電太陽光実績(kWh)', '九電風力想定(kWh)',
       '九電風力実績(kWh)', '東電太陽光想定(kWh)', '東電太陽光実績(kWh)', '東電風力想定(kWh)',
       '東電風力実績(kWh)', '関電太陽光想定(kWh)', '関電太陽光実績(kWh)', '関電風力想定(kWh)',
       '関電風力実績(kWh)', '中国太陽光想定(kWh)', '中国風力想定(kWh)', '中国太陽光実績(kWh)',
       '中国風力実績(kWh)', '北海道予備率(%)', '東北予備率(%)', '東京予備率(%)', '中部予備率(%)',
       '北陸予備率(%)', '関西予備率(%)', '中国予備率(%)', '四国予備率(%)', '九州予備率(%)', '認可出力合計',
       '低下量合計', '停止・低下ユニット数合計', '北電太陽光error(実績-想定)(kWh)',
       '北電風力error(実績-想定)(kWh)', '九電太陽光error(実績-想定)(kWh)',
       '九電風力error(実績-想定)(kWh)', '東電太陽光error(実績-想定)(kWh)',
       '東電風力error(実績-想定)(kWh)', '関電太陽光error(実績-想定)(kWh)',
       '関電風力error(実績-想定)(kWh)', '中国太陽光error(実績-想定)(kWh)',
       '中国風力error(実績-想定)(kWh)', '絶対値北電太陽光error(実績-想定)(kWh)',
       '絶対値北電風力error(実績-想定)

In [65]:

# Sanity check: each stored absolute-error column must equal |actual - forecast|.
# Prints 'ok' for an area when both solar and wind match to 4 decimals; otherwise
# prints the area, the source and the largest deviation found.
area_name_list =['東電','関電','北電','九電','中国']

for area_name in area_name_list:
    select1_name = ['DateTime',f'{area_name}太陽光実績(kWh)',f'{area_name}風力実績(kWh)',f'絶対値{area_name}太陽光error(実績-想定)(kWh)',f'絶対値{area_name}風力error(実績-想定)(kWh)'
    ,f'{area_name}太陽光想定(kWh)',f'{area_name}風力想定(kWh)'
    ]
    # Read from df_king directly so the check does not depend on which cell last
    # rebound df_select.
    df_1 = df_king[select1_name].dropna()

    mismatches = []
    for source_name in ('太陽光', '風力'):
        recomputed = np.abs(df_1[f'{area_name}{source_name}実績(kWh)'] - df_1[f'{area_name}{source_name}想定(kWh)'])
        stored = df_1[f'絶対値{area_name}{source_name}error(実績-想定)(kWh)']
        # Take the absolute deviation: the max of a signed difference would miss rows
        # where the stored value is larger than the recomputed one.
        a_max = round(np.abs(recomputed - stored).max(), 4)
        # An empty frame gives NaN here, which is reported rather than passed as 'ok'.
        if a_max != 0:
            mismatches.append((source_name, a_max))

    if not mismatches:
        print('ok')
    else:
        for source_name, a_max in mismatches:
            print(area_name, source_name, a_max)


ok
ok
ok
ok
ok


In [13]:
# Columns used by the per-area error analysis: the timestamp, the forecast/actual
# generation columns, and the absolute forecast-error columns.
generation_cols = [
    '北電太陽光想定(kWh)', '北電風力想定(kWh)', '北電太陽光実績(kWh)', '北電風力実績(kWh)',
    '九電太陽光想定(kWh)', '九電太陽光実績(kWh)', '九電風力想定(kWh)', '九電風力実績(kWh)',
    '東電太陽光想定(kWh)', '東電太陽光実績(kWh)', '東電風力想定(kWh)', '東電風力実績(kWh)',
    '関電太陽光想定(kWh)', '関電太陽光実績(kWh)', '関電風力想定(kWh)', '関電風力実績(kWh)',
    '中国太陽光想定(kWh)', '中国風力想定(kWh)', '中国太陽光実績(kWh)', '中国風力実績(kWh)',
]
# Absolute-error columns: solar then wind, for each area in turn.
abs_error_cols = [
    f'絶対値{area}{source}error(実績-想定)(kWh)'
    for area in ('北電', '九電', '東電', '関電', '中国')
    for source in ('太陽光', '風力')
]
col_names = ['DateTime', *generation_cols, *abs_error_cols]
df_select = df_king[col_names]

In [63]:
# Display the selected frame (pandas truncates long output to its head and tail).
# The original `df_select[]` is a SyntaxError: a subscript needs an expression.
df_select

Unnamed: 0,DateTime,北電太陽光想定(kWh),北電風力想定(kWh),北電太陽光実績(kWh),北電風力実績(kWh),九電太陽光想定(kWh),九電太陽光実績(kWh),九電風力想定(kWh),九電風力実績(kWh),東電太陽光想定(kWh),...,絶対値北電太陽光error(実績-想定)(kWh),絶対値北電風力error(実績-想定)(kWh),絶対値九電太陽光error(実績-想定)(kWh),絶対値九電風力error(実績-想定)(kWh),絶対値東電太陽光error(実績-想定)(kWh),絶対値東電風力error(実績-想定)(kWh),絶対値関電太陽光error(実績-想定)(kWh),絶対値関電風力error(実績-想定)(kWh),絶対値中国太陽光error(実績-想定)(kWh),絶対値中国風力error(実績-想定)(kWh)
0,2018-04-01 00:00:00,,,,,0.0,0.0,5889.905,8909.1,0.0,...,,,0.0,3019.195,0.0,9533.0,0.0,3293.0,0.0,48.0
1,2018-04-01 00:30:00,,,,,0.0,0.0,5642.430,8909.1,0.0,...,,,0.0,3266.670,0.0,9118.0,0.0,3946.0,0.0,1371.0
2,2018-04-01 01:00:00,,,,,0.0,0.0,5642.430,6929.3,0.0,...,,,0.0,1286.870,0.0,9874.0,0.0,3179.0,0.0,5228.0
3,2018-04-01 01:30:00,,,,,0.0,0.0,5444.450,5939.4,0.0,...,,,0.0,494.950,0.0,14933.0,0.0,1301.0,0.0,4484.0
4,2018-04-01 02:00:00,,,,,0.0,0.0,5295.965,5939.4,0.0,...,,,0.0,643.435,0.0,13105.0,0.0,3497.0,0.0,6055.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84410,2023-01-05 21:30:00,,,,,,,,,,...,,,,,,,,,,
84411,2023-01-05 22:00:00,,,,,,,,,,...,,,,,,,,,,
84412,2023-01-05 22:30:00,,,,,,,,,,...,,,,,,,,,,
84413,2023-01-05 23:00:00,,,,,,,,,,...,,,,,,,,,,


In [53]:
# Mean of the values currently held in solarerror_list, rounded to one decimal.
daily_solar_errors = np.array(solarerror_list)
round(np.mean(daily_solar_errors), 1)

26.4

In [23]:
# Calendar date of the first row of df_1, as an array of at most one element.
first_row_time = df_1['DateTime'].iloc[0:1]
start_date = first_row_time.dt.date.values



In [31]:
# Distinct calendar dates found in df_1, in order of first appearance.
row_dates = df_1['DateTime'].dt.date
date_list = row_dates.unique()

In [46]:

# One-day WAPE in percent for solar and wind, from the current `df` and `area_name`:
# sum(absolute error) / sum(actual) * 100.
solar_abs_total = np.sum(df[f'絶対値{area_name}太陽光error(実績-想定)(kWh)'].values)
solar_actual_total = np.sum(df[f'{area_name}太陽光実績(kWh)'].values)
solar_error_oneday = solar_abs_total / solar_actual_total * 100

wind_abs_total = np.sum(df[f'絶対値{area_name}風力error(実績-想定)(kWh)'].values)
wind_actual_total = np.sum(df[f'{area_name}風力実績(kWh)'].values)
wind_error_oneday = wind_abs_total / wind_actual_total * 100

In [47]:
# Display the current value of solar_error_oneday.
solar_error_oneday

2.8085365867353276