In [40]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [10]:
def data_validation(meas,esti):
    mae = np.abs((meas - esti)).mean()
    cor = meas.corr(esti)
    rmse = np.sqrt(((meas - esti) ** 2).mean())
    nrmse = 100*rmse/meas.mean()
    mbe = (esti - meas).mean()
    num = len(meas)
    nmbe = 100*mbe/meas.mean()
    nmae = 100*mae/meas.mean()

    return [mae,rmse,mbe,cor,num,nrmse,nmae,nmbe]

def calculate_metrics(meas,esti,Site_id,Station_name):
    [mae,rmse,mbe,cor,num,nrmse,nmae,nmbe] = data_validation(meas,esti)
    return pd.DataFrame([{
        'site_id': Site_id,
        'site_name': Station_name,
        'number of sample': num,
        'MAE': f'{mae:.3f}',
        'nMAE': f'{nmae:.3f}%',
        'RMSE': f'{rmse:.3f}',
        'nRMSE': f'{nrmse:.3f}%',
        'MBE': f'{mbe:.3f}',
        'nMBE': f'{nmbe:.3f}%',
        'correlation': f"{cor:.3f}"}])

In [4]:
site_df = pd.read_csv('../data/DeDe_48stations/DeDe_48sites_metadata.csv')

In [38]:
# CAMS and ERA5 vs DeDe (15 min)
table_cams = pd.DataFrame()
table_era5 = pd.DataFrame()

for i, row in site_df.iterrows():
    df = pd.read_csv(f'../merged_data/solar_radiation_15min/solar_radiation_15min_site{row['Site_id']:02d}_2023.csv', parse_dates=['Datetime'])
    df.dropna(inplace=True)
    df = df[df['I'] >= 10]
    meas = df['I']

    esti = df['GHI']*4 
    table_cams = pd.concat([table_cams, calculate_metrics(meas, esti, row['Site_id'], row['Station_name'])], ignore_index=True)

    esti = df['ssrd']/3600
    table_era5 = pd.concat([table_era5, calculate_metrics(meas, esti, row['Site_id'], row['Station_name'])], ignore_index=True)

table = table_cams.merge(table_era5, on=['site_id', 'site_name', 'number of sample'], suffixes=('_CAMS', '_ERA5'))
table = table[['site_id', 'site_name', 'number of sample',
            'MAE_CAMS', 'MAE_ERA5','nMAE_CAMS', 'nMAE_ERA5',
            'RMSE_CAMS', 'RMSE_ERA5','nRMSE_CAMS', 'nRMSE_ERA5',
            'MBE_CAMS', 'MBE_ERA5','nMBE_CAMS', 'nMBE_ERA5',
            'correlation_CAMS', 'correlation_ERA5']]

table

Unnamed: 0,site_id,site_name,number of sample,MAE_CAMS,MAE_ERA5,nMAE_CAMS,nMAE_ERA5,RMSE_CAMS,RMSE_ERA5,nRMSE_CAMS,nRMSE_ERA5,MBE_CAMS,MBE_ERA5,nMBE_CAMS,nMBE_ERA5,correlation_CAMS,correlation_ERA5
0,1,กรมพัฒนาพลังงานทดแทน และ อนุรักษ์พลังงาน กรุงเ...,16098,92.382,139.116,22.512%,33.900%,143.296,194.067,34.918%,47.290%,23.053,40.165,5.617%,9.787%,0.874,0.769
1,2,สถานีอุตุนิยมวิทยานครราชสีมา,15050,88.313,132.333,19.653%,29.449%,143.717,187.273,31.983%,41.675%,21.154,7.076,4.708%,1.575%,0.891,0.804
2,3,ศูนย์อุตุนิยมวิทยาภาคตะวันออกเฉียงเหนือตอนบน จ...,16943,84.693,137.567,18.822%,30.573%,138.249,191.337,30.724%,42.523%,14.406,-2.878,3.202%,-0.640%,0.896,0.791
3,4,สถานีอุตุนิยมวิทยาร้อยเอ็ด,15979,85.146,137.113,18.873%,30.392%,136.455,190.183,30.246%,42.155%,16.417,-5.491,3.639%,-1.217%,0.897,0.789
4,5,สถานีอุตุนิยมวิทยาอุบลราชธานี,15144,93.613,143.943,20.152%,30.987%,148.577,199.116,31.984%,42.864%,0.64,-31.63,0.138%,-6.809%,0.884,0.787
5,6,สถานีอุตุนิยมวิทยานครพนม,16821,85.83,134.318,20.301%,31.770%,142.445,186.372,33.692%,44.082%,18.688,-1.402,4.420%,-0.332%,0.886,0.792
6,7,ส่วนอุทกวิทยาที่ 1 จ.หนองคาย,16803,75.046,128.581,16.578%,28.405%,124.504,177.961,27.504%,39.313%,7.842,-13.227,1.732%,-2.922%,0.916,0.821
7,8,สถานีอุตุนิยมวิทยาเลย,16069,106.399,141.878,27.149%,36.202%,161.452,197.18,41.196%,50.313%,70.386,52.222,17.960%,13.325%,0.886,0.796
8,9,ศูนย์บริการวิชาการที่ 7 จ.เชียงใหม่,8273,117.497,149.383,29.489%,37.492%,189.317,204.121,47.514%,51.230%,58.485,11.966,14.678%,3.003%,0.808,0.737
9,10,ศูนย์บริการวิชาการที่ 8 จ.พิษณุโลก,16665,84.934,123.723,19.647%,28.620%,133.453,173.692,30.871%,40.179%,41.467,15.643,9.592%,3.619%,0.907,0.817


In [45]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=table['site_id'], y=table['MAE_CAMS'].astype(float), mode='markers+lines', name='CAMS MAE'))
fig.add_trace(go.Scatter(x=table['site_id'], y=table['MAE_ERA5'].astype(float), mode='markers+lines', name='ERA5 MAE'))

fig.update_layout(
    title='MAE Comparison between CAMS and ERA5',
    xaxis_title='Site ID',
    yaxis_title='MAE (W/m²)',
    legend_title='Dataset')

fig.show()

In [39]:
# CAMS and ERA5 vs DeDe (1 hr)
table_cams = pd.DataFrame()
table_era5 = pd.DataFrame()

for i, row in site_df.iterrows():
    df = pd.read_csv(f'../merged_data/solar_radiation_1hr/solar_radiation_1hr_site{row['Site_id']:02d}_2023.csv', parse_dates=['Datetime'])
    df.dropna(inplace=True)
    df = df[df['I'] >= 10]
    meas = df['I']

    esti = df['GHI']
    table_cams = pd.concat([table_cams, calculate_metrics(meas, esti, row['Site_id'], row['Station_name'])], ignore_index=True)

    esti = df['ssrd']/3600
    table_era5 = pd.concat([table_era5, calculate_metrics(meas, esti, row['Site_id'], row['Station_name'])], ignore_index=True)

table = table_cams.merge(table_era5, on=['site_id', 'site_name', 'number of sample'], suffixes=('_CAMS', '_ERA5'))
table = table[['site_id', 'site_name', 'number of sample',
            'MAE_CAMS', 'MAE_ERA5','nMAE_CAMS', 'nMAE_ERA5',
            'RMSE_CAMS', 'RMSE_ERA5','nRMSE_CAMS', 'nRMSE_ERA5',
            'MBE_CAMS', 'MBE_ERA5','nMBE_CAMS', 'nMBE_ERA5',
            'correlation_CAMS', 'correlation_ERA5']]

table

Unnamed: 0,site_id,site_name,number of sample,MAE_CAMS,MAE_ERA5,nMAE_CAMS,nMAE_ERA5,RMSE_CAMS,RMSE_ERA5,nRMSE_CAMS,nRMSE_ERA5,MBE_CAMS,MBE_ERA5,nMBE_CAMS,nMBE_ERA5,correlation_CAMS,correlation_ERA5
0,1,กรมพัฒนาพลังงานทดแทน และ อนุรักษ์พลังงาน กรุงเ...,4024,117.426,140.845,28.643%,34.356%,160.891,195.7,39.245%,47.736%,22.016,42.154,5.370%,10.282%,0.837,0.769
1,2,สถานีอุตุนิยมวิทยานครราชสีมา,3772,115.33,136.718,25.845%,30.638%,165.365,193.866,37.058%,43.444%,21.106,10.325,4.730%,2.314%,0.854,0.793
2,3,ศูนย์อุตุนิยมวิทยาภาคตะวันออกเฉียงเหนือตอนบน จ...,4266,117.072,139.715,26.100%,31.147%,163.592,194.416,36.470%,43.342%,10.977,-3.193,2.447%,-0.712%,0.854,0.789
3,4,สถานีอุตุนิยมวิทยาร้อยเอ็ด,4013,117.842,138.076,26.351%,30.876%,162.894,191.603,36.425%,42.845%,16.43,-2.294,3.674%,-0.513%,0.853,0.789
4,5,สถานีอุตุนิยมวิทยาอุบลราชธานี,3795,119.842,142.144,26.020%,30.862%,165.594,196.468,35.953%,42.656%,1.276,-27.816,0.277%,-6.039%,0.852,0.79
5,6,สถานีอุตุนิยมวิทยานครพนม,4229,114.655,134.266,27.156%,31.801%,163.128,185.631,38.637%,43.967%,14.709,-2.073,3.484%,-0.491%,0.849,0.797
6,7,ส่วนอุทกวิทยาที่ 1 จ.หนองคาย,4225,107.345,129.754,23.846%,28.824%,148.111,178.59,32.902%,39.673%,5.683,-11.839,1.263%,-2.630%,0.88,0.822
7,8,สถานีอุตุนิยมวิทยาเลย,4051,119.744,141.726,30.658%,36.286%,175.017,197.515,44.810%,50.570%,65.704,51.241,16.822%,13.119%,0.86,0.799
8,9,ศูนย์บริการวิชาการที่ 7 จ.เชียงใหม่,2064,131.411,151.469,33.195%,38.262%,190.376,206.21,48.090%,52.090%,59.435,16.624,15.014%,4.199%,0.803,0.731
9,10,ศูนย์บริการวิชาการที่ 8 จ.พิษณุโลก,4152,105.445,125.95,24.349%,29.084%,152.256,177.378,35.159%,40.960%,40.183,17.671,9.279%,4.081%,0.873,0.81


In [23]:
# cams_forecast vs DeDe (15 min)
table_cams = pd.DataFrame()
for i, row in site_df.iterrows():
    df = pd.read_csv(f'../merged_data/cams_forecast_15min/cams_forecast_15min_site{row['Site_id']:02d}_2023.csv', parse_dates=['Datetime'])
    df.set_index('Datetime',inplace=True)
    df.dropna(inplace=True)
    df = df.between_time('07:00', '17:00')
    meas = df['ssrd'];esti = df['I']
    table_cams = pd.concat([table_cams, calculate_metrics(meas, esti, row['Site_id'], row['Station_name'])], ignore_index=True)
table_cams

Unnamed: 0,site_id,site_name,number of sample,MAE,nMAE,RMSE,nRMSE,MBE,nMBE,correlation
0,1,กรมพัฒนาพลังงานทดแทน และ อนุรักษ์พลังงาน กรุงเ...,14104,147.293,29.750%,201.132,40.624%,-35.551,-7.180%,0.714
1,2,สถานีอุตุนิยมวิทยานครราชสีมา,13284,141.156,28.660%,191.287,38.839%,8.656,1.758%,0.768
2,3,ศูนย์อุตุนิยมวิทยาภาคตะวันออกเฉียงเหนือตอนบน จ...,14924,147.634,30.672%,196.481,40.820%,21.194,4.403%,0.749
3,4,สถานีอุตุนิยมวิทยาร้อยเอ็ด,13981,147.242,30.442%,194.379,40.188%,22.951,4.745%,0.747
4,5,สถานีอุตุนิยมวิทยาอุบลราชธานี,13366,152.715,31.822%,201.736,42.037%,39.011,8.129%,0.751
5,6,สถานีอุตุนิยมวิทยานครพนม,14883,147.488,32.029%,196.31,42.631%,9.792,2.126%,0.742
6,7,ส่วนอุทกวิทยาที่ 1 จ.หนองคาย,14883,141.383,30.342%,189.568,40.683%,37.425,8.032%,0.774
7,8,สถานีอุตุนิยมวิทยาเลย,14350,147.284,31.915%,205.783,44.592%,-27.696,-6.002%,0.746
8,9,ศูนย์บริการวิชาการที่ 7 จ.เชียงใหม่,7638,151.322,35.909%,214.316,50.858%,2.604,0.618%,0.695
9,10,ศูนย์บริการวิชาการที่ 8 จ.พิษณุโลก,14760,129.966,27.245%,181.456,38.038%,2.524,0.529%,0.773


In [24]:
# cams_forecast vs DeDe (1 hr)
table_cams = pd.DataFrame()
for i, row in site_df.iterrows():
    df = pd.read_csv(f'../merged_data/cams_forecast_1hr/cams_forecast_1hr_site{row['Site_id']:02d}_2023.csv', parse_dates=['Datetime'])
    df.set_index('Datetime',inplace=True)
    df.dropna(inplace=True)
    df = df.between_time('07:00', '17:00')
    meas = df['ssrd'];esti = df['I']
    table_cams = pd.concat([table_cams, calculate_metrics(meas, esti, row['Site_id'], row['Station_name'])], ignore_index=True)
table_cams

Unnamed: 0,site_id,site_name,number of sample,MAE,nMAE,RMSE,nRMSE,MBE,nMBE,correlation
0,1,กรมพัฒนาพลังงานทดแทน และ อนุรักษ์พลังงาน กรุงเ...,3784,142.865,30.424%,195.687,41.672%,-36.335,-7.738%,0.753
1,2,สถานีอุตุนิยมวิทยานครราชสีมา,3564,141.985,30.412%,193.278,41.398%,3.494,0.748%,0.782
2,3,ศูนย์อุตุนิยมวิทยาภาคตะวันออกเฉียงเหนือตอนบน จ...,4004,145.321,31.833%,193.945,42.484%,19.201,4.206%,0.777
3,4,สถานีอุตุนิยมวิทยาร้อยเอ็ด,3751,145.111,31.664%,193.144,42.145%,17.995,3.927%,0.771
4,5,สถานีอุตุนิยมวิทยาอุบลราชธานี,3586,146.066,32.186%,193.767,42.697%,31.974,7.046%,0.785
5,6,สถานีอุตุนิยมวิทยานครพนม,3993,142.84,32.764%,190.051,43.593%,9.504,2.180%,0.776
6,7,ส่วนอุทกวิทยาที่ 1 จ.หนองคาย,3993,138.979,31.451%,185.947,42.079%,32.574,7.371%,0.798
7,8,สถานีอุตุนิยมวิทยาเลย,3850,144.154,32.917%,201.421,45.994%,-28.517,-6.512%,0.773
8,9,ศูนย์บริการวิชาการที่ 7 จ.เชียงใหม่,2049,147.366,36.785%,210.702,52.595%,-5.415,-1.352%,0.717
9,10,ศูนย์บริการวิชาการที่ 8 จ.พิษณุโลก,3960,128.002,28.238%,178.36,39.347%,-1.923,-0.424%,0.798
