Comparison of bands c1, c2 and c3 for GHI estimation at DRA using S4+M3

In [1]:
# Third-party stack used by the cells below: pandas for the CSV tables,
# numpy for numeric reductions, matplotlib for plotting, and
# sklearn.metrics for the error scores.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from sklearn import metrics
# Set the default matplotlib font family for any figure drawn later.
plt.rcParams["font.family"] = "Times New Roman"

import warnings
# NOTE(review): this suppresses every Python warning for the rest of the
# session, including pandas/sklearn deprecation notices — consider narrowing
# the filter to the specific category that was noisy.
warnings.filterwarnings("ignore")

In [2]:
def ci_to_csi3(x):
    """Map each value in ``x`` through a fixed piecewise function.

    The names suggest this converts a cloud index (ci) into a clear-sky
    index (csi) — TODO confirm against the method reference. The mapping is:

    * ``ci < -0.2``        -> ``1.2``
    * ``-0.2 <= ci < 0.8`` -> ``1 - ci``
    * ``0.8 <= ci < 1.1``  -> ``2.0667 - 3.6667*ci + 1.6667*ci**2``
    * ``ci >= 1.1``        -> ``0.05``

    Parameters
    ----------
    x : iterable of float
        Input values, one per sample.

    Returns
    -------
    list
        One output per input, in the same order. A NaN input yields NaN so
        the result always has the same length as ``x`` and can be assigned
        back to the frame it came from.
    """
    csi_3 = []
    for i in x:
        # The branches are mutually exclusive, so an elif chain replaces the
        # four independent range tests without changing any finite result.
        if i < -0.2:
            csi_3.append(1.2)
        elif i < 0.8:
            csi_3.append(1-i)
        elif i < 1.1:
            csi_3.append(2.0667 - 3.6667*i + 1.6667*i*i)
        elif i >= 1.1:
            csi_3.append(0.05)
        else:
            # Only NaN reaches this point (every comparison above is False).
            # Emitting NaN keeps the output aligned with the input instead of
            # silently dropping the sample.
            csi_3.append(float("nan"))

    return csi_3

In [3]:
# Load the c01 table, parse its timestamps, discard incomplete rows and
# index it by time; the duplicated 'time.1' column is not needed.
df_1m = (
    pd.read_csv('c01_ci_month.csv')
    .assign(time=lambda raw: pd.to_datetime(raw['time']))
    .dropna()
    .set_index('time')
    .drop(columns=['time.1'])
)
# Derive the csi column from the ci column with the piecewise mapping.
csi = ci_to_csi3(list(df_1m['ci'].values))
df_1m['csi'] = csi
df_1m.head()

Unnamed: 0_level_0,ci,high,low,csi
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-01 16:05:00,0.007861,543.537684,243.298115,0.992139
2019-01-01 16:10:00,0.073763,543.537684,220.883821,0.926237
2019-01-01 16:15:00,0.019996,543.537684,237.920832,0.980004
2019-01-01 16:20:00,0.005287,543.537684,242.065771,0.994713
2019-01-01 16:25:00,0.017061,543.537684,238.324334,0.982939


In [4]:
# Load the c02 table, parse its timestamps, discard incomplete rows and
# index it by time; the duplicated 'time.1' column is not needed.
df_2m = (
    pd.read_csv('c02_ci_month.csv')
    .assign(time=lambda raw: pd.to_datetime(raw['time']))
    .dropna()
    .set_index('time')
    .drop(columns=['time.1'])
)
# Derive the csi column from the ci column with the piecewise mapping.
csi = ci_to_csi3(list(df_2m['ci'].values))
df_2m['csi'] = csi
df_2m.head()

Unnamed: 0_level_0,ci,high,low,csi
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-01 16:05:00,0.013135,415.861379,128.799757,0.986865
2019-01-01 16:10:00,0.067965,415.861379,115.346744,0.932035
2019-01-01 16:15:00,0.013846,415.861379,137.513389,0.986154
2019-01-01 16:20:00,-0.009069,415.861379,145.143145,1.009069
2019-01-01 16:25:00,0.013379,415.861379,141.295004,0.986621


In [5]:
# Load the c03 table, parse its timestamps, discard incomplete rows and
# index it by time; the duplicated 'time.1' column is not needed.
df_3m = (
    pd.read_csv('c03_ci_month.csv')
    .assign(time=lambda raw: pd.to_datetime(raw['time']))
    .dropna()
    .set_index('time')
    .drop(columns=['time.1'])
)
# Derive the csi column from the ci column with the piecewise mapping.
csi = ci_to_csi3(list(df_3m['ci'].values))
df_3m['csi'] = csi
df_3m.head()

Unnamed: 0_level_0,ci,high,low,csi
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-01 16:05:00,0.070286,264.945409,91.070463,0.929714
2019-01-01 16:10:00,0.172573,264.945409,74.283494,0.827427
2019-01-01 16:15:00,0.069412,264.945409,96.714001,0.930588
2019-01-01 16:20:00,0.038903,264.945409,104.515138,0.961097
2019-01-01 16:25:00,0.095777,264.945409,98.172615,0.904223


In [10]:
# Read the clear-sky model outputs and the GHI measurements. The timestamps
# arrive in the unnamed first CSV column, so parse them into a 'time' index,
# discard the raw column, and keep only fully populated rows.
df_g = (
    pd.read_csv('dra_ghics_ghi_2019.csv')
    .assign(time=lambda raw: pd.to_datetime(raw['Unnamed: 0']))
    .set_index('time')
    .drop(columns=['Unnamed: 0'])
    .dropna()
)
df_g.head()

Unnamed: 0_level_0,ghics_pvlib,ghics_tl,ghics_mc,zen,ghi,ghi_ns,ghics_rest
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-01-01 16:05:00,140.300612,156.313124,184.7688,79.396,154.18,174,174
2019-01-01 16:10:00,155.411056,171.977689,200.6832,78.628,168.46,189,189
2019-01-01 16:15:00,170.499527,187.561776,216.462,77.87,182.98,44,203
2019-01-01 16:20:00,185.519533,203.026862,232.0884,77.124,197.4,218,218
2019-01-01 16:25:00,200.432172,218.340727,247.536,76.386,211.72,52,233


In [11]:
# Timestamps of df_g (in df_g order) that also appear in all three band tables.
index_c = [
    stamp
    for stamp in df_g.index
    if all(stamp in band.index for band in (df_1m, df_2m, df_3m))
]
len(index_c)

15083

In [12]:
# GHI estimate per band: csi scaled by the 'ghics_rest' clear-sky column,
# restricted to the timestamps shared by every table.
ghi_1m, ghi_2m, ghi_3m = (
    band.loc[index_c, 'csi'] * df_g.loc[index_c, 'ghics_rest']
    for band in (df_1m, df_2m, df_3m)
)
# Reference series on the same timestamps.
ghi = df_g.loc[index_c, 'ghi']
ghi_ns = df_g.loc[index_c, 'ghi_ns']  # data of NSRDB

In [15]:
# Root-mean-square error of each estimate (three bands, then NSRDB)
# against the 'ghi' reference series.
rmse1, rmse2, rmse3, rmse4 = (
    np.sqrt(metrics.mean_squared_error(estimate, ghi))
    for estimate in (ghi_1m, ghi_2m, ghi_3m, ghi_ns)
)
# Mean of the reference series, shown alongside for scale.
mean = np.mean(ghi)
rmse1, rmse2, rmse3, rmse4, mean

(109.29986333421894,
 117.65165453853054,
 123.96612580293362,
 110.72164658204447,
 509.16306570310877)

In [16]:
# Mean bias error (estimate minus reference) for the three bands and NSRDB;
# a positive value means the estimate is higher than 'ghi' on average.
mbe1, mbe2, mbe3, mbe4 = (
    np.mean(estimate - ghi)
    for estimate in (ghi_1m, ghi_2m, ghi_3m, ghi_ns)
)
mbe1, mbe2, mbe3, mbe4, mean

(7.179440601954184,
 15.196252126182278,
 22.725554801347517,
 -13.412220380560957,
 509.16306570310877)