In [None]:
import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy import stats

from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
drop_columns = 'TA00075'


df_obs_daily = pd.read_csv("/content/gdrive/MyDrive/rainfall-forecast/verification_validation/final_station_data_daily_v2_cmorph.csv")
df_obs_daily = df_obs_daily.set_index('Unnamed: 0')
df_obs_daily = df_obs_daily.loc['2020-01-04':'2020-12-31']
df_obs_daily = df_obs_daily.drop(columns=drop_columns)


df_sat_daily = pd.read_csv("/content/gdrive/MyDrive/rainfall-forecast/verification_validation/final_cmorph_data_daily_v2.csv")
df_sat_daily = df_sat_daily.set_index('Unnamed: 0')
df_sat_daily = df_sat_daily.loc['2020-01-04':'2020-12-31']
df_sat_daily = df_sat_daily.drop(columns=drop_columns)

df_obs_metadata = pd.read_csv('/content/gdrive/MyDrive/rainfall-forecast/station_data/Final results_csv/station_list_final_v2.csv')
df_obs_metadata = df_obs_metadata.set_index('STNID')

In [5]:
index = pd.Index(['Lat', 'Lon', 'ObsNumber', 'RMSE', 'MAE', 'Bias', 'PearsonR', 'SpearmanR', 'R2', 'POD', 'POFD', 'FAR', 'FB'], name="rows")
column = df_obs_daily.columns

daily_metrics_df = pd.DataFrame(data=np.nan, index=index, columns=column)
monthly_metrics_df = pd.DataFrame(data=np.nan, index=index, columns=column)

for station, station_id in enumerate(df_obs_daily.columns):
  print(station)
  # if station == 102:
  #   continue
  
  obs_daily = df_obs_daily[station_id]  
  sat_daily = df_sat_daily[station_id]

  obs_daily = obs_daily.to_numpy(copy=True)
  sat_daily = sat_daily.to_numpy(copy=True)

  obs_daily = obs_daily[~np.isnan(obs_daily)]
  sat_daily = sat_daily[~np.isnan(sat_daily)]

  n_of_sample = len(obs_daily)

  rmse = mean_squared_error(obs_daily, sat_daily, squared=False)
  mae = mean_absolute_error(obs_daily, sat_daily)
  pearsonr = stats.pearsonr(obs_daily, sat_daily)
  spearmanr = stats.spearmanr(obs_daily, sat_daily)
  rsq = r2_score(obs_daily, sat_daily)

  # correlation_matrix = np.corrcoef(obs_daily, sat_daily)
  # correlation_xy = correlation_matrix[0,1]
  # rsq = correlation_xy**2

  bias = sat_daily - obs_daily
  bias = bias.mean()

  obs_daily_bool = obs_daily.copy()
  sat_daily_bool = sat_daily.copy()

  obs_daily_bool[obs_daily_bool < 1] = 0
  obs_daily_bool[obs_daily_bool >= 1] = 1

  sat_daily_bool[sat_daily_bool < 1] = 0
  sat_daily_bool[sat_daily_bool >= 1] = 1

  category = np.empty([n_of_sample])

  for i in range(n_of_sample):
    if obs_daily_bool[i] == 1 and sat_daily_bool[i] == 1:
      category[i] = 1
    elif obs_daily_bool[i] == 1 and sat_daily_bool[i] == 0:
      category[i] = 2
    elif obs_daily_bool[i] == 0 and sat_daily_bool[i] == 1:
      category[i] = 3
    elif obs_daily_bool[i] == 0 and sat_daily_bool[i] == 0:
      category[i] = 4

  hit = np.count_nonzero(category == 1)
  miss = np.count_nonzero(category == 2)
  false_alarm = np.count_nonzero(category == 3)
  correct_negative = np.count_nonzero(category == 4)

  pod = hit/(hit + miss)
  pofd = miss/(miss+correct_negative)
  far = false_alarm/(hit + false_alarm)
  freq_bias = (hit+false_alarm)/(hit+miss)

  daily_metrics_df[station_id].loc['Lat'] = df_obs_metadata['LATITUDE'].loc[station_id]
  daily_metrics_df[station_id].loc['Lon'] = df_obs_metadata['LONGITUDE'].loc[station_id]
  daily_metrics_df[station_id].loc['RMSE'] = round(rmse,2)
  daily_metrics_df[station_id].loc['MAE'] = round(mae,2)
  daily_metrics_df[station_id].loc['Bias'] = round(bias,2)
  daily_metrics_df[station_id].loc['PearsonR'] = round(pearsonr[0],2)
  daily_metrics_df[station_id].loc['SpearmanR'] = round(spearmanr[0],2)
  daily_metrics_df[station_id].loc['R2'] = round(rsq,2)
  daily_metrics_df[station_id].loc['Probability of Detection'] = round(pod,2)
  daily_metrics_df[station_id].loc['Probability of False Detection'] = round(pofd,2)
  daily_metrics_df[station_id].loc['False Alarm Rate'] = round(far,2)
  daily_metrics_df[station_id].loc['Frequency Bias'] = round(freq_bias,2)
                                  
  print(station_id)
  print("Number of Observation: ", n_of_sample)
  print("RMSE: ", str(round(rmse,1)))
  print("MAE: ", str(round(mae,1)))
  print("Bias: ", str(round(bias,1)))
  print("Rsquare: ", str(round(rsq,1)))
  print("Probability of Detection: ", pod)
  print("Probability of False Detection: ", pofd)
  print("False Alarm Rate: ", far)
  print("Frequency Bias: ", freq_bias)


  # # print(obs_daily)
  # # print(sat_daily)

  # # print(obs_daily_bool)
  # # print(sat_daily_bool)
  print(" ")

  # obs_monthly = df_obs_monthly[station_id]
  # sat_monthly = df_sat_monthly[station_id]

  # obs_monthly = obs_monthly.to_numpy()
  # sat_monthly = sat_monthly.to_numpy()

  # print(station, station_id)

daily_metrics_df = daily_metrics_df.T
daily_metrics_df.to_csv('/content/gdrive/MyDrive/rainfall-forecast/verification_validation/metrics/metrics_cmorph_daily_2020.csv')      

0
TA00073
Number of Observation:  302
RMSE:  16.7
MAE:  5.2
Bias:  3.8
Rsquare:  -11.9
Probability of Detection:  0.6219512195121951
Probability of False Detection:  0.14220183486238533
False Alarm Rate:  0.39285714285714285
Frequency Bias:  1.024390243902439
 
1
TA00024
Number of Observation:  302
RMSE:  20.2
MAE:  5.7
Bias:  4.1
Rsquare:  -10.5
Probability of Detection:  0.4888888888888889
Probability of False Detection:  0.19008264462809918
False Alarm Rate:  0.26666666666666666
Frequency Bias:  0.6666666666666666
 
2
TA00026
Number of Observation:  302
RMSE:  18.4
MAE:  7.2
Bias:  5.3
Rsquare:  -7.7
Probability of Detection:  0.5747126436781609
Probability of False Detection:  0.19473684210526315
False Alarm Rate:  0.5535714285714286
Frequency Bias:  1.2873563218390804
 
3
TA00070
Number of Observation:  301
RMSE:  24.7
MAE:  7.9
Bias:  4.8
Rsquare:  -6.3
Probability of Detection:  0.422680412371134
Probability of False Detection:  0.24778761061946902
False Alarm Rate:  0.453333333