In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy import stats

from google.colab import drive
# Mount Google Drive at /content/gdrive; every CSV path used in this notebook lives under it.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


**Eval Model 1**

In [None]:
# --- Inputs for the Model 1 daily evaluation --------------------------------
# Station column removed from both the observed and the modelled tables.
drop_columns = 'TA00075'

# Row labels removed from the observations (2016-01-01 .. 2020-01-03 inclusive).
# drop() raises KeyError if any of these labels is missing from the index.
excluded_dates = pd.date_range('2016-01-01','2020-01-03').strftime('%Y-%m-%d')

# Observed daily station series; the CSV's unnamed first column becomes the index.
df_obs_daily = (
  pd.read_csv("/content/gdrive/MyDrive/rainfall-forecast/merge_model/evaluation/final_station_data_model1.csv")
  .set_index('Unnamed: 0')
  .drop(index=excluded_dates)
  .drop(columns=drop_columns)
)

# Model 1 daily output, indexed the same way (no date rows are removed here).
df_sat_daily = (
  pd.read_csv("/content/gdrive/MyDrive/rainfall-forecast/merge_model/evaluation/final_model1.csv")
  .set_index('Unnamed: 0')
  .drop(columns=drop_columns)
)

# Station metadata keyed by station id; the evaluation loop reads LATITUDE / LONGITUDE from it.
df_obs_metadata = pd.read_csv(
  '/content/gdrive/MyDrive/rainfall-forecast/station_data/Final results_csv/station_list_final_v2.csv'
).set_index('STNID')

# Result tables: one row per metric, one column per station, pre-filled with NaN.
metric_rows = [
  'Lat', 'Lon', 'RMSE', 'MAE', 'Bias', 'PearsonR', 'SpearmanR', 'R2',
  'Probability of Detection', 'Probability of False Detection',
  'False Alarm Rate', 'Frequency Bias',
]
index = pd.Index(metric_rows, name="rows")
column = df_obs_daily.columns

daily_metrics_df, monthly_metrics_df = (
  pd.DataFrame(data=np.nan, index=index, columns=column) for _ in range(2)
)

# Daily verification of Model 1 against the station observations.
# For every station column this computes continuous scores (RMSE, MAE, bias,
# Pearson/Spearman correlation, R2) and rain/no-rain scores from a 2x2
# contingency table, stores them in `daily_metrics_df`, and finally writes the
# transposed table (one row per station) to CSV.

# Positions (within df_obs_daily.columns) of stations left out of the
# evaluation; their metric columns stay NaN.
# NOTE(review): the reason for excluding these two stations is not visible here -- confirm.
SKIPPED_STATION_POSITIONS = (7, 102)

# A day counts as "rain" when its value is at least this threshold.
# NOTE(review): presumably millimetres per day -- confirm against the input files.
RAIN_THRESHOLD = 1

for station, station_id in enumerate(df_obs_daily.columns):
  print(station)
  if station in SKIPPED_STATION_POSITIONS:
    continue

  obs_daily = df_obs_daily[station_id].to_numpy(copy=True)
  sat_daily = df_sat_daily[station_id].to_numpy(copy=True)

  # The two series are compared position by position, so they must have the
  # same length before any filtering.
  if len(obs_daily) != len(sat_daily):
    raise ValueError(
      "Observed and modelled series for station %s differ in length (%d vs %d)"
      % (station_id, len(obs_daily), len(sat_daily))
    )

  # Keep only the days where BOTH values are present. Filtering each series on
  # its own NaNs would shift the pairs against each other.
  both_present = ~(np.isnan(obs_daily) | np.isnan(sat_daily))
  obs_daily = obs_daily[both_present]
  sat_daily = sat_daily[both_present]

  n_of_sample = len(obs_daily)

  # Correlations are undefined with fewer than two pairs; leave this station as NaN.
  if n_of_sample < 2:
    print(station_id)
    print("Number of Observation: ", n_of_sample)
    print("Skipped: fewer than two paired samples")
    print(" ")
    continue

  # --- continuous scores -----------------------------------------------------
  # RMSE is the square root of the MSE; this avoids the `squared=False`
  # argument, which newer scikit-learn releases no longer accept.
  rmse = np.sqrt(mean_squared_error(obs_daily, sat_daily))
  mae = mean_absolute_error(obs_daily, sat_daily)
  pearsonr = stats.pearsonr(obs_daily, sat_daily)
  spearmanr = stats.spearmanr(obs_daily, sat_daily)
  rsq = r2_score(obs_daily, sat_daily)

  # Mean error, model minus observation (positive = model overestimates).
  bias = (sat_daily - obs_daily).mean()

  # --- categorical (rain / no-rain) scores -----------------------------------
  obs_rain = obs_daily >= RAIN_THRESHOLD
  sat_rain = sat_daily >= RAIN_THRESHOLD

  hit = np.count_nonzero(obs_rain & sat_rain)                 # observed rain, modelled rain
  miss = np.count_nonzero(obs_rain & ~sat_rain)               # observed rain, modelled dry
  false_alarm = np.count_nonzero(~obs_rain & sat_rain)        # observed dry,  modelled rain
  correct_negative = np.count_nonzero(~obs_rain & ~sat_rain)  # observed dry,  modelled dry

  observed_rain_days = hit + miss
  observed_dry_days = false_alarm + correct_negative
  modelled_rain_days = hit + false_alarm

  # Each score is NaN when its denominator is zero.
  pod = hit / observed_rain_days if observed_rain_days else np.nan
  # POFD is the fraction of observed dry days that the model called rainy.
  pofd = false_alarm / observed_dry_days if observed_dry_days else np.nan
  # Stored under the existing label 'False Alarm Rate'; the formula is the
  # false alarm ratio (false alarms over all modelled rain days). The label is
  # kept so the CSV layout does not change.
  far = false_alarm / modelled_rain_days if modelled_rain_days else np.nan
  freq_bias = modelled_rain_days / observed_rain_days if observed_rain_days else np.nan

  # --- store results ----------------------------------------------------------
  # Write with a single .loc[row, column] call; chained
  # `frame[col].loc[row] = value` may assign into a temporary copy.
  station_metrics = {
    'Lat': df_obs_metadata['LATITUDE'].loc[station_id],
    'Lon': df_obs_metadata['LONGITUDE'].loc[station_id],
    'RMSE': round(rmse,2),
    'MAE': round(mae,2),
    'Bias': round(bias,2),
    'PearsonR': round(pearsonr[0],2),
    'SpearmanR': round(spearmanr[0],2),
    'R2': round(rsq,2),
    'Probability of Detection': round(pod,2),
    'Probability of False Detection': round(pofd,2),
    'False Alarm Rate': round(far,2),
    'Frequency Bias': round(freq_bias,2),
  }
  for metric_name, metric_value in station_metrics.items():
    daily_metrics_df.loc[metric_name, station_id] = metric_value

  print(station_id)
  print("Number of Observation: ", n_of_sample)
  print("RMSE: ", str(round(rmse,1)))
  print("MAE: ", str(round(mae,1)))
  print("Bias: ", str(round(bias,1)))
  print("Rsquare: ", str(round(rsq,1)))
  print("Probability of Detection: ", pod)
  print("Probability of False Detection: ", pofd)
  print("False Alarm Rate: ", far)
  print("Frequency Bias: ", freq_bias)
  print(" ")

# One row per station, one column per metric, then persist.
daily_metrics_df = daily_metrics_df.T
daily_metrics_df.to_csv('/content/gdrive/MyDrive/rainfall-forecast/merge_model/evaluation/metrics_model_1_daily.csv')

**Eval Model 2**

In [None]:
# --- Inputs for the Model 2 daily evaluation --------------------------------
# Station column removed from both the observed and the modelled tables.
drop_columns = 'TA00075'

# Row labels removed from the observations (2016-01-01 .. 2020-01-03 inclusive).
# drop() raises KeyError if any of these labels is missing from the index.
excluded_dates = pd.date_range('2016-01-01','2020-01-03').strftime('%Y-%m-%d')

# Observed daily station series; the CSV's unnamed first column becomes the index.
df_obs_daily = (
  pd.read_csv("/content/gdrive/MyDrive/rainfall-forecast/merge_model/evaluation/final_station_data_model2.csv")
  .set_index('Unnamed: 0')
  .drop(index=excluded_dates)
  .drop(columns=drop_columns)
)

# Model 2 daily output, indexed the same way (no date rows are removed here).
df_sat_daily = (
  pd.read_csv("/content/gdrive/MyDrive/rainfall-forecast/merge_model/evaluation/final_model2.csv")
  .set_index('Unnamed: 0')
  .drop(columns=drop_columns)
)

# Station metadata keyed by station id; the evaluation loop reads LATITUDE / LONGITUDE from it.
df_obs_metadata = pd.read_csv(
  '/content/gdrive/MyDrive/rainfall-forecast/station_data/Final results_csv/station_list_final_v2.csv'
).set_index('STNID')

# Result tables: one row per metric, one column per station, pre-filled with NaN.
metric_rows = [
  'Lat', 'Lon', 'RMSE', 'MAE', 'Bias', 'PearsonR', 'SpearmanR', 'R2',
  'Probability of Detection', 'Probability of False Detection',
  'False Alarm Rate', 'Frequency Bias',
]
index = pd.Index(metric_rows, name="rows")
column = df_obs_daily.columns

daily_metrics_df, monthly_metrics_df = (
  pd.DataFrame(data=np.nan, index=index, columns=column) for _ in range(2)
)

# Daily verification of Model 2 against the station observations.
# For every station column this computes continuous scores (RMSE, MAE, bias,
# Pearson/Spearman correlation, R2) and rain/no-rain scores from a 2x2
# contingency table, stores them in `daily_metrics_df`, and finally writes the
# transposed table (one row per station) to CSV.

# Positions (within df_obs_daily.columns) of stations left out of the
# evaluation; their metric columns stay NaN.
# NOTE(review): the reason for excluding these two stations is not visible here -- confirm.
SKIPPED_STATION_POSITIONS = (7, 102)

# A day counts as "rain" when its value is at least this threshold.
# NOTE(review): presumably millimetres per day -- confirm against the input files.
RAIN_THRESHOLD = 1

for station, station_id in enumerate(df_obs_daily.columns):
  print(station)
  if station in SKIPPED_STATION_POSITIONS:
    continue

  obs_daily = df_obs_daily[station_id].to_numpy(copy=True)
  sat_daily = df_sat_daily[station_id].to_numpy(copy=True)

  # The two series are compared position by position, so they must have the
  # same length before any filtering.
  if len(obs_daily) != len(sat_daily):
    raise ValueError(
      "Observed and modelled series for station %s differ in length (%d vs %d)"
      % (station_id, len(obs_daily), len(sat_daily))
    )

  # Keep only the days where BOTH values are present. Filtering each series on
  # its own NaNs would shift the pairs against each other.
  both_present = ~(np.isnan(obs_daily) | np.isnan(sat_daily))
  obs_daily = obs_daily[both_present]
  sat_daily = sat_daily[both_present]

  n_of_sample = len(obs_daily)

  # Correlations are undefined with fewer than two pairs; leave this station as NaN.
  if n_of_sample < 2:
    print(station_id)
    print("Number of Observation: ", n_of_sample)
    print("Skipped: fewer than two paired samples")
    print(" ")
    continue

  # --- continuous scores -----------------------------------------------------
  # RMSE is the square root of the MSE; this avoids the `squared=False`
  # argument, which newer scikit-learn releases no longer accept.
  rmse = np.sqrt(mean_squared_error(obs_daily, sat_daily))
  mae = mean_absolute_error(obs_daily, sat_daily)
  pearsonr = stats.pearsonr(obs_daily, sat_daily)
  spearmanr = stats.spearmanr(obs_daily, sat_daily)
  rsq = r2_score(obs_daily, sat_daily)

  # Mean error, model minus observation (positive = model overestimates).
  bias = (sat_daily - obs_daily).mean()

  # --- categorical (rain / no-rain) scores -----------------------------------
  obs_rain = obs_daily >= RAIN_THRESHOLD
  sat_rain = sat_daily >= RAIN_THRESHOLD

  hit = np.count_nonzero(obs_rain & sat_rain)                 # observed rain, modelled rain
  miss = np.count_nonzero(obs_rain & ~sat_rain)               # observed rain, modelled dry
  false_alarm = np.count_nonzero(~obs_rain & sat_rain)        # observed dry,  modelled rain
  correct_negative = np.count_nonzero(~obs_rain & ~sat_rain)  # observed dry,  modelled dry

  observed_rain_days = hit + miss
  observed_dry_days = false_alarm + correct_negative
  modelled_rain_days = hit + false_alarm

  # Each score is NaN when its denominator is zero.
  pod = hit / observed_rain_days if observed_rain_days else np.nan
  # POFD is the fraction of observed dry days that the model called rainy.
  pofd = false_alarm / observed_dry_days if observed_dry_days else np.nan
  # Stored under the existing label 'False Alarm Rate'; the formula is the
  # false alarm ratio (false alarms over all modelled rain days). The label is
  # kept so the CSV layout does not change.
  far = false_alarm / modelled_rain_days if modelled_rain_days else np.nan
  freq_bias = modelled_rain_days / observed_rain_days if observed_rain_days else np.nan

  # --- store results ----------------------------------------------------------
  # Write with a single .loc[row, column] call; chained
  # `frame[col].loc[row] = value` may assign into a temporary copy.
  station_metrics = {
    'Lat': df_obs_metadata['LATITUDE'].loc[station_id],
    'Lon': df_obs_metadata['LONGITUDE'].loc[station_id],
    'RMSE': round(rmse,2),
    'MAE': round(mae,2),
    'Bias': round(bias,2),
    'PearsonR': round(pearsonr[0],2),
    'SpearmanR': round(spearmanr[0],2),
    'R2': round(rsq,2),
    'Probability of Detection': round(pod,2),
    'Probability of False Detection': round(pofd,2),
    'False Alarm Rate': round(far,2),
    'Frequency Bias': round(freq_bias,2),
  }
  for metric_name, metric_value in station_metrics.items():
    daily_metrics_df.loc[metric_name, station_id] = metric_value

  print(station_id)
  print("Number of Observation: ", n_of_sample)
  print("RMSE: ", str(round(rmse,1)))
  print("MAE: ", str(round(mae,1)))
  print("Bias: ", str(round(bias,1)))
  print("Rsquare: ", str(round(rsq,1)))
  print("Probability of Detection: ", pod)
  print("Probability of False Detection: ", pofd)
  print("False Alarm Rate: ", far)
  print("Frequency Bias: ", freq_bias)
  print(" ")

# One row per station, one column per metric, then persist.
daily_metrics_df = daily_metrics_df.T
daily_metrics_df.to_csv('/content/gdrive/MyDrive/rainfall-forecast/merge_model/evaluation/metrics_model_2_daily.csv')

In [2]:
# Reload the per-station daily metric tables written by the two evaluation cells.
# No index column is set, so the station ids arrive as an ordinary first column.
eval_1, eval_2 = (
  pd.read_csv(metrics_csv)
  for metrics_csv in (
    '/content/gdrive/MyDrive/rainfall-forecast/merge_model/evaluation/metrics_model_1_daily.csv',
    '/content/gdrive/MyDrive/rainfall-forecast/merge_model/evaluation/metrics_model_2_daily.csv',
  )
)

In [3]:
# Column-wise mean of every numeric metric, for Model 1 and Model 2.
# numeric_only=True skips the station-id text column that read_csv loaded from
# the saved index; without it pandas >= 2.0 raises TypeError on that column.
eval_1.mean(numeric_only=True), eval_2.mean(numeric_only=True)

(Lat          -2.645810
 Lon          33.582983
 RMSE          9.036800
 MAE           5.340500
 Bias          0.272800
 PearsonR     -0.211900
 SpearmanR    -0.292900
 R2           -0.130400
 POD           0.964000
 POFD          0.703871
 FAR           0.708000
 FB            3.737300
 dtype: float64, Lat          -2.645810
 Lon          33.582983
 RMSE          9.048600
 MAE           5.467900
 Bias          0.572500
 PearsonR     -0.026900
 SpearmanR     0.254800
 R2           -0.151400
 POD           0.908900
 POFD          0.602959
 FAR           0.716400
 FB            3.628500
 dtype: float64)

In [4]:
# Column-wise mean of the per-station daily metrics for each satellite product.
# DataFrame.mean is called directly rather than np.mean(frame): np.mean forwards
# axis=None, which pandas 2.x treats as a reduction over the whole frame instead
# of one mean per column. numeric_only=True skips any text column (such as the
# station id) so the per-metric means are still returned as a Series.
tamsat = pd.read_csv('/content/gdrive/MyDrive/rainfall-forecast/verification_validation/metrics/metrics_tamsat_daily_2020.csv').mean(numeric_only=True)
imerg = pd.read_csv('/content/gdrive/MyDrive/rainfall-forecast/verification_validation/metrics/metrics_imerg_daily_2020.csv').mean(numeric_only=True)
chirps = pd.read_csv('/content/gdrive/MyDrive/rainfall-forecast/verification_validation/metrics/metrics_chirps_daily_2020.csv').mean(numeric_only=True)
cmorph = pd.read_csv('/content/gdrive/MyDrive/rainfall-forecast/verification_validation/metrics/metrics_cmorph_daily_2020.csv').mean(numeric_only=True)

In [None]:
# Show the mean daily metrics for TAMSAT.
# NOTE(review): the saved output has mean Lat/Lon of about -12.45 / 23.51, while
# the imerg and chirps outputs show about -2.7 / 33.6 -- the station set or the
# coordinates in this metrics file may differ; confirm before comparing products.
tamsat

Lat                              -12.449049
Lon                               23.514934
RMSE                               9.965980
MAE                                4.253922
Bias                              -1.906667
PearsonR                           0.068529
SpearmanR                          0.125490
R2                                -0.440000
Probability of Detection           0.223824
Probability of False Detection     0.274314
False Alarm Rate                   0.577941
Frequency Bias                     0.557353
dtype: float64

In [None]:
# Show the mean daily metrics for IMERG.
imerg

Lat                               -2.681218
Lon                               33.638844
RMSE                              10.569208
MAE                                4.599109
Bias                               0.450198
PearsonR                           0.337525
SpearmanR                          0.461485
R2                                -0.916832
Probability of Detection           0.680693
Probability of False Detection     0.151782
False Alarm Rate                   0.459208
Frequency Bias                     1.341782
dtype: float64

In [None]:
# Show the mean daily metrics for CHIRPS.
chirps

Lat                               -2.647849
Lon                               33.589548
RMSE                              11.564646
MAE                                5.462020
Bias                               0.299899
PearsonR                           0.247273
SpearmanR                          0.290707
R2                                -1.119091
Probability of Detection           0.478081
Probability of False Detection     0.235960
False Alarm Rate                   0.477778
Frequency Bias                     0.968283
dtype: float64

In [None]:
# Show the mean daily metrics for CMORPH.
# NOTE(review): the saved output uses different column labels from the other
# products (ObsNumber, POD, POFD, FAR, FB) and those columns are all NaN, so this
# file appears to follow an older schema without categorical scores -- confirm
# and regenerate if it should be comparable.
cmorph

Lat         -12.449049
Lon          23.514934
ObsNumber          NaN
RMSE         18.474020
MAE           7.079510
Bias          3.336667
PearsonR      0.279804
SpearmanR     0.409804
R2           -7.164804
POD                NaN
POFD               NaN
FAR                NaN
FB                 NaN
dtype: float64