In [None]:
%load_ext autoreload
%autoreload 2
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from tqdm import tqdm
import time


from utils.load import load_radar_dataset, load_raingauge_dataset, load_cml_dataset, get_gauge_coordinate_mappings, read_config
from models.idw import run_IDW_benchmark
from models.kriging import kriging_external_drift

In [None]:
# Load the run-wide settings; later cells read
# config['dataset_parameters']['train_size'] to size the station training split.
CONFIG_FILE = 'config.yaml'
config = read_config(CONFIG_FILE)

In [None]:
# Load the three data sources used in this notebook: radar, rain gauge and CML.
radar_df = load_radar_dataset(folder_name='test')
raingauge_df = load_raingauge_dataset('rainfall_data.csv', N=0)
cml_df = load_cml_dataset('CML_data_processed_2025.nc')

# Quick sanity check: report the shape of each dataset in load order.
for loaded in (radar_df, raingauge_df, cml_df):
  print(loaded.shape)

In [None]:
# Preview the gauge data. A bare last expression uses the notebook's rich
# (HTML, automatically truncated) DataFrame display instead of plain-text print().
raingauge_df

# IDW interpolation with rain gauge

In [None]:
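# Visualise training split: placeholder cell with no code. The split is presumably drawn by run_IDW_benchmark(visualise_split=True) in the next cell (TODO confirm, then remove this cell).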

In [None]:
# Scale the gauge readings before benchmarking. The 15-minute variant sums each
# 15-minute window and multiplies by 4 to obtain an mm/hour rainfall rate; the
# 5-minute variant multiplies by 12 (presumably the same per-hour scaling for
# 5-minute readings; TODO confirm the native resolution of raingauge_df).
raingauge_df_5mins = raingauge_df * 12
raingauge_df_15mins = raingauge_df.resample('15min').sum() * 4
station_dict = get_gauge_coordinate_mappings()

# Series actually fed to the benchmark; swap in raingauge_df_15mins to compare.
raingauge_choice = raingauge_df_5mins

# Benchmark settings gathered in one place so they are easy to read and tweak.
idw_settings = {
  'power': 1,
  'training_split': config['dataset_parameters']['train_size'],
  'visualise_split': True,
  'seed': 111,
  'loss_hist': True,
}
idw_RMSE = run_IDW_benchmark(raingauge_choice, station_dict, **idw_settings)

# Kriging interpolation with rain gauge

In [None]:
# Kriging benchmark: fit on the training gauges at every timestep, then score the
# kriged field at the held-out validation gauges.
#
# Reported metric: RMSE over the validation stations for each timestep, averaged
# over timesteps. NOTE(review): confirm run_IDW_benchmark uses the same RMSE
# definition before comparing this number against idw_RMSE.

# Seed the global RNG so the station split is reproducible between runs.
random.seed(111)

# Grid geometry used to map a station's (lat, long) onto a cell of the kriged field.
# NOTE(review): origin and cell size are assumed to match the grid returned by
# kriging_external_drift; confirm against models/kriging.py. A station south/west
# of the origin would yield a negative index, which silently wraps around.
GRID_LAT_ORIGIN = 1.14
GRID_LONG_ORIGIN = 103.6
GRID_RESOLUTION = 0.01

training_ratio = config['dataset_parameters']['train_size']
station_dict = get_gauge_coordinate_mappings()
station_names = list(station_dict)

training_stations = random.sample(station_names, int(len(station_names) * training_ratio))
training_station_set = set(training_stations)  # O(1) membership tests for the split below
validation_stations = [s for s in station_names if s not in training_station_set]
if not validation_stations:
  # Without held-out stations there is nothing to score (and the mean below would divide by 0).
  raise ValueError(
    f"train_size={training_ratio} leaves no validation stations out of {len(station_names)}"
  )

total_RMSE_loss = 0.0  # sum of per-timestep RMSE over all valid kriges
invalid_kriges = 0     # timesteps for which kriging returned no field
loss_arr = []          # absolute error of every (timestep, validation station) pair, for the histogram

start = time.time()

for i in tqdm(range(len(raingauge_choice))):
  # Read from the same frame that sets the loop length, so changing
  # raingauge_choice (e.g. to the 15-minute series) stays consistent.
  # NOTE(review): missing readings become 0 for both fitting and scoring.
  df = raingauge_choice.iloc[i].fillna(0)

  kriging_result, kriging_variance = kriging_external_drift(df=df,
                                                            station_names=training_stations,
                                                            station_dict=station_dict,
                                                            variogram_model='linear',
                                                            method='universal')
  if kriging_result is None:
    invalid_kriges += 1
    continue

  # Score the kriged field at each held-out station.
  squared_error_sum = 0.0
  for validation_station in validation_stations:
    rain_gauge_value = df[validation_station]
    lat, long = station_dict[validation_station]
    row = math.floor((lat - GRID_LAT_ORIGIN) / GRID_RESOLUTION)
    col = math.floor((long - GRID_LONG_ORIGIN) / GRID_RESOLUTION)
    kriged_value = kriging_result[row][col]  # indexed as [row][col] = [lat cell][long cell]

    residual = kriged_value - rain_gauge_value
    squared_error_sum += residual ** 2
    loss_arr.append(abs(residual))

  # True root-mean-square error: square, average over stations, then take the root.
  RMSE = np.sqrt(squared_error_sum / len(validation_stations))
  total_RMSE_loss += RMSE

end = time.time()

n_timesteps = len(raingauge_choice)
valid_kriges = n_timesteps - invalid_kriges
# Guard the averages so an empty frame or an all-invalid run reports nan instead of crashing.
average_loss_valid = total_RMSE_loss / valid_kriges if valid_kriges else float('nan')
average_loss_all = total_RMSE_loss / n_timesteps if n_timesteps else float('nan')

print(f"invalid kriges: {invalid_kriges}")
print(f"final average loss: {average_loss_valid}")
print(f"final average loss (0 rain = 0 loss): {average_loss_all}")
print(f"Time taken = {end - start}")

fig, ax = plt.subplots(figsize=(15, 8))
ax.hist(loss_arr, bins=30, log=True)
ax.set(title="Kriging absolute error at validation stations",
       xlabel="Absolute error (units of raingauge_choice)",
       ylabel="Count (log scale)")
plt.show()


In [None]:
# Debug aid: `df` is not defined in this cell. It is the loop variable left over
# from the kriging cell above, i.e. the most recent gauge row that loop assigned
# (NaNs already replaced with 0). That cell must have been run first.
print(df)