In [1]:
import numpy as np
import pickle
import pandas as pd
import xarray as xr

def save_pickle(data,output_name):
    """Serialize ``data`` with pickle and write it to ``output_name``.

    Parameters
    ----------
    data : object
        Any picklable Python object.
    output_name : str or path-like
        Destination file; it is opened in binary write mode, so an existing
        file at that path is overwritten.
    """
    with open(output_name, mode='wb') as handle:
        pickle.dump(data, handle)

def load_pickle(filename):
    """Read ``filename`` and return the object that was pickled into it.

    Parameters
    ----------
    filename : str or path-like
        Path of a pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object ``pickle.load`` reconstructs from the file.
    """
    # NOTE(review): unpickling can execute arbitrary code, so only point this
    # at files produced by a trusted source.
    with open(filename, mode='rb') as handle:
        return pickle.load(handle)

def build_df_station(stasiun,elevasi,jarak,latitude,longitude,T,pu_gpm_station, pu_station):
    """Assemble the per-station table from eight equally long column inputs.

    Each argument becomes one column of the returned DataFrame, in this
    order: "Nama Stasiun", "Elevasi", "Jarak", "latitude", "longitude",
    "T", "pu_gpm", "pu_station".

    Returns
    -------
    pandas.DataFrame
        One row per entry of the input sequences.
    """
    column_names = ("Nama Stasiun", "Elevasi", "Jarak", "latitude",
                    "longitude", "T", "pu_gpm", "pu_station")
    column_values = (stasiun, elevasi, jarak, latitude,
                     longitude, T, pu_gpm_station, pu_station)
    return pd.DataFrame(dict(zip(column_names, column_values)))

def concat_df(df1,df2):
    """Stack ``df2`` below ``df1`` and renumber the rows from 0.

    Returns
    -------
    pandas.DataFrame
        The row-wise concatenation with a fresh integer index.
    """
    return pd.concat((df1, df2), ignore_index=True)

def build_dataset(df_stations, pu_stations, pu_gpm_at_stations,coord_gpm_at_station):
    """Build the long-format table with one row per (station, return period).

    Parameters
    ----------
    df_stations : pandas.DataFrame
        Station metadata; must contain the columns "Nama Stasiun",
        "Elevasi", "Jarak", "Lintang" and "Bujur".
    pu_stations : mapping
        station name -> sequence of station values, one per entry of ``T``.
    pu_gpm_at_stations : mapping
        station name -> sequence of GPM values, one per entry of ``T``.
    coord_gpm_at_station : mapping
        station name -> ``[latitude, longitude]`` written to the
        "latitude" / "longitude" columns.

    Returns
    -------
    pandas.DataFrame
        Columns "Nama Stasiun", "Elevasi", "Jarak", "latitude", "longitude",
        "T", "pu_gpm", "pu_station", indexed 0..N-1. When ``pu_stations`` is
        empty an empty DataFrame with those columns is returned (the previous
        implementation returned the integer ``0`` in that case).

    Raises
    ------
    IndexError
        If a station in ``pu_stations`` has no row in ``df_stations``.
    """
    columns = ["Nama Stasiun", "Elevasi", "Jarak", "latitude", "longitude",
               "T", "pu_gpm", "pu_station"]
    # T runs over 2..100 (99 values); the per-station sequences must have the
    # same length or the DataFrame constructor rejects them.
    T = np.arange(2,101)
    n_periods = len(T)

    frames = []
    for station in pu_stations:
        # Only the first two entries (Elevasi, Jarak) are used below; the
        # four-column selection is kept so the row is converted to a common
        # dtype exactly as before.
        feature_station = df_stations[df_stations["Nama Stasiun"] == station][["Elevasi","Jarak","Lintang","Bujur"]].values[0]
        frames.append(pd.DataFrame({
            "Nama Stasiun": [station]*n_periods,
            "Elevasi": [feature_station[0]]*n_periods,
            "Jarak": [feature_station[1]]*n_periods,
            "latitude": [coord_gpm_at_station[station][0]]*n_periods,
            "longitude": [coord_gpm_at_station[station][1]]*n_periods,
            "T": T,
            "pu_gpm": pu_gpm_at_stations[station],
            "pu_station": pu_stations[station],
        }))

    if not frames:
        return pd.DataFrame(columns=columns)
    # Concatenate once instead of re-copying the growing table per station.
    return pd.concat(frames, ignore_index=True)

def get_index_coord(lats, lons, lat, lon):
    """Find the grid coordinates closest to a point, one axis at a time.

    Parameters
    ----------
    lats, lons : array-like
        Grid coordinate values; they must support ``lats - lat`` and
        integer indexing (the notebook passes numpy arrays).
    lat, lon : scalar
        Coordinates of the point to look up.

    Returns
    -------
    tuple
        ``(idx_lat, idx_lon, nearest_lat, nearest_lon)`` where each index
        minimises the absolute difference on its own axis and the last two
        items are the grid values at those indices.
    """
    idx_lat = np.argmin(abs(lats - lat))
    idx_lon = np.argmin(abs(lons - lon))
    return idx_lat, idx_lon, lats[idx_lat], lons[idx_lon]
    
def get_pu_gpm_at_station(df_stations, pu_station, pu_gpm):
    """Pick, for every station, the gridded values at its nearest grid cell.

    Parameters
    ----------
    df_stations : pandas.DataFrame
        Station metadata with the columns "Nama Stasiun", "Lintang", "Bujur".
    pu_station : iterable
        Station names to process (the notebook passes a dict keyed by name).
    pu_gpm : mapping
        Must expose ``pu_gpm[name].values`` for 'latitude', 'longitude' and
        'periode_ulang' (the notebook passes an xarray Dataset).

    Returns
    -------
    tuple of dict
        ``(pu_gpm_at_station, coord_gpm_at_station)``: per station, the
        values taken at the nearest cell and the ``[lat, lon]`` of that cell.
    """
    grid_lats = pu_gpm['latitude'].values
    grid_lons = pu_gpm['longitude'].values
    grid_values = pu_gpm['periode_ulang'].values

    coord_gpm_at_station, pu_gpm_at_station = {}, {}
    for station in pu_station:
        is_station = df_stations['Nama Stasiun'] == station
        # First matching row; raises IndexError when the station is absent.
        lat, lon = df_stations.loc[is_station, ["Lintang","Bujur"]].values[0]
        idx_lat, idx_lon, min_lat, min_lon = get_index_coord(grid_lats, grid_lons, lat, lon)
        coord_gpm_at_station[station] = [min_lat, min_lon]
        # The array is indexed as [:, lon, lat]; the first axis is kept whole.
        pu_gpm_at_station[station] = grid_values[:, idx_lon, idx_lat]
    return pu_gpm_at_station, coord_gpm_at_station

In [3]:
# Input files: station metadata, gridded GPM values, per-station values
# (contents inferred from the file names -- confirm against the data).
path_stations = "C:/Users/62812/Documents/Kerjaan Meteorologi/GPM-Correction/GPM-Correction/data/elevasi_jarak_obs.xlsx"
path_pu_gpm = "C:/Users/62812/Documents/Kerjaan Meteorologi/Periode Ulang/Nilai Periode Ulang Indonesia.nc"
path_pu_stasiun = "C:/Users/62812/Documents/Kerjaan Meteorologi/GPM-Correction/GPM-Correction/research/Nilai Periode Ulang Stasiun Indonesia.pickle"

df = pd.read_excel(path_stations)
pu_gpm_indonesia = xr.open_dataset(path_pu_gpm)
pu_stasiun_indonesia = load_pickle(path_pu_stasiun)

In [5]:
# Sample the gridded GPM values at the grid cell nearest to each station
pu_gpm_at_stations, coord_gpm_at_station = get_pu_gpm_at_station(
    df,
    pu_stasiun_indonesia,
    pu_gpm_indonesia,
)

In [12]:
# Build the long-format table and write it to CSV (the index is written too)
dataset = build_dataset(
    df_stations=df,
    pu_stations=pu_stasiun_indonesia,
    pu_gpm_at_stations=pu_gpm_at_stations,
    coord_gpm_at_station=coord_gpm_at_station,
)
dataset.to_csv("clean dataset.csv")

## Data Preparation II

In [9]:
# Read the station metadata and the annual-maximum GPM file
df = pd.read_excel("C:/Users/62812/Documents/Kerjaan Meteorologi/GPM-Correction/GPM-Correction/data/elevasi_jarak_obs.xlsx")
path_ann_max_gpm = "C:/Users/62812/Documents/Kerjaan Meteorologi/Data/annual max gpm.nc"
annual_max_gpm = xr.open_dataset(path_ann_max_gpm)

# Grid coordinates and the raw data array
lats, lons = annual_max_gpm['latitude'].values, annual_max_gpm['longitude'].values
ann_max_values = annual_max_gpm['__xarray_dataarray_variable__'].values

# Drop the first and last entries along the first axis
# NOTE(review): presumably incomplete first/last years -- confirm.
arr_ann_max_gpm = ann_max_values[1:-1, :, :]

In [10]:
# Return periods to keep and their positions in the stored per-station arrays.
# Position = period - 2, i.e. index 0 holds T = 2 (build_dataset pairs the same
# arrays with T = np.arange(2, 101)).
T = [2,5,10,25,50,100]
idx_pu = [period - 2 for period in range(1, 101) if period in T]

In [11]:
# Load the return-period values for every station and every T
pu_stasiun_indonesia = load_pickle("C:/Users/62812/Documents/Kerjaan Meteorologi/GPM-Correction/GPM-Correction/research/Nilai Periode Ulang Stasiun Indonesia.pickle")

# Keep only the return-period values that correspond to the selected T
pu_stasiun = {
    key: np.array(val)[idx_pu]
    for key, val in pu_stasiun_indonesia.items()
}

In [12]:
def get_ann_max_gpm_at_station(df_stations, pu_station, ann_max_gpm, lats, lons):
    """Extract, per station, the ``ann_max_gpm`` series at its nearest grid cell.

    Parameters
    ----------
    df_stations : pandas.DataFrame
        Station metadata with the columns "Nama Stasiun", "Lintang", "Bujur".
    pu_station : iterable
        Station names to process (the notebook passes a dict keyed by name).
    ann_max_gpm : array-like
        3-D array indexed as ``[:, lon_index, lat_index]``.
    lats, lons : array-like
        Grid coordinate values forwarded to ``get_index_coord``.

    Returns
    -------
    tuple of dict
        ``(ann_max_gpm_at_stasiun, coord_gpm_at_station)``: per station, the
        values at the nearest cell and the ``[lat, lon]`` of that cell.
    """
    coord_gpm_at_station, ann_max_gpm_at_stasiun = {}, {}
    for station in pu_station:
        is_station = df_stations['Nama Stasiun'] == station
        # First matching row; raises IndexError when the station is absent.
        lat, lon = df_stations.loc[is_station, ["Lintang","Bujur"]].values[0]
        idx_lat, idx_lon, min_lat, min_lon = get_index_coord(lats, lons, lat, lon)
        coord_gpm_at_station[station] = [min_lat, min_lon]
        ann_max_gpm_at_stasiun[station] = ann_max_gpm[:, idx_lon, idx_lat]
    return ann_max_gpm_at_stasiun, coord_gpm_at_station

In [13]:
# Annual-maximum GPM values at the grid cell nearest to each station
ann_max_gpm_at_stasiun, coord_gpm_at_station = get_ann_max_gpm_at_station(
    df_stations=df,
    pu_station=pu_stasiun,
    ann_max_gpm=arr_ann_max_gpm,
    lats=lats,
    lons=lons,
)

In [14]:
# Pair each station's GPM annual-maximum values with its return-period values
output_dataset = {
    stasiun: (ann_max_gpm_at_stasiun[stasiun], pu_stasiun[stasiun])
    for stasiun in pu_stasiun
}

In [35]:
# Disabled on purpose: uncomment to write the approach-2 dataset to disk.
#save_pickle(data = output_dataset, output_name = "Dataset Approach 2.pkl")

## Data Preparation III

In [15]:
# Read the station metadata and the annual-maximum GPM file
df = pd.read_excel("C:/Users/62812/Documents/Kerjaan Meteorologi/GPM-Correction/GPM-Correction/data/elevasi_jarak_obs.xlsx")
path_ann_max_gpm = "C:/Users/62812/Documents/Kerjaan Meteorologi/Data/annual max gpm.nc"
annual_max_gpm = xr.open_dataset(path_ann_max_gpm)

# Grid coordinates and the raw data array
lats, lons = annual_max_gpm['latitude'].values, annual_max_gpm['longitude'].values
ann_max_values = annual_max_gpm['__xarray_dataarray_variable__'].values

# Drop the first and last entries along the first axis
# NOTE(review): presumably incomplete first/last years -- confirm.
arr_ann_max_gpm = ann_max_values[1:-1, :, :]

In [16]:
def get_ann_max_grid_gpm_at_station(df_stations, pu_station, ann_max_gpm, lats, lons, number_of_grid):
    """Extract, per station, a window of ``ann_max_gpm`` centred on its nearest cell.

    Parameters
    ----------
    df_stations : pandas.DataFrame
        Station metadata with the columns "Nama Stasiun", "Lintang", "Bujur".
    pu_station : iterable
        Station names to process (the notebook passes a dict keyed by name).
    ann_max_gpm : array-like
        3-D array indexed as ``[:, lon_index, lat_index]``.
    lats, lons : array-like
        Grid coordinate values forwarded to ``get_index_coord``.
    number_of_grid : int
        Requested window width in cells. The window spans
        ``int(number_of_grid/2)`` cells on each side of the nearest cell, so
        its width is ``2*int(number_of_grid/2) + 1`` (an even request yields
        one extra cell).

    Returns
    -------
    tuple of dict
        ``(ann_max_gpm_at_stasiun, coord_gpm_at_station)``: per station, the
        window of values and the ``[lat, lon]`` of the nearest cell. For a
        station closer than the half-width to any edge of the grid the
        window is truncated at that edge and is therefore smaller.

    Raises
    ------
    IndexError
        If a station in ``pu_station`` has no row in ``df_stations``.
    """
    half_width = int(number_of_grid/2)
    coord_gpm_at_station = {}
    ann_max_gpm_at_stasiun = {}
    for station in pu_station:
        latlon = df_stations[df_stations['Nama Stasiun'] == station][["Lintang","Bujur"]].values
        lat,lon = latlon[0][0], latlon[0][1]
        idx_lat, idx_lon, min_lat, min_lon = get_index_coord(lats, lons, lat, lon)
        coord_gpm_at_station[station] = [min_lat, min_lon]
        # Clamp the start at 0: a negative start would be read as an offset
        # from the END of the axis and silently give an empty or wrong window.
        # The end needs no clamp because slicing already stops at the axis end.
        idx_lon_start, idx_lon_end = max(idx_lon - half_width, 0), idx_lon + half_width + 1
        idx_lat_start, idx_lat_end = max(idx_lat - half_width, 0), idx_lat + half_width + 1
        ann_max_gpm_at_stasiun[station] = ann_max_gpm[:,idx_lon_start:idx_lon_end, idx_lat_start:idx_lat_end]
    return ann_max_gpm_at_stasiun,coord_gpm_at_station

In [17]:
# Window of GPM annual-maximum values around each station (number_of_grid = 3)
ann_max_gpm_at_stasiun, coord_gpm_at_station = get_ann_max_grid_gpm_at_station(
    df_stations=df,
    pu_station=pu_stasiun,
    ann_max_gpm=arr_ann_max_gpm,
    lats=lats,
    lons=lons,
    number_of_grid=3,
)

In [21]:
# Pair each station's GPM window with its return-period values
dataset3 = {
    key: (ann_max_gpm_at_stasiun[key], val)
    for key, val in pu_stasiun.items()
}

In [25]:
# Persist the approach-3 dataset in the current working directory
save_pickle(dataset3, "Dataset Approach 3.pkl")