In [1]:
# Imports + my helper functions

import sys
import os
import xarray as xr
import netCDF4 as nc
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import importlib

sys.path.append('/home/563/ls7238/code/australia_precipitation/module')
import helper_funcs
importlib.reload(helper_funcs)


<module 'helper_funcs' from '/home/563/ls7238/code/australia_precipitation/module/helper_funcs.py'>

In [20]:
# Make list of the file paths

base_dir = '/g/data/k10/dl6968/BoM_gauges/netcdf/30min_acc'

# Recursively collect every NetCDF file under base_dir.
# os.walk yields entries in arbitrary, filesystem-dependent order, so the list is
# sorted to make the station order (and anything built from it) reproducible.
nc_files = sorted(
    os.path.join(root, file)
    for root, _dirs, files in os.walk(base_dir)
    for file in files
    if file.endswith('.nc')
)


  

In [23]:
# Function that reads each station's nc file, computes annual rainfall totals, and combines them into one DataFrame.

def annual_rainfall_sum(nc_files):
    """
    Sum the ``prcp`` variable of each NetCDF file into calendar-year totals.

    Parameters
    ----------
    nc_files : iterable of str
        NetCDF file paths (presumably one per station -- confirm with the
        data provider). Each file must contain a ``prcp`` variable whose
        index can be converted with ``pd.to_datetime``.

    Returns
    -------
    pd.DataFrame
        Annual sums with the year as index (named ``"Year"``) and one column
        per station. Columns are keyed by the file's ``Station_Number``
        global attribute, falling back to the file name without extension
        when that attribute is absent. A year in which a station has no
        valid observations is NaN rather than 0. An empty input returns an
        empty DataFrame.

    Notes
    -----
    NOTE(review): the day slices printed elsewhere in this notebook for the
    ``30min_acc`` files rise monotonically through the day, which suggests
    the values are running accumulations rather than per-interval amounts.
    If so, summing them as done here overcounts rainfall -- confirm the
    variable's semantics before trusting these totals.
    """

    annual_data = {}

    for file in nc_files:
        # The context manager releases the file handle even if reading
        # fails; the values are pulled into pandas before it is closed.
        with xr.open_dataset(file) as ds:
            df = ds["prcp"].to_pandas()
            station_id = ds.attrs.get("Station_Number")

        # Without this fallback every file lacking the attribute would share
        # the key None and silently overwrite the previous one.
        if station_id is None:
            station_id = os.path.splitext(os.path.basename(file))[0]

        df.index = pd.to_datetime(df.index)

        # min_count=1 keeps a year with no valid observations as NaN instead
        # of reporting it as a 0 total. Summing straight to years gives the
        # same totals as summing to days first.
        yearly = df.resample("YE").sum(min_count=1)
        yearly.index = yearly.index.year

        # Collapse a single-column frame to a Series. Squeezing only the
        # column axis avoids turning a one-year record into a bare scalar,
        # which pd.concat cannot handle.
        if isinstance(yearly, pd.DataFrame):
            yearly = yearly.squeeze(axis="columns")

        annual_data[station_id] = yearly

    # pd.concat raises on an empty collection; return an empty table instead.
    if not annual_data:
        return pd.DataFrame(index=pd.Index([], name="Year"))

    # Passing the dict makes its keys (station ids) the column labels.
    annual_df = pd.concat(annual_data, axis=1)
    annual_df.index.name = "Year"
    annual_df = annual_df.sort_index()

    return annual_df


In [35]:
# Test set:

# Two NSW gauge files used as a quick smoke test of the annual aggregation.
tester = [
    '/g/data/k10/dl6968/BoM_gauges/netcdf/30min_inst/NSW/NSW_068241.nc',
    '/g/data/k10/dl6968/BoM_gauges/netcdf/30min_inst/NSW/NSW_061366.nc',
]

# Year-by-station table for the two test files.
test = annual_rainfall_sum(tester)

In [36]:
# Display the year-by-station table returned for the two test files.
test

Unnamed: 0_level_0,068241,061366
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1989,,0.0
1990,,0.0
1991,,0.0
1992,,0.0
1993,,0.0
1994,,0.0
1995,,10220.4
1996,,17274.4
1997,,24333.6
1998,,28291.4


In [43]:
# The annual totals above are clearly wrong, so pull out the longest-running capital-city stations for testing.

# ID: 066037	Name: SYDNEY AIRPORT AMO
# ID: 086282	Name: MELBOURNE AIRPORT
# ID: 014015	Name: DARWIN AIRPORT
# ID: 040913	Name: BRISBANE
# ID: 009225	Name: PERTH METRO
# ID: 094029	Name: HOBART (ELLERSLIE ROAD)

# One file per capital-city test station, built from (state directory, station id).
# The number at the start of each trailing comment is the list index.
# NOTE(review): the variable name says "_inst" but every path points at the
# 30min_acc tree -- confirm which product is intended.
tester_stations_inst = [
    f'/g/data/k10/dl6968/BoM_gauges/netcdf/30min_acc/{state}/{state}_{station}.nc'
    for state, station in (
        ('NSW', '066037'),  # 0: SYDNEY AIRPORT AMO
        ('VIC', '086282'),  # 1: MELBOURNE AIRPORT
        ('NT', '014015'),   # 2: DARWIN AIRPORT
        ('QLD', '040913'),  # 3: BRISBANE
        ('WA', '009225'),   # 4: PERTH METRO
        ('TAS', '094029'),  # 5: HOBART (ELLERSLIE ROAD)
    )
]

In [44]:
# Sydney

# Load the "prcp" series for list entry 0 (066037, SYDNEY AIRPORT AMO).
# The with-block closes the file even if reading fails; the values are pulled
# into pandas before the handle is released.
with xr.open_dataset(tester_stations_inst[0]) as syd_ds:
    syd_rain = syd_ds["prcp"]
    syd_df = syd_rain.to_pandas()

syd_df.index = pd.to_datetime(syd_df.index)

# Inspect the raw values for a single day.
day_data = syd_df.loc["2021-03-20"]

day_data


time
2021-03-20 00:00:00     24.8
2021-03-20 00:30:00     29.2
2021-03-20 01:00:00     37.0
2021-03-20 01:30:00     40.6
2021-03-20 02:00:00     43.0
2021-03-20 02:30:00     43.4
2021-03-20 03:00:00     45.0
2021-03-20 03:30:00     64.4
2021-03-20 04:00:00     69.4
2021-03-20 04:30:00     73.6
2021-03-20 05:00:00     80.4
2021-03-20 05:30:00     98.4
2021-03-20 06:00:00     99.2
2021-03-20 06:30:00     99.2
2021-03-20 07:00:00     99.2
2021-03-20 07:30:00    102.0
2021-03-20 08:00:00    103.0
2021-03-20 08:30:00    103.6
2021-03-20 09:00:00    104.0
2021-03-20 09:30:00    104.8
2021-03-20 10:00:00    104.8
2021-03-20 10:30:00    104.8
2021-03-20 11:00:00    104.8
2021-03-20 11:30:00    104.8
2021-03-20 12:00:00    104.8
2021-03-20 12:30:00    105.0
2021-03-20 13:00:00    105.0
2021-03-20 13:30:00    105.0
2021-03-20 14:00:00    105.0
2021-03-20 14:30:00    105.0
2021-03-20 15:00:00    105.0
2021-03-20 15:30:00    105.2
2021-03-20 16:00:00    105.4
2021-03-20 16:30:00    105.4
2021-03-2

In [45]:
# Melbourne

# Load the "prcp" series for list entry 1 (086282, MELBOURNE AIRPORT).
# The with-block closes the file even if reading fails; the values are pulled
# into pandas before the handle is released.
with xr.open_dataset(tester_stations_inst[1]) as mel_ds:
    mel_rain = mel_ds["prcp"]
    mel_df = mel_rain.to_pandas()

mel_df.index = pd.to_datetime(mel_df.index)

# Inspect the raw values for a single day.
day_data = mel_df.loc["2005-02-02"]

day_data


time
2005-02-02 00:00:00      8.2
2005-02-02 00:30:00      NaN
2005-02-02 01:00:00      NaN
2005-02-02 01:30:00     11.8
2005-02-02 02:00:00      NaN
2005-02-02 02:30:00      NaN
2005-02-02 03:00:00      NaN
2005-02-02 03:30:00      NaN
2005-02-02 04:00:00     19.8
2005-02-02 04:30:00     22.6
2005-02-02 05:00:00     24.6
2005-02-02 05:30:00     26.8
2005-02-02 06:00:00     29.2
2005-02-02 06:30:00     32.0
2005-02-02 07:00:00      NaN
2005-02-02 07:30:00     42.0
2005-02-02 08:00:00     43.2
2005-02-02 08:30:00     44.2
2005-02-02 09:00:00     45.4
2005-02-02 09:30:00      NaN
2005-02-02 10:00:00     48.6
2005-02-02 10:30:00     50.6
2005-02-02 11:00:00     53.2
2005-02-02 11:30:00     57.2
2005-02-02 12:00:00      NaN
2005-02-02 12:30:00     64.2
2005-02-02 13:00:00     67.0
2005-02-02 13:30:00     71.0
2005-02-02 14:00:00     74.4
2005-02-02 14:30:00      NaN
2005-02-02 15:00:00     81.6
2005-02-02 15:30:00      NaN
2005-02-02 16:00:00      NaN
2005-02-02 16:30:00      NaN
2005-02-0

In [46]:
# ID: 014015	Name: DARWIN AIRPORT

# Load the "prcp" series for list entry 2 (014015, DARWIN AIRPORT).
# The with-block closes the file even if reading fails; the values are pulled
# into pandas before the handle is released.
with xr.open_dataset(tester_stations_inst[2]) as dar_ds:
    dar_rain = dar_ds["prcp"]
    dar_df = dar_rain.to_pandas()

dar_df.index = pd.to_datetime(dar_df.index)

# Inspect the raw values for a single day.
day_data = dar_df.loc["2005-02-02"]

day_data

time
2005-02-02 00:00:00     0.0
2005-02-02 00:30:00     0.0
2005-02-02 01:00:00     0.0
2005-02-02 01:30:00     0.0
2005-02-02 02:00:00     0.0
2005-02-02 02:30:00     NaN
2005-02-02 03:00:00     6.6
2005-02-02 03:30:00     7.2
2005-02-02 04:00:00     7.6
2005-02-02 04:30:00     7.6
2005-02-02 05:00:00     7.6
2005-02-02 05:30:00     7.6
2005-02-02 06:00:00     7.6
2005-02-02 06:30:00     7.8
2005-02-02 07:00:00     7.8
2005-02-02 07:30:00     7.8
2005-02-02 08:00:00     7.8
2005-02-02 08:30:00     7.8
2005-02-02 09:00:00     NaN
2005-02-02 09:30:00    19.8
2005-02-02 10:00:00    20.2
2005-02-02 10:30:00    20.2
2005-02-02 11:00:00    20.2
2005-02-02 11:30:00    20.4
2005-02-02 12:00:00    20.4
2005-02-02 12:30:00    20.4
2005-02-02 13:00:00    20.4
2005-02-02 13:30:00    20.4
2005-02-02 14:00:00    20.4
2005-02-02 14:30:00    20.4
2005-02-02 15:00:00    20.4
2005-02-02 15:30:00    20.4
2005-02-02 16:00:00    20.4
2005-02-02 16:30:00    20.4
2005-02-02 17:00:00    20.4
2005-02-02 17:3

In [47]:
# Brisbane

# Load the "prcp" series for list entry 3 (040913, BRISBANE).
# NOTE(review): the variables keep the dar_ prefix although this is not the
# Darwin station, and they overwrite whatever dar_* held before. Rename once
# it is confirmed that no later cell relies on these names.
# The with-block closes the file even if reading fails; the values are pulled
# into pandas before the handle is released.
with xr.open_dataset(tester_stations_inst[3]) as dar_ds:
    dar_rain = dar_ds["prcp"]
    dar_df = dar_rain.to_pandas()

dar_df.index = pd.to_datetime(dar_df.index)

# Inspect the raw values for a single day.
day_data = dar_df.loc["2022-02-26"]

day_data

time
2022-02-26 00:00:00      3.6
2022-02-26 00:30:00      5.2
2022-02-26 01:00:00      6.6
2022-02-26 01:30:00      7.6
2022-02-26 02:00:00     12.8
2022-02-26 02:30:00     17.2
2022-02-26 03:00:00     19.4
2022-02-26 03:30:00     22.8
2022-02-26 04:00:00     31.6
2022-02-26 04:30:00     42.0
2022-02-26 05:00:00     45.4
2022-02-26 05:30:00     50.0
2022-02-26 06:00:00     54.8
2022-02-26 06:30:00     60.8
2022-02-26 07:00:00     68.8
2022-02-26 07:30:00     74.2
2022-02-26 08:00:00     84.6
2022-02-26 08:30:00     97.0
2022-02-26 09:00:00    100.4
2022-02-26 09:30:00    101.6
2022-02-26 10:00:00    102.4
2022-02-26 10:30:00    108.2
2022-02-26 11:00:00    117.8
2022-02-26 11:30:00    119.8
2022-02-26 12:00:00    120.2
2022-02-26 12:30:00    125.2
2022-02-26 13:00:00    130.0
2022-02-26 13:30:00    133.0
2022-02-26 14:00:00    133.2
2022-02-26 14:30:00    134.8
2022-02-26 15:00:00    138.4
2022-02-26 15:30:00    142.2
2022-02-26 16:00:00    147.2
2022-02-26 16:30:00    153.8
2022-02-2

In [48]:
# Perth

# Load the "prcp" series for list entry 4 (009225, PERTH METRO).
# NOTE(review): the variables keep the dar_ prefix although this is not the
# Darwin station, and they overwrite whatever dar_* held before. Rename once
# it is confirmed that no later cell relies on these names.
# The with-block closes the file even if reading fails; the values are pulled
# into pandas before the handle is released.
with xr.open_dataset(tester_stations_inst[4]) as dar_ds:
    dar_rain = dar_ds["prcp"]
    dar_df = dar_rain.to_pandas()

dar_df.index = pd.to_datetime(dar_df.index)

# Inspect the raw values for a single day.
day_data = dar_df.loc["2010-06-23"]

day_data

time
2010-06-23 00:00:00    19.6
2010-06-23 00:30:00    19.6
2010-06-23 01:00:00    19.6
2010-06-23 01:30:00     0.0
2010-06-23 02:00:00     0.0
2010-06-23 02:30:00     0.6
2010-06-23 03:00:00     0.6
2010-06-23 03:30:00     0.6
2010-06-23 04:00:00     0.6
2010-06-23 04:30:00     1.0
2010-06-23 05:00:00     1.0
2010-06-23 05:30:00     1.0
2010-06-23 06:00:00     1.0
2010-06-23 06:30:00     3.2
2010-06-23 07:00:00     3.2
2010-06-23 07:30:00     3.2
2010-06-23 08:00:00     3.2
2010-06-23 08:30:00     3.2
2010-06-23 09:00:00     3.8
2010-06-23 09:30:00     3.8
2010-06-23 10:00:00     3.8
2010-06-23 10:30:00     3.8
2010-06-23 11:00:00     3.8
2010-06-23 11:30:00     3.8
2010-06-23 12:00:00     3.8
2010-06-23 12:30:00     3.8
2010-06-23 13:00:00     3.8
2010-06-23 13:30:00     3.8
2010-06-23 14:00:00     3.8
2010-06-23 14:30:00     3.8
2010-06-23 15:00:00     3.8
2010-06-23 15:30:00     3.8
2010-06-23 16:00:00     3.8
2010-06-23 16:30:00     3.8
2010-06-23 17:00:00     3.8
2010-06-23 17:3

In [49]:
# ID: 094029	Name: HOBART (ELLERSLIE ROAD)

# Load the "prcp" series for list entry 5 (094029, HOBART (ELLERSLIE ROAD)).
# NOTE(review): the variables keep the dar_ prefix although this is not the
# Darwin station, and they overwrite whatever dar_* held before. Rename once
# it is confirmed that no later cell relies on these names.
# The with-block closes the file even if reading fails; the values are pulled
# into pandas before the handle is released.
with xr.open_dataset(tester_stations_inst[5]) as dar_ds:
    dar_rain = dar_ds["prcp"]
    dar_df = dar_rain.to_pandas()

dar_df.index = pd.to_datetime(dar_df.index)

# Inspect the raw values for a single day.
day_data = dar_df.loc["2010-08-11"]

day_data

time
2010-08-11 00:00:00     5.8
2010-08-11 00:30:00     7.8
2010-08-11 01:00:00     NaN
2010-08-11 01:30:00    11.4
2010-08-11 02:00:00    11.8
2010-08-11 02:30:00    12.0
2010-08-11 03:00:00    14.0
2010-08-11 03:30:00    15.2
2010-08-11 04:00:00    15.8
2010-08-11 04:30:00    16.4
2010-08-11 05:00:00    17.8
2010-08-11 05:30:00    20.2
2010-08-11 06:00:00    21.6
2010-08-11 06:30:00    23.4
2010-08-11 07:00:00    25.4
2010-08-11 07:30:00    27.6
2010-08-11 08:00:00    30.0
2010-08-11 08:30:00    32.4
2010-08-11 09:00:00    35.2
2010-08-11 09:30:00    39.2
2010-08-11 10:00:00    43.0
2010-08-11 10:30:00    44.6
2010-08-11 11:00:00    46.0
2010-08-11 11:30:00    47.0
2010-08-11 12:00:00    47.6
2010-08-11 12:30:00    48.0
2010-08-11 13:00:00     NaN
2010-08-11 13:30:00    49.2
2010-08-11 14:00:00    50.6
2010-08-11 14:30:00    53.0
2010-08-11 15:00:00    54.2
2010-08-11 15:30:00    55.4
2010-08-11 16:00:00    56.8
2010-08-11 16:30:00    57.4
2010-08-11 17:00:00    58.0
2010-08-11 17:3