In [1]:
import datetime as datetime
import glob as glob
import os
import sys

import iris
import iris.coord_categorisation as cat
import iris.plot as iplt
import numpy as np
import pandas as pd

# Project root: every relative path used afterwards resolves against it
root_fp = "/nfs/a319/gy17m2a/PhD/"
os.chdir(root_fp)

# Silence the two warning categories that would otherwise flood the output.
# Each category is registered once; the most recently added filter is the
# one consulted first.
import warnings
from warnings import simplefilter

simplefilter(action='ignore', category=UserWarning)
simplefilter(action='ignore', category=FutureWarning)


# Make the shared project helpers importable, then pull in their names
sys.path.insert(0, os.path.join(root_fp, 'Scripts/GlobalFunctions'))
from Spatial_plotting_functions import *
from Spatial_geometry_functions import *


# ### Load necessary spatial data
# Outline of a square area surrounding Leeds, built by a project helper that
# is brought in by the star imports from the GlobalFunctions modules.
# NOTE(review): the {'init': 'epsg:XXXX'} form of CRS definition is deprecated
# in recent pyproj releases -- confirm what the helper expects before changing it.
leeds_at_centre_gdf = create_leeds_at_centre_outline({'init' :'epsg:3857'})


# ### Establish the corresponding ensemble member numbers
# Key: ensemble member number used in the hourly file names.
# Value: matching run id used in the first-30-minutes file names.
em_matching_dict = {
    '01': 'bc005',
    '04': 'bc006',
    '05': 'bc007',
    '06': 'bc009',
    '07': 'bc010',
    '08': 'bc011',
    '09': 'bc013',
    '10': 'bc015',
    '11': 'bc016',
    '12': 'bc017',
    '13': 'bc018',
    '15': 'bc012',
}

# Path components shared by the data-loading cells
resolution = '2.2km'
yrs_range = "2002_2020"

In [2]:
# Ensemble member number as it appears in the hourly file names
em_1hr = '05'
# Matching run id as it appears in the first-30-minutes file names
em_30mins = em_matching_dict[em_1hr]
# NOTE: in the cells visible here, month_num is always re-assigned (by the
# year/month loop and by the single-month cells) before it is read.
month_num = '07'

In [None]:
# Sanity check of the reconstructed second half hour. For every summer month
# of every year: load the hourly cube and the matching first-30-minutes cube,
# derive the second half hour as (2 * hourly value) - first half hour, and
# print summary statistics of the three fields.
for yr in range(2002,2021):
    for month_num in ['06', '07', '08']:
        print(yr, month_num)
        #######################################################
        ## Get one month of data - HOURLY
        #######################################################
        # Directory and file-name pattern for this ensemble member and month
        hourly_dir = f'{root_fp}datadir/UKCP18_hourly/{resolution}/{em_1hr}'
        hourly_name = f'pr_rcp85_land-cpm_uk_{resolution}_{em_1hr}_1hr_{yr}{month_num}*'

        # Look in the yrs_range folder first. glob returns matches in arbitrary
        # order, so sort to make the cube picked below deterministic.
        general_filename_1hr = f'{hourly_dir}/{yrs_range}/{hourly_name}'
        filenames_1hr = sorted(glob.glob(general_filename_1hr))

        # If nothing matches in the yrs_range folder, then check 1980_2001
        if not filenames_1hr:
            general_filename_1hr = f'{hourly_dir}/1980_2001/{hourly_name}'
            filenames_1hr = sorted(glob.glob(general_filename_1hr))

        # Fail here, with the pattern in the message, rather than further down
        if not filenames_1hr:
            raise FileNotFoundError(
                f"No hourly files found for {yr}{month_num} "
                f"(last pattern tried: {general_filename_1hr})")

        # ### Load in the data and remove the ensemble member dimension
        monthly_cubes_list_1hr = iris.load(filenames_1hr)
        cube_1hr = monthly_cubes_list_1hr[0]
        cube_1hr = cube_1hr[0,:,:,:]

        #######################################################
        ## Get one month of data - 30mins
        #######################################################
        # ### Get all files for this ensemble member
        general_filename_30mins = f'{root_fp}datadir/UKCP18_first30mins/{yrs_range}/{em_30mins}/{em_30mins}a.pr{yr}{month_num}*'
        filenames_first30mins = sorted(glob.glob(general_filename_30mins))
        if not filenames_first30mins:
            raise FileNotFoundError(
                f"No first-30-minutes files found for {yr}{month_num} "
                f"(pattern: {general_filename_30mins})")

        # ### Load in the data
        monthly_cubes_list_30mins = iris.load(filenames_first30mins)

        # Equalise: drop attributes that would stop the cubes concatenating.
        # NOTE(review): forecast_period / forecast_reference_time are usually
        # coordinates rather than attributes -- confirm this step has any effect.
        for cube in monthly_cubes_list_30mins:
            for attr in ['forecast_period', 'forecast_reference_time']:
                cube.attributes.pop(attr, None)

        monthly_cube_30mins = monthly_cubes_list_30mins.concatenate_cube()

        # ### Trim to be the same shape as the hourly data
        # (24 cells are removed from each edge of the two trailing dimensions)
        monthly_cube_30mins_1st = monthly_cube_30mins[:,24:-24,24:-24]

        # The arithmetic below is element-wise, so the two cubes must line up
        if monthly_cube_30mins_1st.shape != cube_1hr.shape:
            raise ValueError(
                f"Shape mismatch for {yr}{month_num}: hourly {cube_1hr.shape} "
                f"vs first 30 mins {monthly_cube_30mins_1st.shape}")

        # ### Convert units of 30 mins data
        # Set the units to those of the 1 hr cube ...
        monthly_cube_30mins_1st.units = cube_1hr.units
        # ... and scale the values to match (x3600: presumably per-second to
        # per-hour -- confirm against the source units of the 30 mins files)
        monthly_cube_30mins_1st_data = monthly_cube_30mins_1st.data*3600
        monthly_cube_30mins_1st.data = monthly_cube_30mins_1st_data

        #######################################################
        ## Find the second half of the hour, using the first half of hour and hourly values
        #######################################################
        # get the hourly data
        cube_1hr_data = cube_1hr.data
        # calculate value for second half of hour
        second_half_of_the_hour_mean_hourly_rainfall_rate_data = (2.0 *cube_1hr_data)-monthly_cube_30mins_1st_data
        # New cube for the second half of the hour: same metadata as the first
        # half, with the calculated values passed in directly so the first-half
        # data array is not copied only to be overwritten
        monthly_cube_30mins_2nd = monthly_cube_30mins_1st.copy(
            data=second_half_of_the_hour_mean_hourly_rainfall_rate_data)

        # ### Edit the times to be 30 mins later
        # get the times from the first half hour
        first_half_hour_times = monthly_cube_30mins_1st.coord('time').copy()
        # add 30 mins (assumes the time coordinate is expressed in hours -- TODO confirm)
        second_half_hour_times = first_half_hour_times + 0.5
        # for the second half hour cube, remove the time dimension and then re-add the edited one
        monthly_cube_30mins_2nd.remove_coord('time')
        monthly_cube_30mins_2nd.add_dim_coord(second_half_hour_times, 0)

        # ### Summary statistics of the three fields
        fields = [("1st half hour", monthly_cube_30mins_1st_data),
                  ("whole hour", cube_1hr_data),
                  ("2nd half hour", second_half_of_the_hour_mean_hourly_rainfall_rate_data)]
        for stat_label, stat_func, fmt in [("minimum", np.nanmin, ".2f"),
                                           ("mean", np.nanmean, ".5f"),
                                           ("max", np.nanmax, ".2f")]:
            for field_label, values in fields:
                print(f"{stat_label} {field_label} is {stat_func(values):{fmt}}")
            print()

        # Flatten to one row per (time, y, x) cell. np.ma.getdata returns the
        # underlying values whether or not the array is masked.
        cube_1hr_flat = np.ma.getdata(cube_1hr_data).flatten()
        flat = np.ma.getdata(second_half_of_the_hour_mean_hourly_rainfall_rate_data).flatten()
        df = pd.DataFrame({'hr':cube_1hr_flat, '1st_half':monthly_cube_30mins_1st_data.flatten(), '2nd_half': flat})

        # Find instances where when there is no rain in the hourly data, there is in the 30 mins
        hour_0 = df[df['hr'] == 0]
        print(f"DF of rows with a val in 1st half hour, but not in full hour {hour_0[hour_0['1st_half']>0.001]}")


In [9]:
# Single-month version of the second-half-hour sanity check
month_num = '06'
yr = 2020

# NOTE(review): the output recorded for this cell has a 1st-half-hour maximum
# (181.00) larger than twice the whole-hour maximum (74.68) and a 2nd-half-hour
# minimum of -181.00. That cannot happen if both inputs describe the same
# hours in the same units, so check the time alignment / ensemble matching of
# the two datasets before relying on the derived field.

#######################################################
## Get one month of data - HOURLY
#######################################################
# Directory and file-name pattern for this ensemble member and month
hourly_dir = f'{root_fp}datadir/UKCP18_hourly/{resolution}/{em_1hr}'
hourly_name = f'pr_rcp85_land-cpm_uk_{resolution}_{em_1hr}_1hr_{yr}{month_num}*'

# Look in the yrs_range folder first. glob returns matches in arbitrary
# order, so sort to make the cube picked below deterministic.
general_filename_1hr = f'{hourly_dir}/{yrs_range}/{hourly_name}'
filenames_1hr = sorted(glob.glob(general_filename_1hr))

# If nothing matches in the yrs_range folder, then check 1980_2001
if not filenames_1hr:
    general_filename_1hr = f'{hourly_dir}/1980_2001/{hourly_name}'
    filenames_1hr = sorted(glob.glob(general_filename_1hr))

# Fail here, with the pattern in the message, rather than further down
if not filenames_1hr:
    raise FileNotFoundError(
        f"No hourly files found for {yr}{month_num} "
        f"(last pattern tried: {general_filename_1hr})")

# ### Load in the data and remove the ensemble member dimension
monthly_cubes_list_1hr = iris.load(filenames_1hr)
cube_1hr = monthly_cubes_list_1hr[0]
cube_1hr = cube_1hr[0,:,:,:]

#######################################################
## Get one month of data - 30mins
#######################################################
# ### Get all files for this ensemble member
general_filename_30mins = f'{root_fp}datadir/UKCP18_first30mins/{yrs_range}/{em_30mins}/{em_30mins}a.pr{yr}{month_num}*'
filenames_first30mins = sorted(glob.glob(general_filename_30mins))
if not filenames_first30mins:
    raise FileNotFoundError(
        f"No first-30-minutes files found for {yr}{month_num} "
        f"(pattern: {general_filename_30mins})")

# ### Load in the data
monthly_cubes_list_30mins = iris.load(filenames_first30mins)

# Equalise: drop attributes that would stop the cubes concatenating.
# NOTE(review): forecast_period / forecast_reference_time are usually
# coordinates rather than attributes -- confirm this step has any effect.
for cube in monthly_cubes_list_30mins:
    for attr in ['forecast_period', 'forecast_reference_time']:
        cube.attributes.pop(attr, None)

monthly_cube_30mins = monthly_cubes_list_30mins.concatenate_cube()

# ### Trim to be the same shape as the hourly data
# (24 cells are removed from each edge of the two trailing dimensions)
monthly_cube_30mins_1st = monthly_cube_30mins[:,24:-24,24:-24]

# The arithmetic below is element-wise, so the two cubes must line up
if monthly_cube_30mins_1st.shape != cube_1hr.shape:
    raise ValueError(
        f"Shape mismatch for {yr}{month_num}: hourly {cube_1hr.shape} "
        f"vs first 30 mins {monthly_cube_30mins_1st.shape}")

# ### Convert units of 30 mins data
# Set the units to those of the 1 hr cube ...
monthly_cube_30mins_1st.units = cube_1hr.units
# ... and scale the values to match (x3600: presumably per-second to
# per-hour -- confirm against the source units of the 30 mins files)
monthly_cube_30mins_1st_data = monthly_cube_30mins_1st.data*3600
monthly_cube_30mins_1st.data = monthly_cube_30mins_1st_data

#######################################################
## Find the second half of the hour, using the first half of hour and hourly values
#######################################################
# get the hourly data
cube_1hr_data = cube_1hr.data
# calculate value for second half of hour
second_half_of_the_hour_mean_hourly_rainfall_rate_data = (2.0 *cube_1hr_data)-monthly_cube_30mins_1st_data
# New cube for the second half of the hour: same metadata as the first half,
# with the calculated values passed in directly so the first-half data array
# is not copied only to be overwritten
monthly_cube_30mins_2nd = monthly_cube_30mins_1st.copy(
    data=second_half_of_the_hour_mean_hourly_rainfall_rate_data)

# ### Edit the times to be 30 mins later
# get the times from the first half hour
first_half_hour_times = monthly_cube_30mins_1st.coord('time').copy()
# add 30 mins (assumes the time coordinate is expressed in hours -- TODO confirm)
second_half_hour_times = first_half_hour_times + 0.5
# for the second half hour cube, remove the time dimension and then re-add the edited one
monthly_cube_30mins_2nd.remove_coord('time')
monthly_cube_30mins_2nd.add_dim_coord(second_half_hour_times, 0)

# ### Summary statistics of the three fields
fields = [("1st half hour", monthly_cube_30mins_1st_data),
          ("whole hour", cube_1hr_data),
          ("2nd half hour", second_half_of_the_hour_mean_hourly_rainfall_rate_data)]
for stat_label, stat_func, fmt in [("minimum", np.nanmin, ".2f"),
                                   ("mean", np.nanmean, ".5f"),
                                   ("max", np.nanmax, ".2f")]:
    for field_label, values in fields:
        print(f"{stat_label} {field_label} is {stat_func(values):{fmt}}")

# cube_1hr_flat = cube_1hr.data.data.flatten()
# flat = second_half_of_the_hour_mean_hourly_rainfall_rate_data.data.flatten()
# df = pd.DataFrame({'hr':cube_1hr_flat, '1st_half':monthly_cube_30mins_1st_data.flatten(), '2nd_half': flat})
# sorted_df = df.sort_values(by='hr')

# # Find instances where when there is no rain in the hourly data, there is in the 30 mins
# hour_0 = df[df['hr'] == 0]
# print(f"DF of rows with a val in 1st half hour, but not in full hour {hour_0[hour_0['1st_half']>0.001]}")


minimum 1st half hour is 0.00
minimum whole hour is 0.00
minimum 2nd half hour is -181.00
mean 1st half hour is 0.08368
mean whole hour is 0.13337
mean 2nd half hour is 0.18306
max 1st half hour is 181.00
max whole hour is 74.68
max 2nd half hour is 149.35


In [8]:
# Single-month version of the second-half-hour sanity check
month_num = '07'
yr = 2020

# NOTE(review): the output recorded for this cell has a 1st-half-hour maximum
# (117.10) larger than twice the whole-hour maximum (74.68) and a 2nd-half-hour
# minimum of -117.00. That cannot happen if both inputs describe the same
# hours in the same units, so check the time alignment / ensemble matching of
# the two datasets before relying on the derived field.

#######################################################
## Get one month of data - HOURLY
#######################################################
# Directory and file-name pattern for this ensemble member and month
hourly_dir = f'{root_fp}datadir/UKCP18_hourly/{resolution}/{em_1hr}'
hourly_name = f'pr_rcp85_land-cpm_uk_{resolution}_{em_1hr}_1hr_{yr}{month_num}*'

# Look in the yrs_range folder first. glob returns matches in arbitrary
# order, so sort to make the cube picked below deterministic.
general_filename_1hr = f'{hourly_dir}/{yrs_range}/{hourly_name}'
filenames_1hr = sorted(glob.glob(general_filename_1hr))

# If nothing matches in the yrs_range folder, then check 1980_2001
if not filenames_1hr:
    general_filename_1hr = f'{hourly_dir}/1980_2001/{hourly_name}'
    filenames_1hr = sorted(glob.glob(general_filename_1hr))

# Fail here, with the pattern in the message, rather than further down
if not filenames_1hr:
    raise FileNotFoundError(
        f"No hourly files found for {yr}{month_num} "
        f"(last pattern tried: {general_filename_1hr})")

# ### Load in the data and remove the ensemble member dimension
monthly_cubes_list_1hr = iris.load(filenames_1hr)
cube_1hr = monthly_cubes_list_1hr[0]
cube_1hr = cube_1hr[0,:,:,:]

#######################################################
## Get one month of data - 30mins
#######################################################
# ### Get all files for this ensemble member
general_filename_30mins = f'{root_fp}datadir/UKCP18_first30mins/{yrs_range}/{em_30mins}/{em_30mins}a.pr{yr}{month_num}*'
filenames_first30mins = sorted(glob.glob(general_filename_30mins))
if not filenames_first30mins:
    raise FileNotFoundError(
        f"No first-30-minutes files found for {yr}{month_num} "
        f"(pattern: {general_filename_30mins})")

# ### Load in the data
monthly_cubes_list_30mins = iris.load(filenames_first30mins)

# Equalise: drop attributes that would stop the cubes concatenating.
# NOTE(review): forecast_period / forecast_reference_time are usually
# coordinates rather than attributes -- confirm this step has any effect.
for cube in monthly_cubes_list_30mins:
    for attr in ['forecast_period', 'forecast_reference_time']:
        cube.attributes.pop(attr, None)

monthly_cube_30mins = monthly_cubes_list_30mins.concatenate_cube()

# ### Trim to be the same shape as the hourly data
# (24 cells are removed from each edge of the two trailing dimensions)
monthly_cube_30mins_1st = monthly_cube_30mins[:,24:-24,24:-24]

# The arithmetic below is element-wise, so the two cubes must line up
if monthly_cube_30mins_1st.shape != cube_1hr.shape:
    raise ValueError(
        f"Shape mismatch for {yr}{month_num}: hourly {cube_1hr.shape} "
        f"vs first 30 mins {monthly_cube_30mins_1st.shape}")

# ### Convert units of 30 mins data
# Set the units to those of the 1 hr cube ...
monthly_cube_30mins_1st.units = cube_1hr.units
# ... and scale the values to match (x3600: presumably per-second to
# per-hour -- confirm against the source units of the 30 mins files)
monthly_cube_30mins_1st_data = monthly_cube_30mins_1st.data*3600
monthly_cube_30mins_1st.data = monthly_cube_30mins_1st_data

#######################################################
## Find the second half of the hour, using the first half of hour and hourly values
#######################################################
# get the hourly data
cube_1hr_data = cube_1hr.data
# calculate value for second half of hour
second_half_of_the_hour_mean_hourly_rainfall_rate_data = (2.0 *cube_1hr_data)-monthly_cube_30mins_1st_data
# New cube for the second half of the hour: same metadata as the first half,
# with the calculated values passed in directly so the first-half data array
# is not copied only to be overwritten
monthly_cube_30mins_2nd = monthly_cube_30mins_1st.copy(
    data=second_half_of_the_hour_mean_hourly_rainfall_rate_data)

# ### Edit the times to be 30 mins later
# get the times from the first half hour
first_half_hour_times = monthly_cube_30mins_1st.coord('time').copy()
# add 30 mins (assumes the time coordinate is expressed in hours -- TODO confirm)
second_half_hour_times = first_half_hour_times + 0.5
# for the second half hour cube, remove the time dimension and then re-add the edited one
monthly_cube_30mins_2nd.remove_coord('time')
monthly_cube_30mins_2nd.add_dim_coord(second_half_hour_times, 0)

# ### Summary statistics of the three fields
fields = [("1st half hour", monthly_cube_30mins_1st_data),
          ("whole hour", cube_1hr_data),
          ("2nd half hour", second_half_of_the_hour_mean_hourly_rainfall_rate_data)]
for stat_label, stat_func, fmt in [("minimum", np.nanmin, ".2f"),
                                   ("mean", np.nanmean, ".5f"),
                                   ("max", np.nanmax, ".2f")]:
    for field_label, values in fields:
        print(f"{stat_label} {field_label} is {stat_func(values):{fmt}}")

# cube_1hr_flat = cube_1hr.data.data.flatten()
# flat = second_half_of_the_hour_mean_hourly_rainfall_rate_data.data.flatten()
# df = pd.DataFrame({'hr':cube_1hr_flat, '1st_half':monthly_cube_30mins_1st_data.flatten(), '2nd_half': flat})
# sorted_df = df.sort_values(by='hr')

# # Find instances where when there is no rain in the hourly data, there is in the 30 mins
# hour_0 = df[df['hr'] == 0]
# print(f"DF of rows with a val in 1st half hour, but not in full hour {hour_0[hour_0['1st_half']>0.001]}")


minimum 1st half hour is 0.00
minimum whole hour is 0.00
minimum 2nd half hour is -117.00
mean 1st half hour is 0.10422
mean whole hour is 0.13337
mean 2nd half hour is 0.16252
max 1st half hour is 117.10
max whole hour is 74.68
max 2nd half hour is 149.35
