## Importing packages

In [1]:
import os
import numpy as np
import pandas as pd
import iris
import iris.coord_categorisation
import iris.analysis
import matplotlib.pyplot as plt
from iris.cube import CubeList

In [2]:
# Useful functions

def add_season_year_dim(cube, seasons=None):
    '''A function that returns a cube with a season_year coordinate attached.

    Any existing season_year coordinate is removed first and a new one is
    derived from the cube's time coordinate. The cube is modified in place.

    Input args:
    ----------
    cube    -- the cube to add the season_year to
    seasons -- optional sequence of season strings; when omitted (or empty)
               the standard ('djf', 'mam', 'jja', 'son') seasons are used

    Returns:
    -------
    The same cube object that was passed in.
    '''
    try:
        cube.remove_coord('season_year')
    except iris.exceptions.CoordinateNotFoundError:
        # Nothing to replace: the cube has no season_year coordinate yet.
        # Any other error is a genuine problem and is allowed to propagate.
        pass
    iris.coord_categorisation.add_season_year(
        cube, 'time', name='season_year',
        seasons=seasons or ('djf', 'mam', 'jja', 'son'))
    return cube

def add_season_dim(cube):
    '''A function that returns a cube with a season coordinate attached.

    Any existing season coordinate is removed first and a new one is derived
    from the cube's time coordinate using the ('djf', 'mam', 'jja', 'son')
    seasons. The cube is modified in place.

    Input args:
    ----------
    cube  -- the cube to add the season to

    Returns:
    -------
    The same cube object that was passed in.
    '''
    try:
        cube.remove_coord('season')
    except iris.exceptions.CoordinateNotFoundError:
        # Nothing to replace: the cube has no season coordinate yet.
        # Any other error is a genuine problem and is allowed to propagate.
        pass
    iris.coord_categorisation.add_season(
        cube, 'time', name='season',
        seasons=('djf', 'mam', 'jja', 'son'))
    return cube


def add_day_of_month_dim(cube):
    '''A function that returns a cube with a day_of_month coordinate attached.

    Any existing day_of_month coordinate is removed first and a new one is
    derived from the cube's time coordinate. The cube is modified in place.

    Input args:
    ----------
    cube  -- the cube to add the day of the month to

    Returns:
    -------
    The same cube object that was passed in.
    '''
    try:
        cube.remove_coord('day_of_month')
    except iris.exceptions.CoordinateNotFoundError:
        # Nothing to replace: the cube has no day_of_month coordinate yet.
        # Any other error is a genuine problem and is allowed to propagate.
        pass
    iris.coord_categorisation.add_day_of_month(cube, 'time', name='day_of_month')
    return cube

def add_day_of_year_dim(cube):
    '''A function that returns a cube with a day_of_year coordinate attached.

    Any existing day_of_year coordinate is removed first and a new one is
    derived from the cube's time coordinate. The cube is modified in place.

    Input args:
    ----------
    cube  -- the cube to add the day of the year to

    Returns:
    -------
    The same cube object that was passed in.
    '''
    try:
        cube.remove_coord('day_of_year')
    except iris.exceptions.CoordinateNotFoundError:
        # Nothing to replace: the cube has no day_of_year coordinate yet.
        # Any other error is a genuine problem and is allowed to propagate.
        pass
    iris.coord_categorisation.add_day_of_year(cube, 'time', name='day_of_year')
    return cube

def add_hour_dim(cube):
    '''A function that returns a cube with an hour coordinate attached.

    Any existing hour coordinate is removed first and a new one is derived
    from the cube's time coordinate. The cube is modified in place.

    Input args:
    ----------
    cube  -- the cube to add the hour to

    Returns:
    -------
    The same cube object that was passed in.
    '''
    try:
        cube.remove_coord('hour')
    except iris.exceptions.CoordinateNotFoundError:
        # Nothing to replace: the cube has no hour coordinate yet.
        # Any other error is a genuine problem and is allowed to propagate.
        pass
    iris.coord_categorisation.add_hour(cube, 'time', name='hour')
    return cube

In [3]:
def load_my_files():
    """Load the heat index cubes and the tas, rh, td and es cubes from disk.

    File names are built from the current working directory and the
    module-level ``location_name``, ``start_year`` and ``end_year``.

    Returns
    -------
    tuple
        ``(heat_indeces_cubelist, tas_cube, rh_cube, td_cube, es_cube)``.
        Every heat index cube gets a ``day_of_year`` coordinate and has its
        units converted to Kelvin; the other four cubes are returned as loaded.
    """
    heat_index_stem = f'{os.getcwd()}/heat_index_cubes_nc_files/{location_name}_heat_index'

    # Heat index files are numbered 1..21.
    heat_indeces_cubelist = CubeList([])
    for number in range(1, 22):
        heat_index_cube = iris.load_cube(f'{heat_index_stem}_{number}.nc')
        heat_indeces_cubelist.append(heat_index_cube)
        print(f'Loaded Heat Index {number} cube')

    for heat_index_cube in heat_indeces_cubelist:
        # Note: day_of_year is not added to the basic cubes loaded below
        iris.coord_categorisation.add_day_of_year(heat_index_cube, 'time')
        heat_index_cube.convert_units('Kelvin')

    def _load_hourly(variable):
        # Load the hourly cube of one variable and report progress.
        loaded = iris.load_cube(
            f'{os.getcwd()}/{location_name}_{variable}_hourly_{start_year}-{end_year}.nc')
        print(f'Loaded {variable} cube')
        return loaded

    tas_cube = _load_hourly('tas')
    rh_cube = _load_hourly('rh')
    td_cube = _load_hourly('td')
    es_cube = _load_hourly('es')

    # heat_indeces_cubelist.sort(key=lambda x: int(''.join(filter(str.isdigit, x.name()))))

    return heat_indeces_cubelist, tas_cube, rh_cube, td_cube, es_cube

In [4]:
def add_dimensions_to_my_cubes():
    """Add a ``day_of_year`` coordinate to every loaded cube that lacks one.

    Works in place on the module-level ``heat_indeces_cubelist``,
    ``tas_cube``, ``rh_cube``, ``td_cube`` and ``es_cube``. Returns nothing.
    """
    # The cubelist has to be unpacked: looping over the bare tuple would pass
    # the CubeList itself (rather than its cubes) to add_day_of_year.
    for cube in [*heat_indeces_cubelist, tas_cube, rh_cube, td_cube, es_cube]:
        # add_day_of_year refuses to add a coordinate name that already exists
        # (the heat index cubes get theirs in load_my_files), so skip those.
        if not cube.coords('day_of_year'):
            iris.coord_categorisation.add_day_of_year(cube, 'time')

        ##TODO: add anything else I want from cube_creation_backend that I removed for efficiency


In [5]:
def create_yearly_aggregates():
    """Build per-season-year mean, min and max series for tas, rh, td and es.

    Each source cube is aggregated by ``season_year`` and then collapsed over
    latitude and longitude using the same statistic. The results are stored in
    the module-level ``<variable>_cube_yearly_<statistic>`` names; nothing is
    returned.
    """
    global tas_cube_yearly_mean, rh_cube_yearly_mean, td_cube_yearly_mean, es_cube_yearly_mean
    global tas_cube_yearly_min, rh_cube_yearly_min, td_cube_yearly_min, es_cube_yearly_min
    global tas_cube_yearly_max, rh_cube_yearly_max, td_cube_yearly_max, es_cube_yearly_max

    def _yearly(source_cube, statistic):
        # One value per season_year, then reduced over the horizontal grid.
        per_year = source_cube.aggregated_by('season_year', statistic)
        return per_year.collapsed(['latitude', 'longitude'], statistic)

    # Mean cubes
    tas_cube_yearly_mean = _yearly(tas_cube, iris.analysis.MEAN)
    rh_cube_yearly_mean = _yearly(rh_cube, iris.analysis.MEAN)
    td_cube_yearly_mean = _yearly(td_cube, iris.analysis.MEAN)
    es_cube_yearly_mean = _yearly(es_cube, iris.analysis.MEAN)

    # Min cubes
    tas_cube_yearly_min = _yearly(tas_cube, iris.analysis.MIN)
    rh_cube_yearly_min = _yearly(rh_cube, iris.analysis.MIN)
    td_cube_yearly_min = _yearly(td_cube, iris.analysis.MIN)
    es_cube_yearly_min = _yearly(es_cube, iris.analysis.MIN)

    # Max cubes
    tas_cube_yearly_max = _yearly(tas_cube, iris.analysis.MAX)
    rh_cube_yearly_max = _yearly(rh_cube, iris.analysis.MAX)
    td_cube_yearly_max = _yearly(td_cube, iris.analysis.MAX)
    es_cube_yearly_max = _yearly(es_cube, iris.analysis.MAX)

def create_overall_aggregates():
    """Build overall mean, min and max maps for tas, rh, td and es.

    Each source cube is aggregated by ``season_year`` and then collapsed along
    ``time`` using the same statistic, leaving the spatial dimensions intact.
    The results are stored in the module-level
    ``<variable>_cube_overall_<statistic>`` names; nothing is returned.
    """
    global tas_cube_overall_mean, rh_cube_overall_mean, td_cube_overall_mean, es_cube_overall_mean
    global tas_cube_overall_min, rh_cube_overall_min, td_cube_overall_min, es_cube_overall_min
    global tas_cube_overall_max, rh_cube_overall_max, td_cube_overall_max, es_cube_overall_max

    def _overall(source_cube, statistic):
        # Reduce to one value per season_year, then across all of those years.
        per_year = source_cube.aggregated_by('season_year', statistic)
        return per_year.collapsed('time', statistic)

    # Calculating the OVERALL mean, min, and max for each variable
    # Mean cubes
    tas_cube_overall_mean = _overall(tas_cube, iris.analysis.MEAN)
    rh_cube_overall_mean = _overall(rh_cube, iris.analysis.MEAN)
    td_cube_overall_mean = _overall(td_cube, iris.analysis.MEAN)
    es_cube_overall_mean = _overall(es_cube, iris.analysis.MEAN)

    # Min cubes
    tas_cube_overall_min = _overall(tas_cube, iris.analysis.MIN)
    rh_cube_overall_min = _overall(rh_cube, iris.analysis.MIN)
    td_cube_overall_min = _overall(td_cube, iris.analysis.MIN)
    es_cube_overall_min = _overall(es_cube, iris.analysis.MIN)

    # Max cubes
    tas_cube_overall_max = _overall(tas_cube, iris.analysis.MAX)
    rh_cube_overall_max = _overall(rh_cube, iris.analysis.MAX)
    td_cube_overall_max = _overall(td_cube, iris.analysis.MAX)
    es_cube_overall_max = _overall(es_cube, iris.analysis.MAX)

In [6]:
# Heat index cubes over time
def collapse_cubes_for_time_series():
    """
    Collapses every heat index cube over latitude and longitude (mean).

    Reads the module-level ``heat_indeces_cubelist``; the function takes no
    arguments. The spatial mean of each cube is taken, so the remaining
    dimensions (e.g. time) are kept for time series plots.

    Returns
    -------
    iris.cube.CubeList
        The spatially collapsed cubes, in the same order as the input list.
        The same object is also stored in the module-level
        ``time_series_cubelist``.
    """
    global time_series_cubelist
    time_series_cubelist = CubeList([
        cube.collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
        for cube in heat_indeces_cubelist
    ])
    return time_series_cubelist

# Heat index cubes over space
def collapse_cubes_for_maps():
    """
    Collapses every heat index cube along the time dimension (mean).

    Reads the module-level ``heat_indeces_cubelist``; the function takes no
    arguments. Each cube is first averaged within each ``season_year`` and
    those yearly means are then averaged over ``time``, leaving the spatial
    dimensions for map plots.

    Returns
    -------
    list
        The time-collapsed cubes (a plain list, not a CubeList), in the same
        order as the input list. The same object is also stored in the
        module-level ``maps_cubelist``.
    """
    global maps_cubelist
    maps_cubelist = [
        cube.aggregated_by('season_year', iris.analysis.MEAN)
            .collapsed('time', iris.analysis.MEAN)
        for cube in heat_indeces_cubelist
    ]
    return maps_cubelist

In [7]:
def collapse_air_temperature_for_time_series():
    """Return the per-season-year mean of ``tas_cube``, averaged over space.

    The result is also stored in the module-level ``tas_time_series_cube``.
    """
    global tas_time_series_cube
    yearly_tas = tas_cube.aggregated_by('season_year', iris.analysis.MEAN)
    tas_time_series_cube = yearly_tas.collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
    return tas_time_series_cube

def collapse_air_temperature_for_maps():
    """Return the mean of ``tas_cube`` over the ``time`` coordinate.

    The result is also stored in the module-level ``tas_map_cube``.
    """
    global tas_map_cube
    time_mean = tas_cube.collapsed('time', iris.analysis.MEAN)
    tas_map_cube = time_mean
    return time_mean

# Basic plots

In [8]:
def plot_my_region():
    """Draw the module-level ``shapefile`` on a fresh figure and show it."""
    _, region_axes = plt.subplots()
    shapefile.plot(ax=region_axes)
    plt.show()

In [9]:
def calculate_mean_tas_per_month():
    """Return ``tas_cube`` averaged over space and then within each month.

    The module-level ``tas_cube`` is first collapsed over latitude and
    longitude and then aggregated by its ``month`` coordinate (both by mean).
    """
    return (
        tas_cube
        .collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
        .aggregated_by('month', iris.analysis.MEAN)
    )

def calculate_mean_rh_per_month():
    """Return ``rh_cube`` averaged over space and then within each month.

    The module-level ``rh_cube`` is first collapsed over latitude and
    longitude and then aggregated by its ``month`` coordinate (both by mean).
    """
    return (
        rh_cube
        .collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
        .aggregated_by('month', iris.analysis.MEAN)
    )

def plot_monthly_average_tas_and_rh():
    """Plot and save the monthly mean temperature and relative humidity.

    Three figures are produced and saved as PNG files in
    ``unprocessed_output_folder``: temperature alone, relative humidity alone,
    and both together on twin y-axes. Each figure is also shown.
    """
    tas_monthly = calculate_mean_tas_per_month()
    rh_monthly = calculate_mean_rh_per_month()

    def _single_series_figure(monthly_cube, label, colour, ylabel, title, png_path):
        # One line plot of a monthly cube against its month coordinate.
        _, axes = plt.subplots()
        axes.plot(monthly_cube.coord('month').points, monthly_cube.data,
                  label=label, color=colour, marker='o')
        axes.set_xlabel('Month')
        axes.set_ylabel(ylabel)
        plt.title(title)
        plt.savefig(png_path)
        plt.show()

    # Average temperature per month
    _single_series_figure(
        tas_monthly, 'tas', 'red', 'Temperature (˚C)',
        f'Average Temperature per Month ({start_year}-{end_year})',
        f'{unprocessed_output_folder}/{location_name}_average_temperature_per_month.png',
    )

    # Average humidity per month
    _single_series_figure(
        rh_monthly, 'rh', 'blue', 'Relative Humidity (%)',
        f'Average Relative Humidity per Month ({start_year}-{end_year})',
        f'{unprocessed_output_folder}/{location_name}_average_relative_humidity_per_month.png',
    )

    # Temperature and humidity together: temperature on the primary y-axis...
    _, ax_temp = plt.subplots()
    ax_temp.plot(tas_monthly.coord('month').points, tas_monthly.data,
                 label='Temperature', color='red', marker='o')
    ax_temp.set_xlabel('Month')
    ax_temp.set_ylabel('Temperature (˚C)', color='red')
    ax_temp.tick_params(axis='y', labelcolor='red')

    # ...and humidity on a secondary y-axis sharing the same x-axis.
    ax_hum = ax_temp.twinx()
    ax_hum.plot(rh_monthly.coord('month').points, rh_monthly.data,
                label='Humidity', color='blue', marker='o')
    ax_hum.set_ylabel('Relative Humidity (%)', color='blue')
    ax_hum.tick_params(axis='y', labelcolor='blue')

    plt.title(f'Average Temperature and Relative Humidity per Month ({start_year}-{end_year})')

    plt.savefig(f'{unprocessed_output_folder}/{location_name}_average_temperature_and_relative_humidity_per_month.png')
    plt.show()

In [10]:
# calculate_avg_tas_per_month_per_year()

In [11]:
# create_average_temperature_per_month_per_year_plots()

In [12]:
## TODO: Plot using months 

In [13]:
# Plot the mean temperature and relative humidity per hour for the DJF season across space and time
def plot_hourly_average_tas_and_rh():
    """Plot and save the mean temperature and relative humidity per hour.

    Three figures are produced and saved as PNG files in
    ``unprocessed_output_folder``: temperature alone, relative humidity alone,
    and both together on twin y-axes. Each figure is also shown.

    Returns
    -------
    tuple
        ``(mean_tas_per_hour, mean_rh_per_hour)``, the cubes returned by
        ``calculate_mean_tas_per_hour`` and ``calculate_mean_rh_per_hour``.
    """
    mean_tas_per_hour = calculate_mean_tas_per_hour()
    mean_rh_per_hour = calculate_mean_rh_per_hour()

    # Create a dataframe of the mean temperature and humidity per hour to plot,
    # sorted so the lines are drawn in hour order.
    # NOTE(review): the hour column comes from the tas cube only, so this
    # assumes the rh cube lists its hours in the same order -- confirm.
    mean_values_per_hour_df = pd.DataFrame({
        'hour': mean_tas_per_hour.coord('hour').points,
        'mean_tas': mean_tas_per_hour.data,
        'mean_rh': mean_rh_per_hour.data,
    })
    mean_values_per_hour_df = mean_values_per_hour_df.sort_values(by='hour').reset_index(drop=True)

    hours = mean_values_per_hour_df['hour']

    def _format_hour_axis(axes):
        # One tick per hour of the day, spanning 0-23.
        axes.set_xlabel('Hour')
        axes.set_xticks(np.arange(0, 24, 1))
        axes.set_xlim(0, 23)

    ## Plot of the average temperature per hour
    # figsize is given to subplots() itself: calling plt.figure(figsize=...)
    # first would only create an extra, empty figure that is never drawn on.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(hours, mean_values_per_hour_df['mean_tas'], label='tas', color='red', marker='o')
    _format_hour_axis(ax)
    ax.set_ylabel('Temperature (˚C)')
    ax.set_title(f'Average Temperature per Hour ({start_year}-{end_year})')
    plt.savefig(f'{unprocessed_output_folder}/{location_name}_average_temperature_per_hour.png')
    plt.show()

    ## Plot of the average relative humidity per hour
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(hours, mean_values_per_hour_df['mean_rh'], label='rh', color='blue', marker='o')
    _format_hour_axis(ax)
    ax.set_ylabel('Relative Humidity (%)')
    ax.set_title(f'Average Relative Humidity per Hour ({start_year}-{end_year})')
    plt.savefig(f'{unprocessed_output_folder}/{location_name}_average_relative_humidity_per_hour.png')
    plt.show()

    ## Combined plot per hour
    fig, ax_temp = plt.subplots()
    ax_temp.plot(hours, mean_values_per_hour_df['mean_tas'], label='Temperature', color='red', marker='o')
    _format_hour_axis(ax_temp)

    ### Set labels for primary axis
    ax_temp.set_ylabel('Temperature (˚C)', color='red')
    ax_temp.tick_params(axis='y', labelcolor='red')

    ### Create secondary axis for humidity
    ax_hum = ax_temp.twinx()
    ax_hum.plot(hours, mean_values_per_hour_df['mean_rh'], label='Humidity', color='blue', marker='o')

    ### Set labels for secondary axis
    ax_hum.set_ylabel('Humidity (%)', color='blue')
    ax_hum.tick_params(axis='y', labelcolor='blue')

    ### Title
    plt.title(f'Average Temperature and Humidity per Hour ({start_year}-{end_year})')

    ### Save and show plot
    plt.savefig(f'{unprocessed_output_folder}/{location_name}_average_temperature_and_humidity_per_hour.png')
    plt.show()

    return mean_tas_per_hour, mean_rh_per_hour

In [14]:
def calculate_mean_tas_per_month_per_season_year():
    """Return ``tas_cube`` averaged over space, per month of each season year.

    The module-level ``tas_cube`` is collapsed over latitude and longitude and
    then aggregated by the ``season_year`` and ``month`` coordinates together
    (both by mean).

    NOTE: this function is defined a second time later in the file.
    """
    return (
        tas_cube
        .collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
        .aggregated_by(['season_year', 'month'], iris.analysis.MEAN)
    )

def calculate_mean_rh_per_month_per_season_year():
    """Return ``rh_cube`` averaged over space, per month of each season year.

    The module-level ``rh_cube`` is collapsed over latitude and longitude and
    then aggregated by the ``season_year`` and ``month`` coordinates together
    (both by mean).

    NOTE: this function is defined a second time later in the file.
    """
    return (
        rh_cube
        .collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
        .aggregated_by(['season_year', 'month'], iris.analysis.MEAN)
    )

# //TODO: Move December ('Dec') to be the first month of the season year plot's x-axis

def create_average_temperature_per_month_per_season_year_plots():
    """Plot the monthly mean temperature of every season year on one figure.

    Each season year is drawn as a grey dashed line and the overall monthly
    mean (from ``calculate_mean_tas_per_month``) as a solid red line. The
    figure is shown but not saved.

    NOTE: this function is defined a second time later in the file.
    """
    # Average temperature per month, for each season year
    per_year_cube = calculate_mean_tas_per_month_per_season_year()
    year_of_point = per_year_cube.coord('season_year').points
    month_of_point = per_year_cube.coord('month').points
    temperatures = per_year_cube.data

    monthly_climatology = calculate_mean_tas_per_month()

    # TODO: reorder months to start with December for clearer visualisation of season year

    plt.figure()

    for season_year in np.unique(year_of_point):
        in_this_year = year_of_point == season_year
        plt.plot(month_of_point[in_this_year], temperatures[in_this_year],
                 label=str(season_year), color='grey', linestyle='--')

    plt.plot(monthly_climatology.coord('month').points, monthly_climatology.data,
             label='Overall Mean', color='red', linestyle='-')

    plt.xlabel('Month')
    plt.ylabel('Average Temperature')
    plt.title('Average Temperature per Season Year per Month')

    plt.show()

In [15]:
def calculate_mean_temperature_per_hour():
    """Return ``tas_cube`` averaged over space and then within each hour.

    The module-level ``tas_cube`` is collapsed over latitude and longitude and
    then aggregated by its ``hour`` coordinate (both by mean). This performs
    the same computation as ``calculate_mean_tas_per_hour``.
    """
    spatial_mean = tas_cube.collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
    return spatial_mean.aggregated_by('hour', iris.analysis.MEAN)

def calculate_mean_rh_per_hour():
    """Return ``rh_cube`` averaged over space and then within each hour.

    The module-level ``rh_cube`` is collapsed over latitude and longitude and
    then aggregated by its ``hour`` coordinate (both by mean).
    """
    spatial_mean = rh_cube.collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
    return spatial_mean.aggregated_by('hour', iris.analysis.MEAN)

def calculate_mean_tas_per_hour():
    """Return ``tas_cube`` averaged over space and then within each hour.

    The module-level ``tas_cube`` is collapsed over latitude and longitude and
    then aggregated by its ``hour`` coordinate (both by mean).
    """
    spatial_mean = tas_cube.collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
    return spatial_mean.aggregated_by('hour', iris.analysis.MEAN)

def mean_tas_per_day_of_month():
    """Return ``tas_cube`` averaged over space and then per day of the month.

    The module-level ``tas_cube`` is collapsed over latitude and longitude and
    then aggregated by its ``day_of_month`` coordinate (both by mean).
    """
    spatial_mean = tas_cube.collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
    return spatial_mean.aggregated_by('day_of_month', iris.analysis.MEAN)

def plot_mean_tas_per_day_of_month():
    """Plot the mean temperature for each day of the month and show it.

    The data come from ``mean_tas_per_day_of_month()``; the figure is shown
    but not saved, and nothing is returned.
    """
    # mean_tas_per_day_of_month is a function, so it must be called to obtain
    # the cube; the coordinate points are then plotted against the cube data.
    tas_by_day = mean_tas_per_day_of_month()

    fig, ax = plt.subplots()
    ax.plot(tas_by_day.coord('day_of_month').points, tas_by_day.data)
    ax.set_xlabel('Day of Month')
    ax.set_ylabel('Temperature (˚C)')
    plt.show()

# Mean tas and rh per hour

In [16]:
# //TODO: Expand to other variables


def calculate_mean_tas_per_month_per_season_year():
    """Return ``tas_cube`` averaged over space, per month of each season year.

    The module-level ``tas_cube`` is collapsed over latitude and longitude and
    then aggregated by the ``season_year`` and ``month`` coordinates together
    (both by mean).

    NOTE: this repeats a definition of the same name earlier in the file.
    """
    area_mean = tas_cube.collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
    return area_mean.aggregated_by(['season_year', 'month'], iris.analysis.MEAN)

def calculate_mean_rh_per_month_per_season_year():
    """Return ``rh_cube`` averaged over space, per month of each season year.

    The module-level ``rh_cube`` is collapsed over latitude and longitude and
    then aggregated by the ``season_year`` and ``month`` coordinates together
    (both by mean).

    NOTE: this repeats a definition of the same name earlier in the file.
    """
    area_mean = rh_cube.collapsed(['latitude', 'longitude'], iris.analysis.MEAN)
    return area_mean.aggregated_by(['season_year', 'month'], iris.analysis.MEAN)

# //TODO: Move December ('Dec') to be the first month of the season year plot's x-axis

def create_average_temperature_per_month_per_season_year_plots():
    """Plot the monthly mean temperature of every season year on one figure.

    Each season year is drawn as a grey dashed line and the overall monthly
    mean (from ``calculate_mean_tas_per_month``) as a solid red line. The
    figure is shown but not saved.

    NOTE: this repeats a definition of the same name earlier in the file.
    """
    # Average temperature per month, for each season year
    per_year_cube = calculate_mean_tas_per_month_per_season_year()
    year_of_point = per_year_cube.coord('season_year').points
    month_of_point = per_year_cube.coord('month').points
    temperatures = per_year_cube.data

    monthly_climatology = calculate_mean_tas_per_month()

    # TODO: reorder months to start with December for clearer visualisation of season year

    plt.figure()

    for season_year in np.unique(year_of_point):
        in_this_year = year_of_point == season_year
        plt.plot(month_of_point[in_this_year], temperatures[in_this_year],
                 label=str(season_year), color='grey', linestyle='--')

    plt.plot(monthly_climatology.coord('month').points, monthly_climatology.data,
             label='Overall Mean', color='red', linestyle='-')

    plt.xlabel('Month')
    plt.ylabel('Average Temperature')
    plt.title('Average Temperature per Season Year per Month')

    plt.show()

# Work in progress



#### Yearly summer mean tas, rh, td, and es

In [17]:
# SPATIALLY COLLAPSED CUBES FOR TIME SERIES PLOTS
# # Calculating the yearly mean, min, and max for each variable
# Mean cubes

# Defining a function to write the summary statistics to a file for later viewing
def write_summary_statistics():
    """Write summary statistics of the yearly mean cubes to a text file.

    For each of the module-level ``tas``, ``rh``, ``td`` and ``es`` yearly
    mean cubes, the maximum, minimum, mean, standard deviation and variance
    of the cube data are written to ``summary_statistics.txt`` in the current
    working directory. An existing file is overwritten.
    """
    def _describe(quantity, yearly_mean_cube):
        # Text block holding the five statistics of one variable.
        return (f'Summary statistics for the yearly summer mean {quantity}: \n'
                f'- Maximum: {yearly_mean_cube.data.max()} \n'
                f'- Minimum: {yearly_mean_cube.data.min()} \n'
                f'- Mean: {yearly_mean_cube.data.mean()} \n'
                f'- Standard deviation: {yearly_mean_cube.data.std()} \n'
                f'- Variance: {yearly_mean_cube.data.var()} \n')

    with open('summary_statistics.txt', 'w') as report:
        # The extra newline leaves a blank line between consecutive blocks.
        report.write(_describe('temperature at surface', tas_cube_yearly_mean) + '\n')
        report.write(_describe('relative humidity', rh_cube_yearly_mean) + '\n')
        report.write(_describe('dew point temperature', td_cube_yearly_mean) + '\n')
        report.write(_describe('water vapour pressure', es_cube_yearly_mean) + '\n')

In [18]:
# Ensure the output folder exists
# Create a 2x1 grid of subplots (temperature above relative humidity)
def plot_yearly_average_tas_and_rh():
    """Plot yearly mean, min and max temperature and relative humidity.

    Draws two stacked panels (temperature above relative humidity). Each panel
    shows the yearly mean, the yearly min and max as grey dashed lines, and a
    straight-line least-squares trend fitted to the yearly mean. The figure is
    saved as ``<location_name>_yearly_analysis.png`` in
    ``unprocessed_output_folder`` (created if missing) and then shown.

    Reads the module-level ``*_cube_yearly_mean/min/max`` cubes; returns nothing.
    """
    def _linear_trend(cube):
        # Degree-1 polynomial fit of the cube data against its season_year.
        season_years = cube.coord('season_year').points
        slope, intercept = np.polyfit(season_years, cube.data, 1)
        return slope * season_years + intercept

    y_fits = [_linear_trend(cube) for cube in (tas_cube_yearly_mean, rh_cube_yearly_mean,
                                                #td_cube_yearly_mean, es_cube_yearly_mean
                                                )]

    fig, axs = plt.subplots(2, 1, figsize=(15, 10))
    fig.suptitle('Yearly Summer Mean Analysis')

    # Plot 1: Yearly summer mean temperature at surface
    axs[0].plot(tas_cube_yearly_mean.coord('season_year').points, tas_cube_yearly_mean.data, color='red', linestyle='-', label='Mean')
    axs[0].plot(tas_cube_yearly_min.coord('season_year').points, tas_cube_yearly_min.data, color='grey', linestyle='--')
    axs[0].plot(tas_cube_yearly_max.coord('season_year').points, tas_cube_yearly_max.data, color='grey', linestyle='--')
    axs[0].plot(tas_cube_yearly_mean.coord('season_year').points, y_fits[0], color='black', linestyle='-', label='Overall Trend')
    axs[0].set_title('Temperature at Surface')
    axs[0].set_ylabel('Temperature (°C)')
    axs[0].set_xlabel('Season Year')

    # Plot 2: Yearly summer mean relative humidity
    axs[1].plot(rh_cube_yearly_mean.coord('season_year').points, rh_cube_yearly_mean.data, color='blue')
    axs[1].plot(rh_cube_yearly_min.coord('season_year').points, rh_cube_yearly_min.data, color='grey', linestyle='--')
    axs[1].plot(rh_cube_yearly_max.coord('season_year').points, rh_cube_yearly_max.data, color='grey', linestyle='--')
    # The trend is drawn against the same coordinate it was fitted on.
    axs[1].plot(rh_cube_yearly_mean.coord('season_year').points, y_fits[1], color='black', linestyle='-', label='Overall Trend')
    axs[1].set_title('Relative Humidity')
    axs[1].set_ylabel('Relative humidity (%)')
    axs[1].set_xlabel('Season Year')

    # # Plot 3: Yearly summer mean dew point temperature
    # axs[1, 0].plot(td_cube_yearly_mean.coord('season_year').points, td_cube_yearly_mean.data, color='green')
    # axs[1, 0].plot(td_cube_yearly_min.coord('season_year').points, td_cube_yearly_min.data, color='grey', linestyle='--')
    # axs[1, 0].plot(td_cube_yearly_max.coord('season_year').points, td_cube_yearly_max.data, color='grey', linestyle='--')
    # axs[1, 0].plot(tas_cube_yearly_mean.coord('season_year').points, y_fits[2], color='black', linestyle='-', label='Overall Trend')
    # axs[1, 0].set_title('Dew Point Temperature')
    # axs[1, 0].set_ylabel('Dew point temperature (°C)')
    # axs[1, 0].set_xlabel('Season Year')

    # # Plot 4: Yearly summer mean water vapour pressure
    # axs[1, 1].plot(es_cube_yearly_mean.coord('season_year').points, es_cube_yearly_mean.data, color='purple')
    # axs[1, 1].plot(es_cube_yearly_min.coord('season_year').points, es_cube_yearly_min.data, color='grey', linestyle='--')
    # axs[1, 1].plot(es_cube_yearly_max.coord('season_year').points, es_cube_yearly_max.data, color='grey', linestyle='--')
    # axs[1, 1].plot(tas_cube_yearly_mean.coord('season_year').points, y_fits[3], color='black', linestyle='-', label='Overall Trend')
    # axs[1, 1].set_title('Water Vapour Pressure')
    # axs[1, 1].set_ylabel('Water vapour pressure (kPa)')
    # axs[1, 1].set_xlabel('Season Year')

    # Adjust layout, leaving room at the top for the suptitle
    plt.tight_layout(rect=[0, 0, 1, 0.95])

    # Ensure the output folder exists, then save the figure
    os.makedirs(unprocessed_output_folder, exist_ok=True)
    plt.savefig(f'{unprocessed_output_folder}/{location_name}_yearly_analysis.png')

    # Show the figure
    plt.show()

In [19]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FormatStrFormatter

def plot_min_mean_max_analysis_maps():
    """Plot a 3x2 grid of overall min, mean and max maps for tas and rh.

    Rows are Min, Mean and Max; the left column shows temperature ('Reds')
    and the right column relative humidity ('Blues'). Every panel gets a
    horizontal colourbar with five ticks spanning its data range and the
    module-level ``shapefile`` outline drawn on top. The figure is saved as
    ``<location_name>_analysis_maps.png`` in ``unprocessed_output_folder``
    and then shown.
    """
    fig = plt.figure(figsize=(17, 19))
    fig.subplots_adjust(left=0.2)  # Adjust the left padding to increase space for y-labels and titles
    subfigs = fig.subfigures(3, 1)

    # Vertical row labels down the left-hand side
    for subfig, row_label in zip(subfigs, ('Min', 'Mean', 'Max')):
        subfig.suptitle(row_label, x=0.05, y=0.6, ha='left', va='center', fontsize=14, rotation=90)

    axes_by_row = [subfig.subplots(1, 2, sharex=True, sharey=True) for subfig in subfigs]

    # Meshgrid of the cube grid, shared by every panel
    lon, lat = np.meshgrid(tas_cube_overall_mean.coord('longitude').points,
                           tas_cube_overall_mean.coord('latitude').points)

    def _draw_map(ax, cube, cmap, title=None):
        # One map panel: colour mesh, colourbar, axis labels, region outline.
        mesh = ax.pcolormesh(lon, lat, cube.data, cmap=cmap)
        cbar = fig.colorbar(mesh, ax=ax, orientation='horizontal')
        cbar.set_ticks(np.linspace(cube.data.min(), cube.data.max(), 5))
        cbar.ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))  # Format ticks to 1 decimal place
        ax.set_xlabel('Longitude')
        ax.set_ylabel('Latitude')
        shapefile.plot(ax=ax, color='none', edgecolor='black', linewidth=1)  # Overlay the shape file
        if title is not None:
            ax.set_title(title, fontsize=14)

    cubes_by_row = (
        (tas_cube_overall_min, rh_cube_overall_min),
        (tas_cube_overall_mean, rh_cube_overall_mean),
        (tas_cube_overall_max, rh_cube_overall_max),
    )
    # Column titles appear on the top row only
    titles_by_row = (
        ('Temperature at Surface \n', 'Humidity (%) \n'),
        (None, None),
        (None, None),
    )

    for row_axes, (tas_map, rh_map), (tas_title, rh_title) in zip(axes_by_row, cubes_by_row, titles_by_row):
        _draw_map(row_axes[0], tas_map, 'Reds', tas_title)
        _draw_map(row_axes[1], rh_map, 'Blues', rh_title)

    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.savefig(f'{unprocessed_output_folder}/{location_name}_analysis_maps.png')
    plt.show()

# Heat index Algorithms

### 21 Heat Index Algorithms from Anderson et al. (2013)

### Lu and Romps (2022)

# 2. Comparing the algorithms

# Creating daily, monthly, and yearly cubes

In [22]:
# //TODO: Create these cubes

In [23]:
def normalise_cubelist_globally():
    '''Rescale every cube in ``time_series_cubelist`` onto a shared [-1, 1] range.

    The minimum and maximum are taken over all cubes together (not per cube),
    so the relative offsets between the cubes are preserved.

    Reads the module-level ``time_series_cubelist``. The result is stored in
    the module-level ``normalized_cubelist`` and also returned.

    Returns:
    -------
    CubeList of copies of the input cubes holding the rescaled data

    Raises:
    ------
    ValueError -- if ``time_series_cubelist`` is empty
    '''
    global normalized_cubelist

    if len(time_series_cubelist) == 0:
        raise ValueError('time_series_cubelist is empty; nothing to normalise')

    # np.concatenate does not preserve the masks of masked inputs, so any
    # masked points would leak their underlying values into the extremes.
    # The masked-array variant behaves the same for plain arrays.
    all_data = np.ma.concatenate(
        [cube.data.ravel() for cube in time_series_cubelist]
    )
    global_min = all_data.min()
    global_max = all_data.max()
    value_range = global_max - global_min

    rescaled_cubes = []
    for cube in time_series_cubelist:
        data = cube.data
        if value_range == 0:
            # Every value is identical: avoid dividing by zero and place the
            # data at the midpoint of the target range instead of NaN.
            scaled_data = np.zeros_like(data, dtype=float)
        else:
            normalized_data = (data - global_min) / value_range  # [0, 1]
            scaled_data = 2 * normalized_data - 1  # [-1, 1]
        rescaled_cubes.append(cube.copy(data=scaled_data))

    normalized_cubelist = CubeList(rescaled_cubes)
    return normalized_cubelist

## Processed outputs

In [24]:
def plot_yearly_mean_values_from_heat_indexes():
    '''Plot one yearly time series per cube in ``time_series_cubelist``.

    Reads the module-level ``time_series_cubelist`` and ``location_name``.
    Each cube is drawn against its ``season_year`` coordinate and labelled
    with its 1-based position in the list. The figure is written to
    ``outputs/<location_name>/heat_index_outputs`` and then shown.
    '''
    output_folder = f"outputs/{location_name}/heat_index_outputs"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(output_folder, exist_ok=True)

    plt.figure(figsize=(10, 6))

    colors = plt.cm.viridis(np.linspace(0, 1, len(time_series_cubelist)))

    for i, cube in enumerate(time_series_cubelist):
        years = cube.coord('season_year').points
        # Index 21 is the 22nd cube; it is drawn in red so it stands out
        # (presumably the Lu and Romps (2022) index -- confirm list order).
        color = 'red' if i == 21 else colors[i]
        plt.plot(years, cube.data, label=f'{i+1}', color=color)

    plt.xlabel('Season Year')
    plt.ylabel('Mean Value')
    plt.title('Yearly Mean Values from Heat Indexes')
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Index')

    # The legend sits outside the axes; bbox_inches='tight' keeps it from
    # being cropped out of the saved image. The file name is left unchanged
    # so existing outputs keep their location.
    plt.savefig(f'{output_folder}/{location_name}_mean_analysis_maps.png',
                bbox_inches='tight')
    plt.show()

In [25]:
def plot_yearly_max_values_from_heat_indexes():
    '''Plot one yearly maximum time series per cube in ``time_series_cubelist``.

    Reads the module-level ``time_series_cubelist`` and ``location_name``.
    The function plots whatever ``time_series_cubelist`` currently holds, so
    the caller is expected to have filled it with yearly maxima -- TODO
    confirm against the calling cell.

    Each cube is drawn against its ``season_year`` coordinate and labelled
    with its 1-based position in the list. The figure is written to
    ``outputs/<location_name>/heat_index_outputs`` and then shown.
    '''
    output_folder = f"outputs/{location_name}/heat_index_outputs"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(output_folder, exist_ok=True)

    plt.figure(figsize=(10, 6))

    colors = plt.cm.viridis(np.linspace(0, 1, len(time_series_cubelist)))

    for i, cube in enumerate(time_series_cubelist):
        years = cube.coord('season_year').points
        # Index 21 is the 22nd cube; it is drawn in red so it stands out
        # (presumably the Lu and Romps (2022) index -- confirm list order).
        color = 'red' if i == 21 else colors[i]
        plt.plot(years, cube.data, label=f'{i+1}', color=color)

    # Labels and file name say "max" so this figure is not mistaken for,
    # or saved under the name of, a yearly-mean plot.
    plt.xlabel('Season Year')
    plt.ylabel('Max Value')
    plt.title('Yearly Max Values from Heat Indexes')
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Index')

    # The legend sits outside the axes; bbox_inches='tight' keeps it from
    # being cropped out of the saved image.
    plt.savefig(f'{output_folder}/{location_name}_max_analysis_timeseries.png',
                bbox_inches='tight')
    plt.show()

In [26]:
## NOTE: LIKELY TO LOOK VERY SIMILAR IN OUTPUT TO THE ONE ABOVE

def create_table_of_differences_between_heat_index_and_air_temperature():
    """
    Creates a table of differences between the heat index and air temperature for each cube.

    Reads the module-level ``time_series_cubelist``, ``tas_time_series_cube``
    and ``location_name``. For every cube the point-wise difference to
    ``tas_time_series_cube`` is averaged into a single number. The table is
    sorted by the absolute size of that number and written as CSV to
    ``outputs/<location_name>/heat_index_outputs``.

    Returns:
    -------
    pandas DataFrame with the columns "Algorithm" (the cube ``long_name``)
    and "Average Difference with Air Temperature"
    """
    # The folder is defined and created here so that the function does not
    # depend on a global ``output_folder`` or on a plotting function having
    # created the directory first.
    output_folder = f"outputs/{location_name}/heat_index_outputs"
    os.makedirs(output_folder, exist_ok=True)

    air_temperature = tas_time_series_cube.data

    table = pd.DataFrame({
        "Algorithm": [cube.long_name for cube in time_series_cubelist],
        "Average Difference with Air Temperature": [
            np.mean(cube.data - air_temperature)
            for cube in time_series_cubelist
        ],
    })

    # Smallest absolute difference first
    table = table.sort_values(by="Average Difference with Air Temperature", key=abs)

    csv_file_path = f'{output_folder}/{location_name}_heat_index_vs_air_temperature.csv'
    table.to_csv(csv_file_path, index=False)

    return table

# Assuming the globals time_series_cubelist and tas_time_series_cube are defined
# (the function takes no arguments and reads them from module scope):
# table = create_table_of_differences_between_heat_index_and_air_temperature()
# print(table)

In [27]:
def plot_difference_from_air_temperature():
    '''Plot, per cube, the yearly difference to the air temperature series.

    Reads the module-level ``time_series_cubelist``, ``tas_time_series_cube``
    and ``location_name``. Each cube is drawn as ``cube.data`` minus
    ``tas_time_series_cube.data`` and labelled with its 1-based position in
    the list. A dashed line marks zero difference. The figure is written to
    ``outputs/<location_name>/heat_index_outputs`` and then shown.
    '''
    # Defined and created locally so the function does not depend on a
    # global ``output_folder`` or on the directory already existing.
    output_folder = f"outputs/{location_name}/heat_index_outputs"
    os.makedirs(output_folder, exist_ok=True)

    colors = plt.cm.viridis(np.linspace(0, 1, len(time_series_cubelist)))
    air_temperature = tas_time_series_cube.data

    plt.figure(figsize=(10, 6))

    years = None
    for i, cube in enumerate(time_series_cubelist):
        # NOTE(review): this uses the 'year' coordinate while other plots in
        # this notebook use 'season_year' -- confirm which one is intended.
        years = cube.coord('year').points
        difference = cube.data - air_temperature
        # The 22nd cube (index 21; Lu and Romps, 2022) is drawn once, in red,
        # so it gets a single legend entry instead of two.
        color = 'red' if i == 21 else colors[i]
        plt.plot(years, difference, label=f'{i+1}', color=color)

    plt.axhline(y=0, color='black', linestyle='dashed', linewidth=2)

    if years is not None:
        # Minor ticks on every year and x-axis limited to the plotted years
        # (taken from the last cube in the list)
        plt.xticks(ticks=years, minor=True)
        plt.xlim(years[0], years[-1])

    plt.xlabel('Year')
    plt.ylabel('Difference from Air Temperature')
    plt.title('Yearly Mean Difference from Air Temperature for Each Index')
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Index')

    # The legend sits outside the axes; bbox_inches='tight' keeps it from
    # being cropped out of the saved image.
    plt.savefig(f'{output_folder}/{location_name}_difference_from_air_temperature_analysis.png',
                bbox_inches='tight')
    plt.show()

In [28]:
def plot_mean_differences_from_Lu_and_Romps_for_each_index():
    '''Plot, per cube, the yearly difference to the cube at index 21.

    Reads the module-level ``time_series_cubelist`` and ``location_name``.
    The cube at index 21 is assumed to be the Lu and Romps (2022) index and
    serves as the reference; its own (all-zero) difference is drawn as a red
    dashed line. The figure is written to
    ``outputs/<location_name>/heat_index_outputs`` and then shown.

    Raises:
    ------
    IndexError -- if ``time_series_cubelist`` holds fewer than 22 cubes
    '''
    # Defined and created locally so the function does not depend on a
    # global ``output_folder`` or on the directory already existing.
    output_folder = f"outputs/{location_name}/heat_index_outputs"
    os.makedirs(output_folder, exist_ok=True)

    # One distinct colour per cube; the default colour cycle only has ten
    # entries and would repeat across the lines.
    colors = plt.cm.viridis(np.linspace(0, 1, len(time_series_cubelist)))

    # Loop-invariant reference data, looked up once
    lu_and_romps_data = time_series_cubelist[21].data

    plt.figure(figsize=(10, 6))

    years = None
    for i, cube in enumerate(time_series_cubelist):
        years = cube.coord('season_year').points
        difference = cube.data - lu_and_romps_data
        if i == 21:
            # Reference cube: drawn once so it gets a single legend entry
            plt.plot(years, difference, label=f'{i+1}', color='red',
                     linestyle='dashed', linewidth=2)
        else:
            plt.plot(years, difference, label=f'{i+1}', color=colors[i])

    if years is not None:
        # Limit the x-axis to the plotted years (taken from the last cube)
        plt.xlim(years[0], years[-1])

    plt.xlabel('Year')
    plt.ylabel('Difference from Lu and Romps')
    plt.title('Yearly Mean Difference from Lu and Romps for Each Index')
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Index')

    # The legend sits outside the axes; bbox_inches='tight' keeps it from
    # being cropped out of the saved image.
    plt.savefig(f'{output_folder}/{location_name}_difference_from_Lu_and_analysis.png',
                bbox_inches='tight')
    plt.show()

In [29]:
def create_table_of_differences_between_heat_index_and_Lu_and_Romps():
    '''Tabulate how far each cube's overall mean lies from the cube at index 21.

    Reads the module-level ``time_series_cubelist``, ``heat_index_22_cube``
    and ``location_name``. The cube at index 21 is assumed to be the Lu and
    Romps (2022) index. The per-cube differences are also stored in the
    module-level ``average_difference_with_Lu_and_Romps`` (one entry per
    cube, in list order, including the reference itself).

    The table is sorted by absolute difference, the row whose name equals
    ``heat_index_22_cube.long_name`` is dropped, and the result is written as
    CSV to ``outputs/<location_name>/heat_index_outputs`` and printed.

    Raises:
    ------
    IndexError -- if ``time_series_cubelist`` holds fewer than 22 cubes
    '''
    global average_difference_with_Lu_and_Romps

    # Defined and created locally so the function does not depend on a
    # global ``output_folder`` or on the directory already existing.
    output_folder = f"outputs/{location_name}/heat_index_outputs"
    os.makedirs(output_folder, exist_ok=True)

    # Loop-invariant reference mean, computed once
    lu_and_romps_mean = time_series_cubelist[21].data.mean()
    average_difference_with_Lu_and_Romps = [
        cube.data.mean() - lu_and_romps_mean for cube in time_series_cubelist
    ]

    table = pd.DataFrame({
        "Algorithm": [cube.long_name for cube in time_series_cubelist],
        "Difference with Lu and Romps": average_difference_with_Lu_and_Romps,
    })
    table = table.sort_values(by="Difference with Lu and Romps", key=abs)

    # Drop the reference algorithm itself; its difference is zero by definition
    table = table[table["Algorithm"] != heat_index_22_cube.long_name]

    # Own file name: reusing "..._heat_index_vs_air_temperature.csv" would
    # overwrite the air-temperature comparison table.
    csv_file_path = f'{output_folder}/{location_name}_heat_index_vs_Lu_and_Romps.csv'
    table.to_csv(csv_file_path, index=False)

    print(table)

In [30]:
def plot_heat_index_comparison_maps():
    '''Draw every cube in ``maps_cubelist`` as a map on a 5 x 5 grid of panels.

    Reads the module-level ``maps_cubelist``, ``shapefile`` and
    ``location_name``. Each panel shows ``cube.data`` on the cube's
    longitude/latitude grid with its own horizontal colour bar and the
    shapefile outline on top. Panels without a cube are removed. The figure
    is written to ``outputs/<location_name>/heat_index_outputs`` and shown.

    Raises:
    ------
    IndexError -- if ``maps_cubelist`` holds more cubes than there are panels
    '''
    # Set the desired layout
    rows = 5
    columns = 5

    fig, axs = plt.subplots(rows, columns, figsize=(5*columns, 4*rows))
    fig.suptitle('Heat Index Comparison Maps')

    # Flatten axs array for easy indexing if it's 2D
    if rows > 1 and columns > 1:
        axs = axs.flatten()

    for i, cube in enumerate(maps_cubelist):
        panel = axs[i]

        # Grid of actual longitude/latitude values for plotting
        longitude = cube.coord('longitude').points
        latitude = cube.coord('latitude').points
        lon, lat = np.meshgrid(longitude, latitude)

        mesh = panel.pcolormesh(lon, lat, cube.data, cmap='Reds')
        cbar = fig.colorbar(mesh, ax=panel, orientation='horizontal')

        # Five evenly spaced ticks across this panel's own data range
        ticks = np.linspace(cube.data.min(), cube.data.max(), 5)
        cbar.set_ticks(ticks)
        cbar.set_ticklabels([f'{val:.2f}' for val in ticks])

        panel.set_title(f'Heat Index {i+1}')

        # Overlay the shape file on this panel
        shapefile.plot(ax=panel, color='none', edgecolor='black', linewidth=1)

        panel.set_xlabel('Longitude')
        panel.set_ylabel('Latitude')

    # Hide any unused subplots. Slicing by length (rather than reusing the
    # loop variable) also works when maps_cubelist is empty.
    for unused_panel in axs[len(maps_cubelist):]:
        fig.delaxes(unused_panel)

    plt.tight_layout(pad=3.0)

    # makedirs also creates missing parent folders, which os.mkdir does not
    output_folder = f"outputs/{location_name}/heat_index_outputs"
    os.makedirs(output_folder, exist_ok=True)

    plt.savefig(f'{output_folder}/{location_name}_heat_index_comparison_maps.png')

    plt.show()

In [31]:
def plot_heat_index_maps_relative_to_Lu_and_Romps():
    '''Draw, per cube in ``maps_cubelist``, a map of its difference to cube 22.

    Reads the module-level ``maps_cubelist``, ``shapefile`` and
    ``location_name``. The cube at index 21 is assumed to be the Lu and Romps
    (2022) index and is subtracted from every cube (including itself). Each
    panel of the 5 x 5 grid has its own horizontal colour bar and the
    shapefile outline on top. Panels without a cube are removed. The figure
    is written to ``outputs/<location_name>/heat_index_difference_maps`` and
    shown.

    Raises:
    ------
    IndexError -- if ``maps_cubelist`` holds fewer than 22 cubes or more
                  cubes than there are panels
    '''
    # Set the desired layout
    rows = 5
    columns = 5

    fig, axs = plt.subplots(rows, columns, figsize=(5*columns, 4*rows))
    fig.suptitle('Heat Index Difference Maps')

    # Flatten axs array for easy indexing if it's 2D
    if rows > 1 and columns > 1:
        axs = axs.flatten()

    # Reference cube data for comparison
    Lu_and_Romps_cube_data = maps_cubelist[21].data

    for i, cube in enumerate(maps_cubelist):
        panel = axs[i]
        difference_data = cube.data - Lu_and_Romps_cube_data

        # Grid of actual longitude/latitude values for plotting
        longitude = cube.coord('longitude').points
        latitude = cube.coord('latitude').points
        lon, lat = np.meshgrid(longitude, latitude)

        mesh = panel.pcolormesh(lon, lat, difference_data, cmap='coolwarm')
        cbar = fig.colorbar(mesh, ax=panel, orientation='horizontal')

        # Five evenly spaced ticks across this panel's own difference range
        ticks = np.linspace(difference_data.min(), difference_data.max(), 5)
        cbar.set_ticks(ticks)
        cbar.set_ticklabels([f'{val:.2f}' for val in ticks])

        panel.set_title(f'Heat Index Difference {i+1}')

        # Overlay the shape file on this panel
        shapefile.plot(ax=panel, color='none', edgecolor='black', linewidth=1)

        panel.set_xlabel('Longitude')
        panel.set_ylabel('Latitude')

    # Hide any unused subplots. Slicing by length avoids reusing the loop
    # variable after the loop.
    for unused_panel in axs[len(maps_cubelist):]:
        fig.delaxes(unused_panel)

    plt.tight_layout(pad=3.0)

    # makedirs also creates missing parent folders, which os.mkdir does not
    output_folder = f"outputs/{location_name}/heat_index_difference_maps"
    os.makedirs(output_folder, exist_ok=True)

    plt.savefig(f'{output_folder}/{location_name}_heat_index_difference_maps.png')

    plt.show()

In [32]:
def plot_normalized_yearly_mean_values_from_heat_indexes():
    '''Plot every cube in ``normalized_cubelist`` plus their year-by-year mean.

    Reads the module-level ``normalized_cubelist`` and ``location_name``.
    Each cube is drawn once against its ``season_year`` coordinate. The
    average over all cubes is added as a dashed red "Overall Mean" line.
    The figure is written to ``outputs/<location_name>/heat_index_outputs``
    and then shown.

    Assumes every cube covers the same season years, so the series can be
    averaged element-wise -- TODO confirm for the cubes in use.
    '''
    plt.figure()

    colors = plt.cm.viridis(np.linspace(0, 1, len(normalized_cubelist)))

    all_mean_values = []  # one series per cube, used for the overall mean
    years = None

    # Each cube is plotted exactly once so the legend has one entry per
    # algorithm and every series enters the overall mean a single time.
    for i, cube in enumerate(normalized_cubelist):
        years = cube.coord('season_year').points
        mean_values = cube.data
        all_mean_values.append(mean_values)
        plt.plot(years, mean_values, label=f'Algorithm {i+1}', color=colors[i])

    if all_mean_values:
        # Average across cubes (axis 0) to get one value per year; plotting
        # the raw list instead would not draw a mean line at all.
        overall_mean = np.mean(all_mean_values, axis=0)
        plt.plot(years, overall_mean, label='Overall Mean', color='red',
                 linestyle='--', alpha=0.5)

        # Minor ticks on every year and x-axis limited to the plotted years
        plt.xticks(ticks=years, minor=True)
        plt.xlim(years[0], years[-1])

    # Set common title, labels, and legend
    plt.title('Yearly Mean Values from Cubes')
    plt.xlabel('Year')
    plt.ylabel('Mean Value')
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    # makedirs also creates missing parent folders, which os.mkdir does not
    output_folder = f"outputs/{location_name}/heat_index_outputs"
    os.makedirs(output_folder, exist_ok=True)

    # Lay the figure out before saving so the saved file matches what is
    # shown; bbox_inches='tight' keeps the outside legend in the image.
    plt.tight_layout(rect=[0.03, 0.03, 1, 0.95])
    plt.savefig(f'{output_folder}/{location_name}_standardized_yearly_mean_of_mean_values_per_algorithm.png',
                bbox_inches='tight')

    plt.show()

In [33]:
def create_average_mean_per_year_across_algorithms():
    '''Average the normalised values of all algorithms for each season year.

    Reads the module-level ``normalized_cubelist``. Every cube contributes
    one (season year, value) record per point of its ``season_year``
    coordinate; the records are then averaged per season year.

    Returns:
    -------
    pandas DataFrame with the columns 'Season Year' and 'Value', one row per
    season year; empty (same columns) if there is nothing to average
    '''
    records = []
    for cube in normalized_cubelist:
        years = cube.coord('season_year').points
        values = cube.data

        for year, value in zip(years, values):
            records.append({'Season Year': year, 'Value': value})

    if not records:
        return pd.DataFrame(columns=['Season Year', 'Value'])

    # Previously the collected records were discarded; return the per-year
    # mean across all algorithms that the function name promises.
    return (
        pd.DataFrame(records)
        .groupby('Season Year', as_index=False)['Value']
        .mean()
    )


In [34]:
def plot_normalized_algorithms():
    '''Plot every cube in ``normalized_cubelist`` as a labelled line.

    Reads the module-level ``normalized_cubelist`` and ``location_name``.
    Each cube is drawn against its ``season_year`` coordinate and labelled
    'Algorithm <n>' by its 1-based position in the list. The figure is
    written to ``outputs/<location_name>/heat_index_outputs`` and shown.
    '''
    plt.figure(figsize=(12, 8))

    # One line per algorithm, in list order
    for i, cube in enumerate(normalized_cubelist):
        years = cube.coord('season_year').points
        plt.plot(years, cube.data, label=f'Algorithm {i+1}')

    plt.legend()  # identifies each algorithm
    plt.xlabel('Season Year')
    plt.ylabel('Mean Value')
    plt.title('Comparison of Yearly Mean of Mean Standardised Values from Algorithms')
    plt.grid(True)

    # makedirs also creates missing parent folders, which os.mkdir does not
    output_folder = f"outputs/{location_name}/heat_index_outputs"
    os.makedirs(output_folder, exist_ok=True)
    plt.savefig(f'{output_folder}/{location_name}_comparison_of_algorithms.png')

    plt.show()