## SILO gridded_data_testing
### Downloads a NetCDF file from an S3 bucket and saves it to a local directory.

In [None]:
import boto3
import os

def download_nc_file(year, data_type):
    """Fetch one annual NetCDF file from the 'silo-open-data' S3 bucket.

    The object is stored locally under the project's gridded-data directory,
    in a sub-directory named after the data type (created on demand).

    Args:
        year (int): The year of the data to download.
        data_type (str): Which dataset to fetch; 'monthly_rain' or 'max_temp'.

    Returns:
        str: The local path of the downloaded file, or None if the data type
        is not recognised or the download fails (a message is printed in
        both cases).
    """
    # Guard clause: reject unknown data types before touching S3 or the disk.
    if data_type not in ('monthly_rain', 'max_temp'):
        print("Invalid data_type. Choose 'monthly_rain' or 'max_temp'")
        return None

    bucket = 'silo-open-data'
    file_name = f'{year}.{data_type}.nc'
    # Remote layout and local layout both key off the data type.
    object_key = f'Official/annual/{data_type}/{file_name}'
    target_dir = f"/home/roddyb/projects/wheatbelt_rainfall_analyser/data/gridded/{data_type}"
    target_path = os.path.join(target_dir, file_name)

    client = boto3.client('s3')

    try:
        os.makedirs(target_dir, exist_ok=True)  # No error if it already exists
        client.download_file(bucket, object_key, target_path)
    except Exception as e:
        print(f"Error downloading {file_name}: {e}")
        return None
    return target_path

In [None]:
# Fetch the 2025 max_temp file; the cell output is the local path (or None).
download_nc_file(year=2025, data_type="max_temp")

## Extracts and plots monthly data for a specified variable over a chosen time range

In [59]:

import netCDF4
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

def extract_and_plot_data(lat, lon, data_type, variable_name, start_year, start_month, end_year, end_month):
    """Extracts and plots monthly data for a specified variable.

    For every month from (start_year, start_month) to (end_year, end_month)
    inclusive, the matching annual NetCDF file is opened, the grid cell whose
    coordinates are closest to (lat, lon) is located, and one value is read.
    The collected values are drawn as a line plot and saved to "plot.png" in
    the current working directory.

    Processing stops at the first month that raises an error (for example a
    missing file); months gathered before that point are still plotted.

    Args:
        lat (float): Latitude of the location.
        lon (float): Longitude of the location.
        data_type (str): The data type ('max_temp' or 'monthly_rain').
        variable_name (str): The name of the variable to extract (e.g., 'max_temp', 'monthly_rain').
        start_year (int): The starting year.
        start_month (int): The starting month (1-12).
        end_year (int): The ending year.
        end_month (int): The ending month (1-12).

    Returns:
        None. An invalid data_type prints a message and returns before any
        file is read or any plot is written.
    """
    # Validate once, up front, so a bad data_type is reported even when the
    # requested date range is empty.
    if data_type not in ('max_temp', 'monthly_rain'):
        print("Invalid data_type. Choose 'max_temp' or 'monthly_rain'.")
        return

    base_dir = "/home/roddyb/projects/wheatbelt_rainfall_analyser/data/gridded"

    month_labels = []
    data = []  # One entry per label; NaN marks a missing/failed month

    current_year = start_year
    current_month = start_month

    # Tuple comparison walks (year, month) pairs in calendar order.
    while (current_year, current_month) <= (end_year, end_month):
        month_labels.append(f'{current_month:02d}-{str(current_year)[2:]}')  # Short year format
        file_path = f"{base_dir}/{data_type}/{current_year}.{data_type}.nc"

        try:
            # The context manager guarantees the file handle is released even
            # when a lookup below raises.
            with netCDF4.Dataset(file_path, 'r') as dataset:
                # Nearest grid cell to the requested coordinates
                lat_vals = dataset.variables['lat'][:]
                lon_vals = dataset.variables['lon'][:]
                lat_index = np.argmin(np.abs(lat_vals - lat))
                lon_index = np.argmin(np.abs(lon_vals - lon))

                # NOTE(review): indexing the time axis with (month - 1)
                # assumes one time step per calendar month. Confirm this
                # holds for 'max_temp' files; if they are daily this reads
                # the first days of January instead of monthly values.
                month_index = current_month - 1

                # A partial year (e.g. the current one) may have fewer steps.
                time_var = dataset.variables['time']
                if month_index < 0 or month_index >= time_var.shape[0]:
                    print(f"Warning: Month {current_month} is out of range for year {current_year}. Skipping.")
                    value = np.nan
                else:
                    value = dataset.variables[variable_name][month_index, lat_index, lon_index]
                    # Masked (missing) cells become NaN so they can be filtered out
                    if hasattr(value, 'filled'):
                        value = value.filled(np.nan)

            # Append only after the file closed cleanly, so each label gets
            # exactly one data entry.
            data.append(value)

        except Exception as e:
            # Best-effort: report the failure, keep labels/data aligned, and
            # stop so the plot shows only the months read so far.
            print(f"Error processing {current_year}-{current_month}: {e}")
            data.append(np.nan)
            break

        current_month += 1
        if current_month > 12:
            current_month = 1
            current_year += 1

    # Now that we have extracted the data, plot it
    plt.clf()  # Clear the current figure
    data = np.array(data)

    # Plot the data, excluding NaN values
    valid_indices = ~np.isnan(data)
    if np.any(valid_indices):
        plt.plot(np.array(month_labels)[valid_indices], data[valid_indices], marker='o', label=variable_name)
        plt.xlabel("Month")
        plt.ylabel(variable_name)
        plt.title(f"Monthly {variable_name} at Lat:{lat}, Lon:{lon}")
        plt.grid(True)
        plt.legend()

        plt.xticks(rotation=45, ha='right')  # Rotate labels for better readability
        plt.tight_layout()  # Adjust layout to prevent labels from overlapping

    else:
        print("No valid data to plot.")

    plt.savefig("plot.png")  # Saved even when empty, as before

 


In [62]:

# Example run: plot max_temp from Sep 2024 through Jan 2025 for one location.
latitude, longitude = -27.54, 151.91
data_type = variable_name = 'max_temp'  # Plot just one
start_year, start_month = 2024, 9
end_year, end_month = 2025, 1
extract_and_plot_data(
    lat=latitude,
    lon=longitude,
    data_type=data_type,
    variable_name=variable_name,
    start_year=start_year,
    start_month=start_month,
    end_year=end_year,
    end_month=end_month,
)